예제 #1
0
    def analyse_failed_docs(self):
        self.get_config()

        print 'Analysing failed docs'
        load_config = self.get_load_config()
        failed_docs_files = load_config.get_failed_docs_files()
        print len(failed_docs_files), 'failed doc files'
        for failed_docs_file in failed_docs_files:
            print 'Loading file:', failed_docs_file
            failed_docs = file_utils.load_file_path(failed_docs_file)
            for failed_doc in failed_docs:
                reason = failed_docs[failed_doc]['reason']
                doc = failed_docs[failed_doc]['doc']
                print failed_doc
                error_reason = None
                if isinstance(reason, dict):
                    if 'index' in reason:
                        index = reason['index']

                    elif 'update' in reason:
                        index = reason['update']

                    if 'error' in index:
                        error = index['error']
                        if 'reason' in error:
                            error_reason = error['reason']

                if error_reason is None or len(error_reason) == 0:
                    print failed_docs[failed_doc]['reason']
                else:
                    print 'Reason:', error_reason

                print_doc = raw_input('Print doc?')
                if print_doc.lower() in ['y', 'yes']:
                    print json.dumps(doc)
예제 #2
0
    def analyse_failed_docs(self):
        self.get_config()

        print 'Analysing failed docs'
        load_config = self.get_load_config()
        failed_docs_files = load_config.get_failed_docs_files()
        print len(failed_docs_files), 'failed doc files'
        for failed_docs_file in failed_docs_files:
            print 'Loading file:', failed_docs_file
            failed_docs = file_utils.load_file_path(failed_docs_file)
            for failed_doc in failed_docs:
                reason = failed_docs[failed_doc]['reason']
                print failed_doc
                if isinstance(reason, dict):
                    if 'index' in reason:
                        index = reason['index']
                        if 'error' in index:
                            error = index['error']
                            if 'reason' in error:
                                error_reason = error['reason']
                                print error_reason
                else:
                    print reason

                raw_input('Continue?')
예제 #3
0
    def process_rows(self, process_row_method):
        """Feed every entry of the JSON data source to the row callback.

        The superclass ``process_rows`` is invoked first with
        ``process_row_method``. The data is then loaded from
        ``self.data_source_file_path`` via ``file_utils.load_file_path`` and
        each item is passed, together with its running index, to
        ``self.process_row_method``. Iteration stops as soon as that call
        returns a falsy value.

        ``self.current_index`` is reset to 0 before iterating and is only
        advanced after a row was processed successfully.
        NOTE(review): ``self.process_row_method`` is presumably stored by the
        superclass call - confirm.
        """
        super(JSONDataSource, self).process_rows(process_row_method)
        rows = file_utils.load_file_path(self.data_source_file_path)

        self.current_index = 0
        for row in rows:
            keep_going = self.process_row_method(row, self.current_index)
            if keep_going:
                self.current_index += 1
            else:
                # The callback asked to stop; leave the index on this row.
                break
예제 #4
0
    def process_rows(self, process_row_method):
        super(CrossrefEventsDataSource, self).process_rows(process_row_method)
    
        self.current_index = 0

        cursor_data = file_utils.load_file_path(self.cursor_file_path)
        cursor = None
        if 'cursor' in cursor_data:
            cursor = cursor_data['cursor']

        print 'Cursor file path:', self.cursor_file_path
        print 'Previous cursor:', cursor

        crossref_events_api = CrossRefEventsAPI()
        crossref_events_api.stream_events_for_doi_prefix(self.doi_prefix, self.works_fetched, cursor)