def analyse_failed_docs(self): self.get_config() print 'Analysing failed docs' load_config = self.get_load_config() failed_docs_files = load_config.get_failed_docs_files() print len(failed_docs_files), 'failed doc files' for failed_docs_file in failed_docs_files: print 'Loading file:', failed_docs_file failed_docs = file_utils.load_file_path(failed_docs_file) for failed_doc in failed_docs: reason = failed_docs[failed_doc]['reason'] doc = failed_docs[failed_doc]['doc'] print failed_doc error_reason = None if isinstance(reason, dict): if 'index' in reason: index = reason['index'] elif 'update' in reason: index = reason['update'] if 'error' in index: error = index['error'] if 'reason' in error: error_reason = error['reason'] if error_reason is None or len(error_reason) == 0: print failed_docs[failed_doc]['reason'] else: print 'Reason:', error_reason print_doc = raw_input('Print doc?') if print_doc.lower() in ['y', 'yes']: print json.dumps(doc)
def analyse_failed_docs(self): self.get_config() print 'Analysing failed docs' load_config = self.get_load_config() failed_docs_files = load_config.get_failed_docs_files() print len(failed_docs_files), 'failed doc files' for failed_docs_file in failed_docs_files: print 'Loading file:', failed_docs_file failed_docs = file_utils.load_file_path(failed_docs_file) for failed_doc in failed_docs: reason = failed_docs[failed_doc]['reason'] print failed_doc if isinstance(reason, dict): if 'index' in reason: index = reason['index'] if 'error' in index: error = index['error'] if 'reason' in error: error_reason = error['reason'] print error_reason else: print reason raw_input('Continue?')
def process_rows(self, process_row_method):
    """Feed every entry of the JSON data source file to the row callback.

    The parent class' ``process_rows`` is invoked first with
    ``process_row_method``. The file at ``self.data_source_file_path`` is
    then loaded and iterated; each item is handed to
    ``self.process_row_method`` together with ``self.current_index``.
    Iteration stops as soon as the callback returns a falsy value, leaving
    ``self.current_index`` at the position of the item that stopped it.
    """
    super(JSONDataSource, self).process_rows(process_row_method)

    rows = file_utils.load_file_path(self.data_source_file_path)
    self.current_index = 0

    for row in rows:
        keep_going = self.process_row_method(row, self.current_index)
        if not keep_going:
            return
        # Only advance once the row was accepted by the callback.
        self.current_index += 1
def process_rows(self, process_row_method): super(CrossrefEventsDataSource, self).process_rows(process_row_method) self.current_index = 0 cursor_data = file_utils.load_file_path(self.cursor_file_path) cursor = None if 'cursor' in cursor_data: cursor = cursor_data['cursor'] print 'Cursor file path:', self.cursor_file_path print 'Previous cursor:', cursor crossref_events_api = CrossRefEventsAPI() crossref_events_api.stream_events_for_doi_prefix(self.doi_prefix, self.works_fetched, cursor)