def bkp_analyze_document(self, xml_filename, package, folder_table_name):
    """Build a generic document from an XML file and attach it to its folder.

    The XML file is converted with ``self.xml2json``, loaded into
    ``self.json2model`` and turned into a ``Document``. The document's folder
    is then replaced by the one returned by ``self.all_folders.template``,
    and the document's section and the document itself are inserted into
    that folder.

    Args:
        xml_filename: name of the XML file; passed to the converter and
            used in report messages.
        package: object providing ``report`` (with a ``write`` method) and
            ``return_matching_files``.
        folder_table_name: name shown in the "is not registered" warning.

    Returns:
        The ``Document`` that was built, or ``None`` when the converter
        output is not a dict, the publication title is not registered, or
        ``json2model.return_document`` returns no document.
    """
    json_data = self.xml2json.convert(xml_filename, package.report)
    # isinstance (rather than an exact type comparison) also accepts dict
    # subclasses such as OrderedDict.
    if not isinstance(json_data, dict):
        package.report.write(' ! ERROR: Invalid JSON ' + xml_filename, False, False, False, json_data)
        return None

    img_files = package.return_matching_files(xml_filename, '.jpg')
    self.json2model.set_data(json_data, xml_filename, package.report)
    publication_title = self.json2model.publication_title
    registered = self.registered_titles.return_registered(publication_title, package.report)
    if registered is None:
        return None

    # Only the document is used here; the other returned values are ignored.
    specific_document, _errors, _warnings, _refcount = self.json2model.return_document(registered, img_files)
    if specific_document is None:
        return None

    generic_document = Document(specific_document)
    package.report.write(generic_document.display(), True, False, False)

    specific_folder = self.all_folders.template(generic_document.folder)
    if specific_folder.status == 'not_registered':
        package.report.write(
            "\n" + ' ! WARNING: ' + specific_folder.display()
            + ' is not registered in ' + folder_table_name
            + ', but it will be registered provisionally. After being oficially registered, it must be reprocessed.'
            + "\n",
            True, True, True)

    # NOTE(review): debug output written straight to stdout; kept as-is so
    # existing behaviour does not change.
    print('generic_document.folder toc:')
    print(generic_document.folder.toc.return_json())
    print('specific_folder toc:')
    print(specific_folder.toc.return_json())

    incoherences = self.all_folders.return_incoherences(specific_folder, generic_document.folder)
    if len(incoherences) > 0:
        package.report.write(' ! ERROR: There are inconsistencies of data ' + xml_filename, True, True, True)
        for err in incoherences:
            package.report.write(err, True, True, True)

    # From here on the document points at the folder returned by
    # all_folders.template instead of the one built from the XML.
    generic_document.folder = specific_folder
    generic_document.folder.toc.insert(generic_document.section, False)
    print('generic_document.folder toc:')
    print(generic_document.folder.toc.return_json())

    if generic_document.folder.documents is None:
        generic_document.folder.documents = Documents()
    generic_document.folder.documents.insert(generic_document.document, True)
    return generic_document
def process_document(self, xml_filename, package, folder_table_name):
    """Extract, validate and register the document described by an XML file.

    Processing stops early, returning no document, when data extraction
    fails, when the publication title is missing or not registered, when
    the folder chosen by ``self.check_folder`` is not in ``'registered'``
    status, or when ``json2model.return_doc`` returns no document.

    Args:
        xml_filename: name of the XML file to process.
        package: object providing ``report`` (with a ``write`` method) and
            ``return_matching_files``.
        folder_table_name: not used by this method; kept so the signature
            stays compatible with existing callers.

    Returns:
        A tuple ``(fatal_errors, generic_document)``. ``fatal_errors`` is the
        list returned by ``json2model.evaluate_data``, extended with any
        missing-DOI or duplicate-document error found here (an empty list on
        the early exits). ``generic_document`` is the ``Document`` built, or
        ``None`` on the early exits.
    """
    fatal_errors = []
    generic_document = None

    if not self.extract_data(xml_filename, package.report):
        return (fatal_errors, generic_document)

    publication_title = self.json2model.publication_title
    registered = None
    # Truthiness also covers a missing (None) title, which len() would
    # reject with a TypeError.
    if publication_title:
        registered = self.registered_titles.return_registered(publication_title)
    if registered is None:
        package.report.write('Invalid publication title:' + (publication_title or ''), True, True)
        return (fatal_errors, generic_document)

    document_folder = self.json2model.return_folder(registered)
    selected_folder = self.check_folder(document_folder, package)
    if selected_folder.status != 'registered':
        return (fatal_errors, generic_document)

    specific_document = self.json2model.return_doc(selected_folder)
    if specific_document is None:
        return (fatal_errors, generic_document)

    # A document with a DOI whose issue name does not contain 'ahead' is
    # looked up in ahead_articles and given the previous id found there.
    if specific_document.doi != '' and 'ahead' not in specific_document.issue.name:
        pid, _fname = self.ahead_articles.return_id_and_filename(
            specific_document.doi,
            specific_document.issue.journal.issn_id,
            specific_document.titles)
        specific_document.set_previous_id(pid)

    generic_document = Document(specific_document)
    package.report.write(generic_document.display(), True, True, False)

    img_files = package.return_matching_files(xml_filename, '.jpg')
    # Only the fatal errors are used here; the other values are ignored.
    fatal_errors, _errors, _warnings, _ref_count = self.json2model.evaluate_data(img_files)

    if generic_document.folder.documents is None:
        generic_document.folder.documents = Documents()

    # A DOI is mandatory unless the issue name ends with 'pr'.
    if specific_document.doi == '' and specific_document.issue.name[-2:] != 'pr':
        message = 'FATAL ERROR: Missing DOI'
        fatal_errors.append(message)
        package.report.write(message, True, True)
    elif not generic_document.folder.documents.insert(generic_document.document, False):
        # format() builds the message once and tolerates a non-string order.
        message = 'FATAL ERROR: This document has doi ({0}) or order({1}) of another document.'.format(
            generic_document.document.doi, generic_document.document.order)
        package.report.write(message, True, True)
        fatal_errors.append(message)
    else:
        # NOTE(review): an empty report line is written after a successful
        # insert; confirm this branch pairing against the project history.
        package.report.write('', True, True)

    return (fatal_errors, generic_document)