def return_article(self, name):
    """Build an ``Article`` from the 'f' record of ``self.json_data``.

    Titles are read from field '12', authors from field '10' and the
    page information from field '14'.  When the page field is a dict,
    its 'f' and 'l' entries become the first and last page.  If either
    page is still an empty string afterwards, an error line is written
    to ``self.article_report``.

    ``name`` is handed to ``Article`` as its first argument, followed
    by the two page values.  The formatted titles and authors, the
    current section and the raw JSON data are attached to the article
    before it is returned.
    """
    record = self.json_data['f']
    titles = return_multval(record, '12')
    authors = return_multval(record, '10')
    page = return_singleval(record, '14')

    first_page, last_page = '', ''
    if type(page) is dict:
        first_page = page.get('f', first_page)
        last_page = page.get('l', last_page)

    has_both_pages = first_page != '' and last_page != ''
    if not has_both_pages:
        self.article_report.write(
            '\n' + ' ! ERROR: Missing first and last pages',
            True, True, False)

    article = Article(name, first_page, last_page)
    article.titles = self.format_titles(titles)
    article.authors = self.format_author_names(authors)
    article.section = self.section
    article.json_data = self.json_data
    return article
def validate_affiliations(self, errors, warnings):
    """Validate the affiliations of the 'f' record and extend the tallies.

    The values of fields '170' and '70' are handed to
    ``self.aff_handler.validate_affiliations``, which returns a pair
    (errors, warnings).  Each element of that pair is added (``+``) to
    the corresponding argument.

    Returns a tuple ``(errors, warnings)`` with the combined results.
    """
    record = self.json_data['f']
    xml_affs = return_multval(record, '170')
    affiliations = return_multval(record, '70')
    found_errors, found_warnings = self.aff_handler.validate_affiliations(
        xml_affs, affiliations)
    return (errors + found_errors, warnings + found_warnings)
def validate_href(self, img_files):
    """Cross-check image file names against the hrefs found in the body.

    ``img_files`` is a list of file names; each one is compared without
    its extension (the text after the last '.').  The hrefs are the
    distinct 'file' values of ``self.json_data['body']`` when a 'body'
    entry exists, otherwise there are none.

    Two problems are reported through ``self.article_report.write``:
    hrefs that have no matching image file, and image files that are
    not referenced by any href.

    Returns the total number of mismatches found (0 when everything
    matches).
    """
    # Strip only a real extension.  A name without '.' is kept whole:
    # slicing with rfind() == -1 would silently drop its last character.
    img_names = [name[:name.rfind('.')] if '.' in name else name
                 for name in img_files]

    href_list = []
    if 'body' in self.json_data:
        href_list = list(set(return_multval(self.json_data['body'], 'file')))

    # Sets give constant-time membership tests; the lists keep the order
    # in which problems are reported.
    known_images = set(img_names)
    missing_files = [href for href in href_list if href not in known_images]
    if missing_files:
        self.article_report.write(
            '\n' + ' ! ERROR: Expected image files: ' + '\n'
            + '\n'.join(missing_files),
            False, True, False)

    known_hrefs = set(href_list)
    missing_href = [img for img in img_names if img not in known_hrefs]
    if missing_href:
        self.article_report.write(
            '\n' + ' ! ERROR: Expected graphic/@xlink:href: ' + '\n'
            + '\n'.join(missing_href),
            False, True, False)

    return len(missing_files) + len(missing_href)
def normalize_citation_authors(self, citation):
    """Normalize the author-related fields of a citation dict in place.

    Steps, in order:

    * when '30' is present the citation is treated as a journal
      reference and '18' is removed;
    * the 'roles' values are normalized with
      ``self.json_normalizer.normalize_role`` and the 'roles' key is
      removed;
    * every author in '16' gets the last role as 'r', every author in
      '10' gets the first role as 'r';
    * when the citation has no '12' value (no analytic title) it is
      handled as monographic: '10' is moved to '16' and '11' is moved
      to '17'.

    Returns the same ``citation`` object.
    """
    # pop() rather than del: a journal citation without '18' is not an
    # error and must not raise KeyError.
    if '30' in citation:
        citation.pop('18', None)

    roles = [self.json_normalizer.normalize_role(role)
             for role in return_multval(citation, 'roles')]
    if roles:
        del citation['roles']

    authors_monog = return_multval(citation, '16')
    if roles:
        for author in authors_monog:
            author['r'] = roles[-1]
    if authors_monog:
        citation['16'] = authors_monog

    authors_analyt = return_multval(citation, '10')
    if roles:
        for author in authors_analyt:
            author['r'] = roles[0]
    if authors_analyt:
        citation['10'] = authors_analyt

    analytic_title = return_multval(citation, '12')
    if not analytic_title:
        # monographic: analytic-level authors belong to the monograph
        if authors_analyt:
            citation['16'] = citation['10']
            del citation['10']
        if '11' in citation:
            citation['17'] = citation['11']
            del citation['11']
    return citation
def normalize_affiliations(self):
    """Complete the affiliations of the 'f' record and link authors to them.

    Each value of field '70' is passed through
    ``self.aff_handler.complete_affiliation`` and the resulting list
    through ``self.aff_handler.complete_affiliations``.  A non-empty
    result replaces field '70'.

    If the first completed affiliation has an 'i' entry, that value is
    stored as '1' on every author (field '10') that has no '1' yet.
    ``None`` authors are dropped.  Field '10' is then rewritten as a
    list when more than one author remains, or as the single author
    otherwise.  Nothing is returned.
    """
    record = self.json_data['f']
    completed = [self.aff_handler.complete_affiliation(aff)
                 for aff in return_multval(record, '70')]
    completed = self.aff_handler.complete_affiliations(completed)
    if not completed:
        return

    record['70'] = completed
    default_aff_id = completed[0].get('i', '')
    if default_aff_id == '':
        return

    authors = [author for author in return_multval(record, '10')
               if author is not None]
    for author in authors:
        if '1' not in author:
            author['1'] = default_aff_id

    if len(authors) > 1:
        record['10'] = authors
    elif authors:
        # Store the author that survived the None filter, not whatever
        # element the loop happened to visit last.
        record['10'] = authors[0]
def normalize_metadata_authors(self):
    """Normalize the authors stored in field '10' of the 'f' record.

    For every author:

    * an existing 'z' entry is appended to the 's' entry, separated by
      a single space;
    * a '1' entry that is a list is joined into one space-separated
      string;
    * an 'r' entry is replaced by the result of
      ``self.json_normalizer.normalize_role``.

    Field '10' is reassigned only when at least one '1' list was
    joined: to the single author when there is exactly one, otherwise
    to the list of authors.  Nothing is returned.
    """
    record = self.json_data['f']
    must_store = False
    normalized = []
    for author in return_multval(record, '10'):
        if 'z' in author:
            author['s'] += ' ' + author['z']
        if type(author.get('1')) is list:
            author['1'] = ' '.join(author['1'])
            must_store = True
        if 'r' in author:
            author['r'] = self.json_normalizer.normalize_role(author['r'])
        normalized.append(author)

    if not must_store:
        return
    record['10'] = normalized[0] if len(normalized) == 1 else normalized
def normalize_keywords(self):
    """Flatten the keyword groups of field '85' into one entry per keyword.

    Each value of field '85' may be:

    * a dict with the keywords under 'k' and an optional language
      under 'l';
    * a list (or tuple) of keywords;
    * a single keyword string.

    Every keyword becomes ``{'k': keyword, 'l': language}``; the
    language defaults to 'en'.  Field '85' is replaced only when at
    least one keyword was collected.  Nothing is returned.
    """
    record = self.json_data['f']
    flattened = []
    for group in return_multval(record, '85'):
        lang = 'en'
        if isinstance(group, dict):
            lang = group.get('l', lang)
            keywords = group.get('k') or []
            # A group with a single keyword holds a bare value, not a
            # list; wrap it so it is not iterated character by character.
            if not isinstance(keywords, (list, tuple)):
                keywords = [keywords]
        elif isinstance(group, (list, tuple)):
            keywords = group
        elif isinstance(group, str):
            keywords = [group]
        else:
            # Any other value type is ignored.
            keywords = []
        for keyword in keywords:
            flattened.append({'k': keyword, 'l': lang})

    if flattened:
        record['85'] = flattened
def return_issue(self, journal):
    """Build a ``JournalIssue`` (with its issue record) from the JSON data.

    The fields are read from ``self.json_data['f']`` when an 'f' entry
    exists, otherwise from ``self.json_data`` itself: '132'
    (supplement), '31' (volume), '32' (number), '65' (date), '36'
    (order) and '41' (passed to ``JournalIssue`` as ``compl``).

    When the number mentions 'suppl' and contains a space, the
    supplement is extracted from it (the text inside parentheses if
    there are any, otherwise the text after the last space) and the
    number is cut at its first space.

    The issue record keeps the source fields whose numeric tag is in
    the keep list, plus a set of fixed entries and values taken from
    the issue and its journal.  Each value of field '49' becomes a
    ``Section`` inserted into ``issue.toc``.

    Returns the ``JournalIssue`` with the record stored in its
    ``json_data`` attribute.
    """
    data = self.json_data['f'] if 'f' in self.json_data else self.json_data

    # NOTE(review): the original also read field '131' into the same
    # variable but overwrote it immediately, so only '132' was ever
    # used.  Confirm whether '131' should act as a fallback.
    suppl = return_singleval(data, '132')
    vol = return_singleval(data, '31')
    num = return_singleval(data, '32')
    date = return_singleval(data, '65')
    order = return_singleval(data, '36')
    compl = return_singleval(data, '41')

    if 'suppl' in num.lower() and ' ' in num:
        if '(' in num:
            # Text between '(' and the first ')'.  partition() keeps the
            # whole remainder when the closing parenthesis is missing
            # instead of chopping off its last character.
            suppl = num[num.find('(') + 1:].partition(')')[0]
        else:
            suppl = num[num.rfind(' ') + 1:]
        num = num[0:num.find(' ')]

    issue = JournalIssue(journal, vol, num, date, suppl, compl, order)

    keep_tags = set([30, 31, 32, 132, 35, 42, 65, 100, 480])
    i_record = {}
    for key, item in data.items():
        try:
            tag = int(key)
        except (TypeError, ValueError):
            # A non-numeric key cannot be one of the kept tags.
            continue
        if tag in keep_tags:
            i_record[key] = item

    i_record['706'] = 'i'
    i_record['700'] = '0'
    i_record['701'] = '1'
    i_record['48'] = [
        {'l': 'en', 'h': 'Table of Contents'},
        {'l': 'pt', 'h': 'Sumário'},
        {'l': 'es', 'h': 'Sumario'},
    ]
    i_record['36'] = issue.order
    i_record['35'] = issue.journal.issn_id
    i_record['2'] = 'br1.1'
    i_record['930'] = issue.journal.acron.upper()

    if issue.number != num:
        # The issue changed the number it was given: drop the volume
        # and store the issue's own number.
        i_record.pop('31', None)
        i_record['32'] = issue.number

    for item in return_multval(data, '49'):
        title = item.get('t', '')
        lang = item.get('l', 'en')
        # Default the code per item so that a missing 'c' neither raises
        # on the first item nor reuses the previous item's code.
        code = item.get('c', '')
        issue.toc.insert(Section(title, code, lang), False)

    issue.json_data = i_record
    return issue