def normalize_document_data(self, issue, name):
    """Normalize the document record stored in ``self.json_data['f']``.

    Rewrites several tags of the 'f' record, resolves the document section
    against the issue's table of contents, runs the author, illustrative
    material, affiliation and keyword normalizers, and finally rebuilds the
    'h' and 'l' records with ``json_normalizer.format_for_indexing``.

    Args:
        issue: object whose ``toc.return_section(section)`` is asked for a
            section matching the one built from tag '49'.
        name: value stored in tag '121' when tag '32' equals 'ahead'.
    """
    self.json_data['f']['120'] = 'XML_' + return_singleval(self.json_data['f'], '120')
    self.json_data['f']['42'] = '1'

    if self.json_data['f']['32'] == 'ahead':
        self.json_data['f']['121'] = name
        if 'epub' in self.json_data['f']:
            self.json_data['f'] = self.json_normalizer.normalize_dates(
                self.json_data['f'], 'epub', '223', 'epub')

    # The 'epub' key is never kept in the record, whether or not it was
    # consumed by the 'ahead' branch above.
    self.json_data['f'].pop('epub', None)

    # Use the section returned by the table of contents when there is one;
    # otherwise fall back to the section built from tag '49'.
    section = Section(return_singleval(self.json_data['f'], '49'))
    self.section = issue.toc.return_section(section)
    if self.section is None:
        self.section = section
    self.json_data['f']['49'] = self.section.code

    self.normalize_metadata_authors()
    self.normalize_illustrative_materials()
    self.normalize_affiliations()
    self.normalize_keywords()

    self.json_data['f'] = self.json_normalizer.convert_value(
        self.json_data['f'], '71', 'doctopic')
    # NOTE(review): normalize() calls normalize_dates with these tag pairs
    # swapped ('112', '111', '112' and '114', '113', '114'); confirm which
    # order is intended.
    self.json_data['f'] = self.json_normalizer.normalize_dates(
        self.json_data['f'], '111', '112', '111')
    self.json_data['f'] = self.json_normalizer.normalize_dates(
        self.json_data['f'], '113', '114', '113')

    # 'h' is derived from 'f'; 'l' is derived from the already formatted 'h'.
    self.json_data['h'] = self.json_normalizer.format_for_indexing(self.json_data['f'])
    self.json_data['l'] = self.json_normalizer.format_for_indexing(self.json_data['h'])
def normalize(self, issn_id):
    """Normalize ``self.json_data['f']`` and rebuild the 'h' and 'l' records.

    Stores ``issn_id`` in tag '35', prefixes tag '120' with 'XML_', sets tag
    '42' to '1', builds ``self.section`` from tag '49', runs the author,
    illustrative material, affiliation and keyword normalizers, converts
    tag '71', normalizes three pairs of date tags and keeps the resulting
    value of tag '65' in ``self.publication_dateiso``.

    Args:
        issn_id: value written to tag '35'.
    """
    fields = self.json_data['f']
    fields['35'] = issn_id
    fields['120'] = 'XML_' + return_singleval(fields, '120')
    fields['42'] = '1'

    # Only the Section object is kept; tag '49' itself is not rewritten here.
    self.section = Section(return_singleval(fields, '49'))

    self.normalize_metadata_authors()
    self.normalize_illustrative_materials()
    self.normalize_affiliations()
    self.normalize_keywords()

    # The record is re-read from self.json_data after every step because
    # each normalizer call returns the record that replaces it.
    self.json_data['f'] = self.json_normalizer.convert_value(
        self.json_data['f'], '71', 'doctopic')

    date_arguments = (
        ('64', '65', '64'),
        ('112', '111', '112'),
        ('114', '113', '114'),
    )
    for first, second, third in date_arguments:
        self.json_data['f'] = self.json_normalizer.normalize_dates(
            self.json_data['f'], first, second, third)

    self.publication_dateiso = return_singleval(self.json_data['f'], '65')

    # 'h' is derived from 'f'; 'l' is derived from the already formatted 'h'.
    for target, source in (('h', 'f'), ('l', 'h')):
        self.json_data[target] = self.json_normalizer.format_for_indexing(
            self.json_data[source])
def journal_title(self):
    """Return the value of tag '100', or of tag '130' when '100' is ''.

    The tags are read from ``self.json_data['f']`` when the 'f' key exists,
    otherwise from ``self.json_data`` itself.
    """
    record = self.json_data['f'] if 'f' in self.json_data else self.json_data
    title = return_singleval(record, '100')
    if title != '':
        return title
    return return_singleval(record, '130')
def return_article(self, name):
    """Build an ``Article`` from the document record in ``self.json_data``.

    Titles come from tag '12', authors from tag '10' and the page range from
    tag '14' (keys 'f' and 'l' when that value is a dict). When either page
    is missing an error line is written to ``self.article_report``; the
    article is still created and returned.

    Args:
        name: identifier passed as the first argument to ``Article``.

    Returns:
        The new ``Article`` with ``titles``, ``authors``, ``section`` and
        ``json_data`` filled in.
    """
    record = self.json_data['f']
    titles = return_multval(record, '12')
    authors = return_multval(record, '10')

    first_page = ''
    last_page = ''
    page = return_singleval(record, '14')
    # isinstance (rather than an exact type comparison) also accepts dict
    # subclasses such as OrderedDict.
    if isinstance(page, dict):
        first_page = page.get('f', '')
        last_page = page.get('l', '')

    if first_page == '' or last_page == '':
        self.article_report.write(
            '\n ! ERROR: Missing first and last pages', True, True, False)

    article = Article(name, first_page, last_page)
    article.titles = self.format_titles(titles)
    article.authors = self.format_author_names(authors)
    article.section = self.section
    article.json_data = self.json_data
    return article
def return_issue(self, journal):
    """Build a ``JournalIssue`` and attach an issue-level record to it.

    Tags are read from ``self.json_data['f']`` when the 'f' key exists,
    otherwise from ``self.json_data`` itself. When the number (tag '32')
    contains 'suppl' and a space, the supplement is extracted from it and
    the number is cut at its first space.

    Args:
        journal: passed to ``JournalIssue``; its ``issn_id`` and ``acron``
            are read back through ``issue.journal``.

    Returns:
        The ``JournalIssue`` with ``json_data`` set to the issue record.
    """
    data = self.json_data['f'] if 'f' in self.json_data else self.json_data

    # NOTE(review): the previous code also read tag '131' into ``suppl`` and
    # immediately overwrote it with tag '132', so only '132' ever had an
    # effect. Confirm that '131' is really not needed.
    suppl = return_singleval(data, '132')
    vol = return_singleval(data, '31')
    num = return_singleval(data, '32')
    date = return_singleval(data, '65')
    order = return_singleval(data, '36')
    compl = return_singleval(data, '41')

    if 'suppl' in num.lower() and ' ' in num:
        if '(' in num:
            # Text between '(' and ')'. partition() keeps the whole tail
            # when ')' is missing; slicing up to find(')') == -1 used to
            # drop the last character in that case.
            suppl = num.partition('(')[2].partition(')')[0]
        else:
            # No parentheses: the supplement is whatever follows the last
            # space.
            suppl = num[num.rfind(' ') + 1:]
        num = num[:num.find(' ')]

    issue = JournalIssue(journal, vol, num, date, suppl, compl, order)

    # Copy only the whitelisted numeric tags into the issue record.
    keep_tags = (30, 31, 32, 132, 35, 42, 65, 100, 480)
    i_record = {}
    for key, item in data.items():
        if key.isdigit() and int(key) in keep_tags:
            i_record[key] = item

    i_record['706'] = 'i'
    i_record['700'] = '0'
    i_record['701'] = '1'
    i_record['48'] = [
        {'l': 'en', 'h': 'Table of Contents'},
        {'l': 'pt', 'h': 'Sumário'},
        {'l': 'es', 'h': 'Sumario'},
    ]
    i_record['36'] = issue.order
    i_record['35'] = issue.journal.issn_id
    i_record['2'] = 'br1.1'
    i_record['930'] = issue.journal.acron.upper()

    # JournalIssue may expose a number different from the one it was given;
    # in that case tag '31' is dropped and tag '32' follows the issue.
    if issue.number != num:
        i_record.pop('31', None)
        i_record['32'] = issue.number

    issue.json_data = i_record
    return issue
def normalize_illustrative_materials(self):
    """Replace the counter tags '900' and '901' with tag '38'.

    For each counter tag with a non-empty value, its label ('TAB' or 'GRA')
    is collected when the count is greater than zero, and the counter tag is
    removed from ``self.json_data['f']``. Tag '38' receives the list of
    collected labels, or the string 'ND' when none was collected.
    """
    labels_by_tag = {'900': 'TAB', '901': 'GRA'}
    found = []
    for tag, label in labels_by_tag.items():
        count = return_singleval(self.json_data['f'], tag)
        if len(count) == 0:
            # Nothing stored under this tag: nothing to collect or delete.
            continue
        if int(count) > 0:
            found.append(label)
        del self.json_data['f'][tag]

    self.json_data['f']['38'] = found if len(found) > 0 else 'ND'
def normalize_issue_data(self, issn_id):
    """Normalize the issue-related tags of ``self.json_data['f']``.

    Stores ``issn_id`` in tag '35'. When tags '31' and '32' contain nothing
    but zeros (or are empty), tag '31' is removed and tag '32' becomes
    'ahead'. Then the '64'/'65' dates are normalized and the resulting value
    of tag '65' is kept in ``self.publication_dateiso``. Section data
    (tag '49') is not handled here.

    Args:
        issn_id: value written to tag '35'.
    """
    record = self.json_data['f']
    record['35'] = issn_id

    # What is left of volume and number once every '0' is removed.
    leftovers = ''.join(record[tag].replace('0', '') for tag in ('31', '32'))
    if leftovers == '':
        record.pop('31', None)
        record['32'] = 'ahead'

    self.json_data['f'] = self.json_normalizer.normalize_dates(
        record, '64', '65', '64')
    self.publication_dateiso = return_singleval(self.json_data['f'], '65')
def normalize_citation_title_language(self, citation):
    """Make sure the titles of ``citation`` carry a language under key 'l'.

    The language is the value of tag '40', or 'en' when that is empty.
    Tag '18' is always wrapped as ``{'l': lang, '_': <old value>}``. Tag '12'
    is handled according to its type: each item of a list and a single dict
    get 'l' only when they do not have it yet; any other value is wrapped
    like tag '18'.

    Args:
        citation: dict-like citation record; it is modified in place.

    Returns:
        The same ``citation`` object.
    """
    lang = return_singleval(citation, '40')
    if not lang:
        lang = 'en'

    if '18' in citation:
        citation['18'] = {'l': lang, '_': citation['18']}

    if '12' in citation:
        titles = citation['12']
        # isinstance (rather than an exact type comparison) also accepts
        # list/dict subclasses such as OrderedDict.
        if isinstance(titles, list):
            # assumes every item of the list is a dict -- TODO confirm
            for title in titles:
                title.setdefault('l', lang)
        elif isinstance(titles, dict):
            titles.setdefault('l', lang)
        else:
            citation['12'] = {'l': lang, '_': titles}

    return citation
def journal_issn_id(self):
    """Return the value of tag '400'.

    The tag is read from ``self.json_data['f']`` when the 'f' key exists,
    otherwise from ``self.json_data`` itself.
    """
    record = self.json_data['f'] if 'f' in self.json_data else self.json_data
    return return_singleval(record, '400')
def journal_acron(self):
    """Return the value of tag '68'.

    The tag is read from ``self.json_data['f']`` when the 'f' key exists,
    otherwise from ``self.json_data`` itself.
    """
    record = self.json_data['f'] if 'f' in self.json_data else self.json_data
    return return_singleval(record, '68')
def validate_pages(self, errors, warnings):
    """Append 'Missing pages' to ``errors`` when tag '14' has no 'f' key.

    Tag '14' only counts as valid when it is a dict holding an 'f' entry,
    which is the shape ``return_article`` reads the pages from. Requiring a
    dict avoids a substring match when the tag holds a plain string.

    Args:
        errors: list that receives the error message; modified in place.
        warnings: returned untouched.

    Returns:
        The tuple ``(errors, warnings)``.
    """
    pages = return_singleval(self.json_data['f'], '14')
    if not (isinstance(pages, dict) and 'f' in pages):
        errors.append('Missing pages')
    return (errors, warnings)