def __get_journal(self, a_doc, journalformat):
    """
    Resolve the journal string of a record in the format requested by the client.

    :param a_doc: record dict; the `doctype`, `bibstem` and `pub` keys are read
    :param journalformat: one of the adsJournalFormat values
    :return: None for software records (the field is ignored for them);
             for macro/default the entry of EXPORT_SERVICE_AASTEX_JOURNAL_MACRO
             matching the bibstem, or the LaTeX-encoded `pub` when there is none;
             for abbreviated the result of Format.get_pub_abbrev;
             for full the LaTeX-encoded `pub`;
             None when journalformat is none of the above
    """
    # software records never carry a journal
    if a_doc.get('doctype', '') == 'software':
        return None

    # macro is also what the default format resolves to
    if journalformat == adsJournalFormat.macro or journalformat == adsJournalFormat.default:
        macro_by_bibstem = {
            bibstem: macro
            for bibstem, macro in current_app.config['EXPORT_SERVICE_AASTEX_JOURNAL_MACRO']
        }
        bibstem = self.get_bibstem(a_doc.get('bibstem', ''))
        # full publication name is the fallback when no macro is configured
        full_name = encode_laTex(''.join(a_doc.get('pub', '')))
        return macro_by_bibstem.get(bibstem, full_name)

    if journalformat == adsJournalFormat.abbreviated:
        return Format(None).get_pub_abbrev(a_doc.get('bibstem', ''))

    if journalformat == adsJournalFormat.full:
        return encode_laTex(''.join(a_doc.get('pub', '')))

    return None
def __update_title(self):
    """
    Rewrite `container-title` and `title` of every entry in self.for_cls
    according to the CSL style (and, for the AAS styles, the journal format).

    The title is always run through LaTeX encoding. The container-title is:
      - mnras: the journal abbreviation from Format.get_pub_abbrev
      - aastex / aasj / aspc: the journal macro (macro or default format, falling
        back to the LaTeX-encoded container-title), the abbreviation
        (abbreviated format) or the LaTeX-encoded container-title (full format)
      - soph: the entry of EXPORT_SERVICE_SOPH_JOURNAL_ABBREVIATION for the
        bibstem, falling back to the LaTeX-encoded container-title
      - icarus / apsj: the LaTeX-encoded container-title
    Any other style, or an AAS style with an unrecognised journal format,
    leaves the entries untouched.

    :return:
    """

    def abbreviated(entry):
        # journal abbreviation looked up from the entry's bibstem
        return Format(None).get_pub_abbrev(entry['bibstem'])

    def latex_encoded(entry):
        return encode_laTex(entry['container-title'])

    def looked_up_in(table):
        # table entry for the bibstem, or the encoded container-title if absent
        def resolve(entry):
            return table.get(
                Format(None).get_bibstem(entry['bibstem']),
                encode_laTex(entry['container-title']))
        return resolve

    style = self.csl_style
    container_title = None

    if style == 'mnras':
        container_title = abbreviated
    elif style in ('aastex', 'aasj', 'aspc'):
        journal_format = self.journal_format
        if journal_format == adsJournalFormat.macro or journal_format == adsJournalFormat.default:
            container_title = looked_up_in({
                bibstem: macro
                for bibstem, macro in current_app.config['EXPORT_SERVICE_AASTEX_JOURNAL_MACRO']
            })
        elif journal_format == adsJournalFormat.abbreviated:
            container_title = abbreviated
        elif journal_format == adsJournalFormat.full:
            container_title = latex_encoded
    elif style == 'soph':
        # SoPh abbreviates some special journals only
        container_title = looked_up_in(
            current_app.config['EXPORT_SERVICE_SOPH_JOURNAL_ABBREVIATION'])
    elif style in ('icarus', 'apsj'):
        container_title = latex_encoded

    if container_title is None:
        return

    for entry in self.for_cls:
        entry['container-title'] = container_title(entry)
        entry['title'] = encode_laTex(entry['title'])
def __get_affiliation_list(self, a_doc, maxauthor, authorcutoff):
    """
    Build the comma separated, LaTeX-encoded affiliation string of a record.

    Each affiliation is written as `<id>(<affiliation>)`, with ids taken from
    self.generate_counter_id; for phdthesis and mastersthesis records the
    affiliation is written bare.

    :param a_doc: record dict; the `aff` and `doctype` keys are read
    :param maxauthor: number of affiliations to keep when the list is cut;
                      0 means return all available affiliations
    :param authorcutoff: the list is cut only when it holds more entries than this
    :return: '' when the record has no `aff` key, otherwise the encoded string
    """
    if 'aff' not in a_doc:
        return ''

    affiliations = a_doc['aff']
    labels = self.generate_counter_id(
        len(affiliations) if maxauthor == 0 else maxauthor)
    delimiter = ', '

    # cut to the shorter list only when there are more affiliations than the
    # cutoff and maxauthor is non zero (number of authors and number of
    # affiliations displayed should match)
    truncate = (len(affiliations) > authorcutoff) and maxauthor != 0
    labelled = a_doc.get('doctype', '') not in ['phdthesis', 'mastersthesis']

    pieces = []
    for position, affiliation in enumerate(affiliations):
        if labelled:
            pieces.append(labels[position] + '(' + affiliation + ')')
        else:
            pieces.append(affiliation)
        # reached the number of required affiliations
        if truncate and position + 1 == maxauthor:
            break

    joined = ''.join(piece + delimiter for piece in pieces)
    # the separator after the last affiliation is not needed
    if len(joined) > len(delimiter):
        joined = joined[:-len(delimiter)]
    return encode_laTex(joined)
def __add_keywords(self, a_doc):
    """
    Join the keywords of a record with ', ' and LaTeX-encode the result.

    :param a_doc: record dict; the `keyword` key is read
    :return: '' when the record has no `keyword` key, otherwise the encoded string
    """
    if 'keyword' in a_doc:
        joined = ', '.join(a_doc.get('keyword', ''))
        return encode_laTex(joined)
    return ''
def __add_abstract(self, a_doc):
    """
    Return the LaTeX-encoded abstract of a record.

    Before encoding, every `<P />` tag is replaced by two backslashes and
    every `<BR />` tag by a single backslash.

    :param a_doc: record dict; the `abstract` key is read ('' when missing)
    :return: the encoded abstract
    """
    text = a_doc.get('abstract', '')
    for tag, replacement in (('<P />', '\\\\'), ('<BR />', '\\')):
        text = text.replace(tag, replacement)
    return encode_laTex(text)
def __add_clean_pub_raw(self, a_doc):
    """
    Strip tags from `pub_raw` and LaTeX-encode what is left.

    Every pattern of self.REGEX_PUB_RAW is substituted by its mapped
    replacement, in iteration order.

    :param a_doc: record dict; the `pub_raw` key is read ('' when missing)
    :return: the encoded, cleaned up pub_raw
    """
    cleaned = ''.join(a_doc.get('pub_raw', ''))

    # without both angle brackets there cannot be a tag, skip the substitutions
    if '<' not in cleaned or '>' not in cleaned:
        return encode_laTex(cleaned)

    for pattern in self.REGEX_PUB_RAW:
        replacement = self.REGEX_PUB_RAW[pattern]
        cleaned = pattern.sub(replacement, cleaned)
    return encode_laTex(cleaned)
def __encode_latex(self, value, field):
    """
    LaTeX-encode a field value, picking the encoder from the field name.

    :param value: the text to encode
    :param field: name of the field the value belongs to
    :return: author values go through encode_laTex_author; bibcode values, and
             pub values matching self.REGEX_PUB_MACRO, are returned unchanged;
             everything else goes through encode_laTex
    """
    if field == 'author':
        return encode_laTex_author(value)

    # a bibcode is never encoded
    if field == 'bibcode':
        return value

    # a publication that is a macro is not encoded either
    if field == 'pub' and self.REGEX_PUB_MACRO.match(value):
        return value

    return encode_laTex(value)
def __get_journal(self, a_doc, journalformat):
    """
    Resolve the journal string of a record, honouring the format requested by
    the client only when `pub` ends up in the journal field.

    :param a_doc: record dict; the `doctype`, `bibstem` and `pub` keys are read
    :param journalformat: one of the adsJournalFormat values
    :return: None for software records (the field is ignored for them);
             the LaTeX-encoded `pub` for the doctypes listed below, whatever the
             requested format;
             for macro/default the entry of EXPORT_SERVICE_AASTEX_JOURNAL_MACRO
             matching the bibstem, or the LaTeX-encoded `pub` when there is none;
             for abbreviated the result of Format.get_pub_abbrev;
             for full the LaTeX-encoded `pub`;
             None when journalformat is none of the above
    """
    record_type = a_doc.get('doctype', '')
    if record_type == 'software':
        return None

    # for these doctypes pub is displayed as booktitle or how_published,
    # where it should appear in full, so the user preference does not apply
    full_pub_doctypes = [
        'inbook', 'proceedings', 'inproceedings', 'abstract',
        'misc', 'proposal', 'pressrelease', 'talk',
    ]
    if record_type in full_pub_doctypes:
        return encode_laTex(''.join(a_doc.get('pub', '')))

    # macro is also what the default format resolves to
    if journalformat == adsJournalFormat.macro or journalformat == adsJournalFormat.default:
        macro_by_bibstem = {
            bibstem: macro
            for bibstem, macro in current_app.config['EXPORT_SERVICE_AASTEX_JOURNAL_MACRO']
        }
        bibstem = self.get_bibstem(a_doc.get('bibstem', ''))
        # full publication name is the fallback when no macro is configured
        full_name = encode_laTex(''.join(a_doc.get('pub', '')))
        return macro_by_bibstem.get(bibstem, full_name)

    if journalformat == adsJournalFormat.abbreviated:
        return Format(None).get_pub_abbrev(a_doc.get('bibstem', ''))

    if journalformat == adsJournalFormat.full:
        return encode_laTex(''.join(a_doc.get('pub', '')))

    return None
def __format_output(self, cita, biblio, bibcode, index):
    """
    Assemble the final reference line for one record from the citation and
    bibliography strings returned by CSL.

    :param cita: citation string
    :param biblio: bibliography string
    :param bibcode: bibcode of the record (blanked for the apsj style)
    :param index: not used by this method
    :return: the reference formatted with the template of self.csl_style
    """
    if self.csl_style != 'apsj':
        cita_author, cita_year = self.__tokenize_cita(cita)
        biblio_author, biblio_rest = self.__tokenize_biblio(biblio)
    else:
        # apsj is a special case: the bibliography is displayed the way CSL
        # formatted it, only the characters get translated for LaTeX
        cita_author = cita_year = ''
        biblio_author = cita
        biblio_rest = biblio.replace(cita, '')
        # every field is handed to the template, so send an empty bibcode
        bibcode = ''

    if self.export_format == adsFormatter.latex:
        cita_author = encode_laTex_author(cita_author)
        biblio_author = encode_laTex_author(biblio_author)
        biblio_rest = encode_laTex(biblio_rest)

    # adjustments to the CSL output that can not be done within CSL itself
    cita_author = html_to_laTex(
        self.__update_author_etal_add_emph(cita_author))
    biblio_author = html_to_laTex(
        self.__update_author_etal(str(biblio_author), bibcode))
    biblio_rest = html_to_laTex(biblio_rest)

    # template shared by the styles using a plain `author(year)` label
    author_year_item = u'\\bibitem[{}({})]{{{}}} {}{}'
    templates = {
        'mnras': u'\\bibitem[\\protect\\citeauthoryear{{{}}}{{{}}}]{{{}}} {}{}',
        'icarus': author_year_item,
        'soph': u'\\bibitem[{}({})]{{{}}}{}{}',
        'aastex': author_year_item,
        'aspc': author_year_item,
        'aasj': author_year_item,
        'apsj': u'{}{}{}{}{}',
    }
    pieces = (cita_author, cita_year, bibcode, biblio_author, biblio_rest)
    return templates[self.csl_style].format(*pieces)
def __encode(self, text, name):
    """
    Encode a field value for the export format of this instance.

    :param text: the value to encode
    :param name: name of the field the value belongs to
    :return: unicode format: text unchanged;
             html format: text with `&`, `<` and `>` escaped;
             latex format: author values through encode_laTex_author, pub
             values unchanged, everything else through encode_laTex;
             any other format: text unchanged
    """
    if self.export_format == adsFormatter.unicode:
        return text

    if self.export_format == adsFormatter.html:
        # cgi.escape was removed in Python 3.8; html.escape is its replacement.
        # quote=False keeps the cgi.escape default of leaving quotes alone.
        try:
            from html import escape
        except ImportError:
            # interpreters that predate the html module
            from cgi import escape
        return escape(text, quote=False)

    if self.export_format == adsFormatter.latex:
        if name == 'author':
            return encode_laTex_author(text)
        # do not encode publication since it could be the macro
        if name == 'pub':
            return text
        return encode_laTex(text)

    return text
def __get_affiliation_list(self, a_doc):
    """
    Build the comma separated, LaTeX-encoded affiliation string of a record.

    Each affiliation is written as `<id>(<affiliation>)`, where the ids run
    AA, AB, ..., ZZ and then continue with three letters (AAA, ...) and more,
    so records with more than 676 affiliations no longer run out of ids.
    For phdthesis and mastersthesis records the affiliation is written bare.

    :param a_doc: record dict; the `aff` and `doctype` keys are read
    :return: '' when the record has no `aff` key, otherwise the encoded string
    """
    from itertools import count

    if 'aff' not in a_doc:
        return ''

    affiliations = a_doc['aff']
    if a_doc.get('doctype', '') in ('phdthesis', 'mastersthesis'):
        entries = list(affiliations)
    else:
        # lazy, unbounded id stream: all two letter ids first, then wider ones
        labels = (''.join(letters)
                  for width in count(2)
                  for letters in product(ascii_uppercase, repeat=width))
        entries = [label + '(' + affiliation + ')'
                   for affiliation, label in zip(affiliations, labels)]

    # join leaves no trailing separator to strip afterwards
    return encode_laTex(', '.join(entries))
def __get_doc(self, index, include_abs, maxauthor, authorcutoff, journalformat):
    """
    Format one document returned from Solr as a single entry: a header built
    from the doctype and the key, one line per field, and a closing brace.

    :param index: position of the document in the Solr response
    :param include_abs: the abstract is included only when this is truthy
    :param maxauthor: passed on to the author and affiliation formatters
    :param authorcutoff: passed on to the author and affiliation formatters
    :param journalformat: passed on to the journal formatter
    :return: the formatted entry, terminated by '}' and a blank line
    """
    # value templates: braces inside quotes, braces only, or the bare value
    braced_quoted = u'{0:>13} = "{{{1}}}"'
    braced = u'{0:>13} = {{{1}}}'
    bare = u'{0:>13} = {1}'

    a_doc = self.from_solr['response'].get('docs')[index]
    chunks = [
        self.__get_doc_type(a_doc.get('doctype', '')) + '{' +
        self.__get_key(index) + ',\n'
    ]

    fields = self.__get_fields(a_doc)
    for field in fields:
        label = fields[field]
        if field in ('author', 'editor'):
            chunks.append(self.__add_in_wrapped(
                label,
                self.__get_author_list(a_doc, field, maxauthor, authorcutoff),
                braced))
        elif field == 'title':
            chunks.append(self.__add_in(
                label, encode_laTex(''.join(a_doc.get(field, ''))),
                braced_quoted))
        elif field == 'aff':
            chunks.append(self.__add_in_wrapped(
                label,
                self.__get_affiliation_list(a_doc, maxauthor, authorcutoff),
                braced))
        elif field == 'pub_raw':
            chunks.append(self.__add_in(
                label, self.__add_clean_pub_raw(a_doc), braced))
        elif field == 'pub':
            chunks.append(self.__add_in(
                label, self.__get_journal(a_doc, journalformat), braced))
        elif field == 'doi':
            chunks.append(self.__add_in(
                label, ''.join(a_doc.get(field, '')), braced))
        elif field == 'keyword':
            chunks.append(self.__add_in(
                label, self.__add_keywords(a_doc), braced))
        elif field == 'year':
            # an empty value is handed over as None
            chunks.append(self.__add_in(
                label, a_doc.get(field, '') or None, bare))
        elif field in ('volume', 'issue'):
            chunks.append(self.__add_in(
                label, a_doc.get(field, '') or None, braced))
        elif field == 'month':
            # the month is derived from the publication date
            chunks.append(self.__add_in(
                label, self.__format_date(a_doc.get('pubdate', '')), bare))
        elif field == 'abstract' and include_abs:
            chunks.append(self.__add_in_wrapped(
                label, encode_laTex(a_doc.get(field, '')), braced_quoted))
        elif field == 'eid':
            chunks.append(self.__add_in(label, a_doc.get(field, ''), braced))
        elif field == 'page_range':
            chunks.append(self.__add_in(label, self.__add_page(a_doc), braced))
        elif field == 'bibcode':
            chunks.append(self.__add_in(
                label,
                current_app.config['EXPORT_SERVICE_FROM_BBB_URL'] + '/' +
                a_doc.get(field, ''),
                braced))
        elif field == 'adsnotes':
            chunks.append(self.__add_in(
                label, current_app.config['EXPORT_SERVICE_ADS_NOTES'], braced))
        elif field == 'eprintid':
            chunks.append(self.__add_in_eprint(
                label, get_eprint(a_doc), braced))
        elif field == 'arxiv_class':
            chunks.append(self.__add_in_arxiv_class(
                label, a_doc.get(field, ''), braced))
        elif field in ('series', 'version', 'publisher'):
            chunks.append(self.__add_in(
                label, ''.join(a_doc.get(field, '')), braced))

    entry = ''.join(chunks)
    # remove the last comma: the final ',\n' is replaced by a plain newline
    entry = entry[:-len(',\n')] + '\n'
    return entry + '}\n\n'