コード例 #1
0
    def test_eprint(self):
        """Check get_eprint against three documents: the first solrdata doc, an arXiv doc and an ascl doc."""
        # the first document in solrdata is expected to produce an empty eprint
        solr_docs = solrdata.data['response'].get('docs')
        assert get_eprint(solr_docs[0]) == ''

        # an arXiv eid is expected back unchanged
        arxiv_record = {
            "bibcode": "2018arXiv180303598K",
            "eid": "arXiv:1803.03598",
        }
        assert get_eprint(arxiv_record) == 'arXiv:1803.03598'

        # an ascl eid is expected back unchanged
        ascl_record = {
            "bibcode": "2013ascl.soft08009C",
            "eid": "ascl:1308.009",
        }
        assert get_eprint(ascl_record) == 'ascl:1308.009'
コード例 #2
0
ファイル: customFormat.py プロジェクト: ysBach/export_service
    def __get_doc(self, index):
        """
        Assemble the custom-format text for a single document.

        Starts from self.custom_format, passes every entry of self.parsed_spec
        that names a known field through __add_in together with the value
        looked up for it, appends self.line_feed and hands the text to
        __format_line_wrapped.

        :param index: index to the docs structure returned from solr
        :return: the value returned by __format_line_wrapped for the assembled text
        """
        output = self.custom_format
        doc = self.from_solr['response'].get('docs')[index]
        for spec in self.parsed_spec:
            # spec[2] holds the field name; spec[1] is forwarded to the helpers
            name = spec[2]
            if name in ('title', 'doi', 'comment'):
                value = ''.join(doc.get(name, ''))
            elif name == 'author':
                value = self.__get_author_list(spec[1], index)
            elif name in ('doctype', 'abstract', 'copyright', 'bibcode',
                          'volume', 'year'):
                # these fields are inserted exactly as stored in the document
                value = doc.get(name, '')
            elif name == 'pubdate':
                value = self.__format_date(doc.get(name, ''), spec[1][-1])
            elif name == 'aff':
                value = self.__get_affiliation_list(doc)
            elif name == 'keyword':
                value = self.__get_keywords(doc)
            elif name == 'url':
                value = self.__format_url(doc.get('bibcode', ''), spec[1][-1])
            elif name in ('pub', 'pub_raw'):
                value = self.__get_publication(spec[1], doc)
            elif name == 'citation_count':
                value = str(doc.get(name, ''))
            elif name == 'eid,identifier':
                value = get_eprint(doc)
            elif name in ('page,page_range', 'lastpage,page_range',
                          'page_range,page'):
                value = self.__get_page(name, doc)
            else:
                # unknown field names leave the text untouched
                continue
            output = self.__add_in(output, spec, value)
        output += self.line_feed

        return self.__format_line_wrapped(output, index)
コード例 #3
0
    def __get_doc_reference_xml(self, index, parent, includeAbs):
        """
        Add a "record" element for one Solr document under parent, filling it
        field by field for the Reference XML export format.

        :param index: position of the document in the solr docs list
        :param parent: XML element that receives the new "record" sub-element
        :param includeAbs: when falsy the abstract field is skipped
        :return:
        """
        doc = self.from_solr['response'].get('docs')[index]
        ref_xml = self.EXPORT_FORMAT_REF_XML
        tags = self.__get_fields(ref_xml)
        node = ET.SubElement(parent, "record")
        for name in tags:
            if name in ('bibcode', 'pub', 'volume', 'copyright'):
                self.__add_in(node, tags[name], doc.get(name, ''))
            elif name in ('title', 'page', 'doi'):
                # joined into a single string before being added
                self.__add_in(node, tags[name], ''.join(doc.get(name, '')))
            elif name == 'author':
                self.__add_author_list(doc, node, tags[name])
            elif name == 'aff':
                self.__add_affiliation_list(doc, node, tags[name])
            elif name == 'date':
                formatted_date = self.__format_date(doc.get(name, ''), ref_xml)
                self.__add_in(node, tags[name], formatted_date)
            elif name == 'pub_raw':
                self.__add_pub_raw(doc, node, tags[name], ref_xml)
            elif name == 'keyword':
                self.__add_keywords(doc, node, ref_xml)
            elif name == 'url':
                # configured base url, a slash, then the bibcode
                self.__add_in(
                    node, tags[name],
                    current_app.config.get('EXPORT_SERVICE_FROM_BBB_URL') +
                    '/' + doc.get('bibcode', ''))
            elif name == 'citation_count':
                citation = self.__get_citation(int(doc.get(name, 0)), ref_xml)
                self.__add_in(node, tags[name], citation)
            elif name == 'abstract' and includeAbs:
                wrapped = self.__format_line_wrapped(doc.get(name, ''))
                self.__add_in(node, tags[name], wrapped)
            elif name == 'link':
                self.__add_doc_links(doc, node)
            elif name == 'eprintid':
                self.__add_in(node, tags[name], get_eprint(doc))
コード例 #4
0
ファイル: bibTexFormat.py プロジェクト: ysBach/export_service
    def __get_doc(self, index, include_abs, maxauthor, authorcutoff,
                  journalformat):
        """
        Render the Solr document at position index as one entry of the export.

        The entry opens with the document type and key, lists one line per
        field returned by __get_fields, and is closed with '}' and a blank line.

        :param index: position of the document in the solr docs list
        :param include_abs: when falsy the abstract field is left out
        :param maxauthor: forwarded to the author and affiliation list helpers
        :param authorcutoff: forwarded to the author and affiliation list helpers
        :param journalformat: forwarded to the journal helper for the pub field
        :return: text of the complete entry
        """
        # line layouts: tag right-aligned in 13 columns, then the value
        quoted_braced = u'{0:>13} = "{{{1}}}"'
        braced = u'{0:>13} = {{{1}}}'
        quoted = u'{0:>13} = "{1}"'  # not referenced in this method
        plain = u'{0:>13} = {1}'

        doc = self.from_solr['response'].get('docs')[index]
        entry_type = self.__get_doc_type(doc.get('doctype', ''))
        text = entry_type + '{' + self.__get_key(index) + ',\n'

        tags = self.__get_fields(doc)
        for name in tags:
            if name in ('author', 'editor'):
                people = self.__get_author_list(doc, name, maxauthor,
                                                authorcutoff)
                text += self.__add_in_wrapped(tags[name], people, braced)
            elif name == 'title':
                title = encode_laTex(''.join(doc.get(name, '')))
                text += self.__add_in(tags[name], title, quoted_braced)
            elif name == 'aff':
                affiliations = self.__get_affiliation_list(doc, maxauthor,
                                                           authorcutoff)
                text += self.__add_in_wrapped(tags[name], affiliations, braced)
            elif name == 'pub_raw':
                text += self.__add_in(tags[name],
                                      self.__add_clean_pub_raw(doc), braced)
            elif name == 'pub':
                journal = self.__get_journal(doc, journalformat)
                text += self.__add_in(tags[name], journal, braced)
            elif name in ('doi', 'series', 'version', 'publisher'):
                # joined into a single string before being added
                text += self.__add_in(tags[name], ''.join(doc.get(name, '')),
                                      braced)
            elif name == 'keyword':
                text += self.__add_in(tags[name], self.__add_keywords(doc),
                                      braced)
            elif name == 'year':
                # an empty value is handed over as None
                text += self.__add_in(tags[name], doc.get(name, '') or None,
                                      plain)
            elif name in ('volume', 'issue'):
                # an empty value is handed over as None
                text += self.__add_in(tags[name], doc.get(name, '') or None,
                                      braced)
            elif name == 'month':
                month = self.__format_date(doc.get('pubdate', ''))
                text += self.__add_in(tags[name], month, plain)
            elif name == 'abstract' and include_abs:
                abstract = encode_laTex(doc.get(name, ''))
                text += self.__add_in_wrapped(tags[name], abstract,
                                              quoted_braced)
            elif name == 'eid':
                text += self.__add_in(tags[name], doc.get(name, ''), braced)
            elif name == 'page_range':
                text += self.__add_in(tags[name], self.__add_page(doc), braced)
            elif name == 'bibcode':
                # configured base url, a slash, then the bibcode
                text += self.__add_in(
                    tags[name],
                    current_app.config['EXPORT_SERVICE_FROM_BBB_URL'] + '/' +
                    doc.get(name, ''), braced)
            elif name == 'adsnotes':
                text += self.__add_in(
                    tags[name],
                    current_app.config['EXPORT_SERVICE_ADS_NOTES'], braced)
            elif name == 'eprintid':
                text += self.__add_in_eprint(tags[name], get_eprint(doc),
                                             braced)
            elif name == 'arxiv_class':
                text += self.__add_in_arxiv_class(tags[name],
                                                  doc.get(name, ''), braced)

        # drop the trailing two characters (',\n') and restore the newline
        text = text[:-2] + '\n'

        return text + '}\n\n'
コード例 #5
0
    def __get_doc(self, index, fields, export_format):
        """
        Build the export text for the Solr document at position index.

        Each key of fields selects how the value is produced; the entry stored
        under that key is passed along to the helper that formats the line.

        :param index: position of the document in the solr docs list
        :param fields: mapping from field name to the entry handed to the helpers
        :param export_format: forwarded to the format-dependent helpers
        :return: the accumulated text followed by two line feeds
        """
        doc = self.from_solr['response'].get('docs')[index]
        text = ''
        for name in fields:
            tag = fields[name]
            if name in ('title', 'page', 'doi', 'isbn', 'pubnote', 'issn',
                        'pub'):
                # joined into a single string before being added
                text += self.__add_in(tag, ''.join(doc.get(name, '')))
            elif name == 'lastpage':
                last_page = self.___get_last_page(doc.get('page_range', ''))
                text += self.__add_in(tag, last_page)
            elif name == 'author':
                text += self.__add_author_list(doc, export_format, tag)
            elif name == 'doctype':
                doc_type = self.__get_doc_type(doc.get(name, ''),
                                               export_format)
                text += self.__add_in(tag, doc_type)
            elif name == 'pubdate':
                pub_date = self.__format_date(doc.get(name, ''),
                                              export_format)
                text += self.__add_in(tag, pub_date)
            elif name == 'abstract':
                # 9/18/2020 as per request of a user, no line wrapping abstract
                text += self.__add_in(tag, self.__add_abstract(doc))
            elif name == 'aff':
                text += self.__get_affiliation_list(doc, export_format, tag)
            elif name == 'keyword':
                text += self.__add_keywords(doc, export_format, tag)
            elif name == 'comment':
                comment = self.__format_line_wrapped(self.__add_comment(doc))
                text += self.__add_in(tag, comment)
            elif name == 'url':
                # configured base url, a slash, then the bibcode
                text += self.__add_in(
                    tag,
                    current_app.config['EXPORT_SERVICE_FROM_BBB_URL'] + '/' +
                    doc.get('bibcode', ''))
            elif name == 'endRecord':
                # the entry itself is written out, followed by a line feed
                text += (tag + '\n')
            elif name == 'pub_raw':
                text += self.__add_in(tag, self.__add_clean_pub_raw(doc))
            elif name == 'links':
                text += self.__add_doc_links(doc, tag)
            elif name == 'eprintid':
                text += self.__add_in(tag, get_eprint(doc))
            elif name == 'bibstem':
                # only the first element of the bibstem value is used
                text += self.__add_in(tag, doc.get(name, ['', ''])[0])
            else:
                # every other field is added exactly as stored in the document
                text += self.__add_in(tag, doc.get(name, ''))
        # line feed once the doc is complete
        return text + '\n\n'