def test_build_default_field_content(self):
        """solrutils - build default field content

        Using the tags returned by get_tags(), checks the author and
        keyword content built for record 18 and the abstract content
        built for record 6.
        """
        default_tags = get_tags()

        # (recid, field name, expected content), verified in this order.
        expectations = (
            (18, 'author',
             u'Ellis, J Enqvist, K Nanopoulos, D V'),
            (18, 'keyword',
             u'Kahler manifolds gravitinos axions constraints noscale'),
            (6, 'abstract',
             u'In 1962, CERN hosted the 11th International Conference on High Energy Physics. Among the distinguished visitors were eight Nobel prizewinners.Left to right: Cecil F. Powell, Isidor I. Rabi, Werner Heisenberg, Edwin M. McMillan, Emile Segre, Tsung Dao Lee, Chen Ning Yang and Robert Hofstadter.'),
        )

        for recid, field, expected in expectations:
            self.assertEqual(
                expected,
                get_field_content_in_utf8(recid, field, default_tags))
    def test_build_default_field_content(self):
        """solrutils - build default field content

        Compares the content produced by get_field_content_in_utf8() for
        the tags from get_tags() against fixed strings: author and
        keyword of record 18, abstract of record 6.
        """
        tag_map = get_tags()

        # Author content of record 18.
        expected_author = u'Ellis, J Enqvist, K Nanopoulos, D V'
        actual_author = get_field_content_in_utf8(18, 'author', tag_map)
        self.assertEqual(expected_author, actual_author)

        # Keyword content of record 18.
        expected_keyword = (
            u'Kahler manifolds gravitinos axions constraints noscale')
        actual_keyword = get_field_content_in_utf8(18, 'keyword', tag_map)
        self.assertEqual(expected_keyword, actual_keyword)

        # Abstract content of record 6.
        expected_abstract = u'In 1962, CERN hosted the 11th International Conference on High Energy Physics. Among the distinguished visitors were eight Nobel prizewinners.Left to right: Cecil F. Powell, Isidor I. Rabi, Werner Heisenberg, Edwin M. McMillan, Emile Segre, Tsung Dao Lee, Chen Ning Yang and Robert Hofstadter.'
        actual_abstract = get_field_content_in_utf8(6, 'abstract', tag_map)
        self.assertEqual(expected_abstract, actual_abstract)
    def test_build_custom_field_content(self):
        """solrutils - build custom field content

        Passes a hand-written tag mapping for 'abstract' (two tag
        patterns) and expects the abstract of record 6 to contain the
        English text followed by the French text in one string.
        """
        custom_tags = {'abstract': ['520__%', '590__%']}

        expected_abstract = u"""In 1962, CERN hosted the 11th International Conference on High Energy Physics. Among the distinguished visitors were eight Nobel prizewinners.Left to right: Cecil F. Powell, Isidor I. Rabi, Werner Heisenberg, Edwin M. McMillan, Emile Segre, Tsung Dao Lee, Chen Ning Yang and Robert Hofstadter. En 1962, le CERN est l'hote de la onzieme Conference Internationale de Physique des Hautes Energies. Parmi les visiteurs eminents se trouvaient huit laureats du prix Nobel.De gauche a droite: Cecil F. Powell, Isidor I. Rabi, Werner Heisenberg, Edwin M. McMillan, Emile Segre, Tsung Dao Lee, Chen Ning Yang et Robert Hofstadter."""
        built_abstract = get_field_content_in_utf8(6, 'abstract', custom_tags)

        self.assertEqual(expected_abstract, built_abstract)
Esempio n. 4
0
def solr_add_range(lower_recid, upper_recid, tags_to_index,
                   next_commit_counter):
    """
    Add the field values of every existing record in a recid range to Solr.

    For each recid from lower_recid to upper_recid (both inclusive) for
    which record_exists() is true, the abstract, author, keyword, title
    and fulltext values are collected and passed to solr_add(); then
    solr_commit_if_necessary() is called with the running counter.

    Keyword and title are built by get_field_content_in_utf8() from
    tags_to_index.  Abstract and author are read from the MARC fields
    named by CFG_MARC_ABSTRACT, CFG_MARC_AUTHOR_NAME and
    CFG_MARC_ADDITIONAL_AUTHOR_NAME, and fulltext comes from
    get_entire_fulltext().  Each of those three independently falls back
    to an empty string when its own lookup raises.

    Parameters:
        lower_recid: first record ID of the range (inclusive).
        upper_recid: last record ID of the range (inclusive).
        tags_to_index: passed through to get_field_content_in_utf8()
            (presumably a mapping of field name to MARC tag patterns --
            confirm against the caller).
        next_commit_counter: counter handed to and updated by
            solr_commit_if_necessary().

    Returns:
        The value returned by the last solr_commit_if_necessary() call,
        or next_commit_counter unchanged if no record in the range exists.
    """
    for recid in range(lower_recid, upper_recid + 1):
        if not record_exists(recid):
            continue

        # NOTE(review): the tag-driven abstract and author are always
        # replaced by the MARC-based lookups below (or by ""), so
        # tags_to_index effectively only drives keyword and title.
        # Confirm which source is intended before removing either.
        abstract = get_field_content_in_utf8(recid, 'abstract',
                                             tags_to_index)
        author = get_field_content_in_utf8(recid, 'author', tags_to_index)
        keyword = get_field_content_in_utf8(recid, 'keyword',
                                            tags_to_index)
        title = get_field_content_in_utf8(recid, 'title', tags_to_index)

        # The abstract lookup stands alone: a fulltext extraction failure
        # must not blank the abstract, and the fulltext is fetched once
        # further down.  `except Exception` keeps the best-effort
        # fallback without swallowing KeyboardInterrupt / SystemExit.
        try:
            abstract = unicode(
                get_fieldvalues(recid, CFG_MARC_ABSTRACT)[0], 'utf-8')
        except Exception:
            abstract = ""

        try:
            first_author = get_fieldvalues(recid, CFG_MARC_AUTHOR_NAME)[0]
            # Every additional author is prefixed with a space, exactly as
            # the previous reduce() did, so the indexed string (including
            # the doubled space after the first author) is unchanged.
            additional_authors = ''.join(
                " " + name
                for name in get_fieldvalues(
                    recid, CFG_MARC_ADDITIONAL_AUTHOR_NAME))
            author = unicode(first_author + " " + additional_authors,
                             'utf-8')
        except Exception:
            author = ""

        try:
            fulltext = unicode(get_entire_fulltext(recid), 'utf-8')
        except Exception:
            fulltext = ''

        solr_add(recid, abstract, author, fulltext, keyword, title)
        next_commit_counter = solr_commit_if_necessary(next_commit_counter,
                                                       recid=recid)

    return next_commit_counter
def solr_add_range(lower_recid, upper_recid, tags_to_index, next_commit_counter):
    """
    Add the field values of every existing record in a recid range to Solr.

    For each recid from lower_recid to upper_recid (both inclusive) for
    which record_exists() is true, the abstract, author, keyword and title
    are built by get_field_content_in_utf8() from tags_to_index, the
    fulltext is read through BibRecDocs(recid).get_text(), and everything
    is passed to solr_add().  solr_commit_if_necessary() is then called
    with the running counter.

    Parameters:
        lower_recid: first record ID of the range (inclusive).
        upper_recid: last record ID of the range (inclusive).
        tags_to_index: passed through to get_field_content_in_utf8()
            (presumably a mapping of field name to MARC tag patterns --
            confirm against the caller).
        next_commit_counter: counter handed to and updated by
            solr_commit_if_necessary().

    Returns:
        The value returned by the last solr_commit_if_necessary() call,
        or next_commit_counter unchanged if no record in the range exists.
    """
    for recid in range(lower_recid, upper_recid + 1):
        if not record_exists(recid):
            continue

        abstract = get_field_content_in_utf8(recid, 'abstract', tags_to_index)
        author = get_field_content_in_utf8(recid, 'author', tags_to_index)
        keyword = get_field_content_in_utf8(recid, 'keyword', tags_to_index)
        title = get_field_content_in_utf8(recid, 'title', tags_to_index)

        # Fulltext is best-effort: any ordinary failure yields an empty
        # string.  `except Exception` (instead of a bare except) lets
        # KeyboardInterrupt / SystemExit stop a long indexing run.
        try:
            fulltext = unicode(BibRecDocs(recid).get_text(), 'utf-8')
        except Exception:
            fulltext = ''

        solr_add(recid, abstract, author, fulltext, keyword, title)
        next_commit_counter = solr_commit_if_necessary(next_commit_counter,
                                                       recid=recid)

    return next_commit_counter
Esempio n. 6
0
def solr_add_range(lower_recid, upper_recid, tags_to_index, next_commit_counter):
    """
    Push the indexable fields of all existing records in [lower_recid, upper_recid] to Solr.

    Every recid in the inclusive range is checked with record_exists();
    for those that pass, abstract, author, keyword and title are obtained
    from get_field_content_in_utf8() with tags_to_index, and the fulltext
    from BibRecDocs(recid).get_text().  The values go to solr_add(), after
    which solr_commit_if_necessary() updates the commit counter.

    Parameters:
        lower_recid: lowest record ID to process (inclusive).
        upper_recid: highest record ID to process (inclusive).
        tags_to_index: forwarded unchanged to get_field_content_in_utf8()
            (looks like a field-name -> tag-pattern mapping; verify
            against the caller).
        next_commit_counter: current counter, threaded through
            solr_commit_if_necessary() once per processed record.

    Returns:
        The counter as last returned by solr_commit_if_necessary(); the
        input value is returned untouched when no record was processed.
    """
    field_names = ('abstract', 'author', 'keyword', 'title')

    for recid in range(lower_recid, upper_recid + 1):
        if not record_exists(recid):
            continue

        # Same lookup order as the field names above.
        abstract, author, keyword, title = [
            get_field_content_in_utf8(recid, field_name, tags_to_index)
            for field_name in field_names
        ]

        # Missing or unreadable fulltext must not abort the run, but
        # KeyboardInterrupt / SystemExit must not be swallowed either,
        # hence `except Exception` rather than a bare except.
        try:
            bibrecdocs = BibRecDocs(recid)
            fulltext = unicode(bibrecdocs.get_text(), 'utf-8')
        except Exception:
            fulltext = ''

        solr_add(recid, abstract, author, fulltext, keyword, title)
        next_commit_counter = solr_commit_if_necessary(next_commit_counter,
                                                       recid=recid)

    return next_commit_counter
    def test_build_custom_field_content(self):
        """solrutils - build custom field content

        Builds the abstract of record 6 from a custom tag mapping with two
        tag patterns and checks it equals the English text followed by the
        French text.
        """
        abstract_patterns = ['520__%', '590__%']
        tag_overrides = {'abstract': abstract_patterns}

        wanted = u"""In 1962, CERN hosted the 11th International Conference on High Energy Physics. Among the distinguished visitors were eight Nobel prizewinners.Left to right: Cecil F. Powell, Isidor I. Rabi, Werner Heisenberg, Edwin M. McMillan, Emile Segre, Tsung Dao Lee, Chen Ning Yang and Robert Hofstadter. En 1962, le CERN est l'hote de la onzieme Conference Internationale de Physique des Hautes Energies. Parmi les visiteurs eminents se trouvaient huit laureats du prix Nobel.De gauche a droite: Cecil F. Powell, Isidor I. Rabi, Werner Heisenberg, Edwin M. McMillan, Emile Segre, Tsung Dao Lee, Chen Ning Yang et Robert Hofstadter."""

        self.assertEqual(
            wanted,
            get_field_content_in_utf8(6, 'abstract', tag_overrides))