Example #1
0
 def get_facets_for_query(self, qid, limit=20, parent=None):
     """Return the most popular values of this facet's field for a query.

     Values that are keys of CFG_LANGUAGE_DICT get the mapped entry as
     their display label; every other value is displayed as-is. The
     'N/A' placeholder is dropped, unless it is itself a key of the
     language dictionary.

     Note that C{limit} is applied before 'N/A' is filtered out, so the
     result may contain fewer than C{limit} entries.

     @param qid: identifier of the query, handed over to self.get_recids()
     @param limit: maximum number of popular values taken into account
     @param parent: not used by this implementation
     @return: list of (value, second element of the popular-value entry --
         presumably its number of occurrences --, display label) tuples
     @rtype: list
     """
     from invenio.search_engine_config import CFG_LANGUAGE_DICT as language_dict
     popular = get_most_popular_field_values(self.get_recids(qid),
                                             get_field_tags(self.name)
                                             )[0:limit]
     nicer_facet = []
     for entry in popular:
         value = entry[0]
         # Direct dictionary membership: no need to build the key list.
         if value in language_dict:
             nicer_facet.append((value, entry[1], language_dict[value]))
         elif value != 'N/A':
             nicer_facet.append((value, entry[1], value))
     return nicer_facet
Example #2
0
def format_element(bfo,
                   tag="909C4",
                   label="",
                   separator="<br/> ",
                   description_location=""):
    """
    Return an HTML link to the DOI.

    The tags associated to the 'doi' logical field are tried first; the
    C{tag} parameter is only used when none of them yields a field.
    Only fields whose subfield '2' is 'DOI' (or missing) and which have a
    subfield 'a' are printed. The description is read from subfield 'y'.

    @param bfo: the object giving access to the fields of the record
    @param tag: field (tag + indicators) where the DOI can be found, used as fallback when the tags associated to the 'doi' logical field return nothing
    @param separator: the separator between multiple tags
    @param description_location: where should the description be added: if empty, the description is not printed; possible values: 'front', 'label', 'end'
    @param label: label to use for the DOI link. If not specified, use the DOI number as label for the link.
    @return: the HTML links joined with C{separator}, or an empty string when no DOI is found
    """
    fields = []
    for doi_tag in get_field_tags('doi'):  # first check the tags table
        # we want only the tag (with indicators), without the subfield code
        fields = bfo.fields(doi_tag[:5])
        if fields:
            break
    if not fields:
        fields = bfo.fields(tag)

    doi_link = """%s<a href="http://dx.doi.org/%s" title="DOI" target="_blank">%s</a>%s"""
    doi_links = []
    for field in fields:
        if field.get('2', 'DOI') != 'DOI' or 'a' not in field:
            continue
        doi = field['a']
        desc = field.get('y', '')
        front = end = ''
        # Work on a per-field copy of the label, so that the description of
        # one field never becomes the link label of the following ones.
        link_label = label
        if desc:
            if description_location == 'front':
                front = desc + ': '
            elif description_location == 'label':
                link_label = desc
            elif description_location == 'end':
                end = ' (' + desc + ')'
        # The description comes from the record: escape it wherever it is
        # printed (front, label or end).
        doi_links.append(doi_link % (escape(front),
                                     escape(doi, True),
                                     escape(link_label or doi),
                                     escape(end)))

    if not doi_links:
        return ""
    return separator.join(doi_links)
Example #3
0
def get_existing_records_for_reportnumber(reportnum):
    """Return the recids of the real (live) records carrying a report number.

       A record that does not exist (perhaps deleted, for example) will
       not have its recid returned in the list.

       @param reportnum: the report number for which recids are to be returned.
       @type reportnum: string
       @return: list of recids.
       @rtype: list
       @note: If reportnum was not found in phrase indexes, the function searches
           directly in bibxxx tables via MARC tags, so that the record does not
           have to be phrase-indexed.
    """
    ## First attempt: exact match in the "reportnumber" phrase index.
    candidates = list(search_pattern(req=None,
                                     p=reportnum,
                                     f="reportnumber",
                                     m="e"))

    if not candidates:
        ## Nothing indexed (yet?): query each MARC tag of the logical field.
        for marc_tag in get_field_tags("reportnumber"):
            candidates.extend(search_pattern(req=None,
                                             p=reportnum,
                                             f=marc_tag,
                                             m="e"))
        ## The same recid may be found through several tags: keep it once.
        candidates = dict.fromkeys(candidates).keys()

    ## Keep only the recids of live records, i.e. those for which
    ## record_exists() returns 1.
    return [recid for recid in candidates if record_exists(recid) == 1]
Example #4
0
def get_existing_records_for_reportnumber(reportnum):
    """Return the recids of the real (live) records carrying a report number.

       A record that does not exist (perhaps deleted, for example) will
       not have its recid returned in the list.

       @param reportnum: the report number for which recids are to be returned.
       @type reportnum: string
       @return: list of recids.
       @rtype: list
       @note: If reportnum was not found in phrase indexes, the function searches
           directly in bibxxx tables via MARC tags, so that the record does not
           have to be phrase-indexed.
    """
    ## First attempt: exact match in the "reportnumber" phrase index.
    found_recids = list(search_pattern(req=None,
                                       p=reportnum,
                                       f="reportnumber",
                                       m="e"))

    if not found_recids:
        ## Nothing indexed (yet?): query each MARC tag of the logical field.
        for marc_tag in get_field_tags("reportnumber"):
            found_recids.extend(search_pattern(req=None,
                                               p=reportnum,
                                               f=marc_tag,
                                               m="e"))
        ## The same recid may be found through several tags: keep it once.
        found_recids = dict.fromkeys(found_recids).keys()

    ## Keep only the recids of live records, i.e. those for which
    ## record_exists() returns 1.
    live_recids = []
    for recid in found_recids:
        if record_exists(recid) == 1:
            live_recids.append(recid)
    return live_recids
Example #5
0
def get_field_data(recids, method_name, definition):
    """Returns the data associated with the definition for recids.
    The returned dictionary will contain ONLY the recids for which
    a value has been found in the database.

    The prefix of the definition selects where the data is read from:
      - 'MARC:'   comma separated list of MARC tags
      - 'FIELD:'  name of a logical field (ex: author, title), resolved
                  to its tags with get_field_tags()
      - 'RNK:'    name of a ranking method
      - 'BIBREC:' name of a column of the bibrec table

    @param recids: the records to look at; must provide a copy() method,
        the helpers only ever receive that copy
    @param method_name: name of the method, used in the log messages and
        for the 'RNK:' lookup
    @param definition: the prefixed definition string described above
    @return: the result of the matching get_data_for_definition_* helper,
        or an empty dictionary when no tag is found or when the
        definition is not recognized
    """
    recids_copy = recids.copy()
    #if we are dealing with a MARC definition
    if definition.startswith('MARC'):
        raw_tags = definition.replace('MARC:', '').replace(' ', '')
        # split() always returns at least one (possibly empty) string:
        # drop the empty ones so that the check below can really detect
        # a definition without tags
        tags = [tag for tag in raw_tags.strip().split(',') if tag]
        if not tags:
            write_message('No MARC tags found for method %s.' \
                          %method_name, verbose=5)
            return {}
        write_message('The following MARC tags will be queried: %s' %tags, \
                      verbose=5)
        return get_data_for_definition_marc(tags, recids_copy)
    #if we are dealing with tags (ex: author, title)
    elif definition.startswith('FIELD'):
        tags = get_field_tags(definition.replace('FIELD:', '').strip())
        if not tags:
            write_message('No tags found for method %s.' \
                          %method_name, verbose=5)
            return {}
        write_message('The following tags will be queried: %s' % tags,
                      verbose=5)
        return get_data_for_definition_marc(tags, recids_copy)
    # if we are dealing with ranking data
    elif definition.startswith('RNK'):
        rnk_name = definition.replace('RNK:', '').strip()
        return get_data_for_definition_rnk(method_name, rnk_name)
    # if we are looking into bibrec table
    elif definition.startswith('BIBREC'):
        column_name = definition.replace('BIBREC:', '').strip()
        return get_data_for_definition_bibrec(column_name, recids_copy)
    else:
        # NB: the second placeholder used to be '% c' (a lone '%' followed
        # by ' could'), which raised a TypeError instead of logging
        write_message("The definition %s for method %s could not be recognized" \
                      %(definition, method_name), stream=sys.stderr)
        return {}
Example #6
0
def format_element(bfo, tag="909C4", label="", separator="<br/> ", description_location=""):
    """
    Return an HTML link to the DOI.

    The tags associated to the 'doi' logical field are tried first; the
    C{tag} parameter is only used when none of them yields a field.
    Only fields whose subfield '2' is 'DOI' (or missing) and which have a
    subfield 'a' are printed. The description is read from subfield 'y'.

    @param bfo: the object giving access to the fields of the record
    @param tag: field (tag + indicators) where the DOI can be found, used as fallback when the tags associated to the 'doi' logical field return nothing
    @param separator: the separator between multiple tags
    @param description_location: where should the description be added: if empty, the description is not printed; possible values: 'front', 'label', 'end'
    @param label: label to use for the DOI link. If not specified, use the DOI number as label for the link.
    @return: the HTML links joined with C{separator}, or an empty string when no DOI is found
    """
    fields = []
    for doi_tag in get_field_tags('doi'): #first check the tags table
        #we want only the tag (with indicators), without the subfield code
        fields = bfo.fields(doi_tag[:5])
        if fields:
            break
    if not fields:
        fields = bfo.fields(tag)

    doi_link = """%s<a href="http://dx.doi.org/%s" title="DOI" target="_blank">%s</a>%s"""
    doi_links = []
    for field in fields:
        if field.get('2', 'DOI') != 'DOI' or 'a' not in field:
            continue
        doi = field['a']
        desc = field.get('y', '')
        front = end = ''
        #per-field copy of the label: the description of one field must
        #not become the link label of the following ones
        link_label = label
        if desc:
            if description_location == 'front':
                front = desc + ': '
            elif description_location == 'label':
                link_label = desc
            elif description_location == 'end':
                end = ' (' + desc + ')'
        #the description comes from the record: escape it wherever it is
        #printed (front, label or end)
        doi_links.append(doi_link % (escape(front), escape(doi, True), escape(link_label or doi), escape(end)))

    if not doi_links:
        return ""
    return separator.join(doi_links)
Example #7
0
def bst_autocompletion_cache(collection_list=None):
    """
    Bibtasklet responsible of the generation of the subjects and authors list for the
    autocompletion suggestions.

    For every collection, the 200 most popular values of the 'exactauthor'
    tags and the popular values of the per-language subject tags are
    merged into the session as an AutocompletionCache row. Progress is
    reported with task_update_progress() after each collection.

    @param collection_list: list of collection ids to cache.
                            If None, all the collections will be calculated.

    """

    task_update_progress("Started updating autocomplete cache")

    # Subject tag to query for each supported language.
    tag_dicc = {'en': '9051_a', 'fr': '9061_a', 'es': '9071_a'}

    if collection_list is None:
        res = run_sql("SELECT id FROM collection")
        collection_list = [row[0] for row in res]

    total = len(collection_list)
    task_update_progress("Done %s of %s" % (0, total))
    for done, collection in enumerate(collection_list, 1):
        recids = list(get_collection_reclist(get_collection_name_by_id(collection)))
        popular_authors = get_most_popular_field_values(recids, get_field_tags('exactauthor'))[0:200]
        authors = [author[0] for author in popular_authors]

        subjects = {}
        for ln in ('en', 'fr', 'es'):
            popular_subjects = get_most_popular_field_values(recids, tag_dicc[ln])
            subjects[ln] = [subject[0] for subject in popular_subjects]

        ins = AutocompletionCache(id_collection=collection, authors=authors, subjects=subjects)
        db.session.merge(ins)
        db.session.flush()
        task_update_progress("Done %s of %s" % (done, total))

    db.session.close_all()
    task_update_progress("Finished updating autocomplete cache")
def book_title_from_MARC(recid):
    """
    Retrieve book's title from MARC

    The tags of the 'title' logical field are tried in order; the values
    of the first tag that has any are joined with ': '.

    @param recid: identify the record. Primary key of bibrec.
    @type recid: int

    @return book's title, or an empty string when no title tag has a value
    """
    for title_tag in get_field_tags('title'):
        values = get_fieldvalues(recid, title_tag)
        if values:
            # The first tag with values wins, later tags are not queried.
            return ': '.join(values)

    return ''
def book_title_from_MARC(recid):
    """
    Retrieve book's title from MARC

    The tags of the 'title' logical field are tried in order; the values
    of the first tag that has any are joined with ': '.

    @param recid: identify the record. Primary key of bibrec.
    @type recid: int

    @return book's title, or an empty string when no title tag has a value
    """
    for title_tag in get_field_tags('title'):
        values = get_fieldvalues(recid, title_tag)
        if values:
            # The first tag with values wins, later tags are not queried.
            return ': '.join(values)

    return ''
Example #10
0
def get_field_data(recids, method_name, definition):
    """Returns the data associated with the definition for recids.
    The returned dictionary will contain ONLY the recids for which
    a value has been found in the database.

    The prefix of the definition selects where the data is read from:
      - 'MARC:'   comma separated list of MARC tags
      - 'FIELD:'  name of a logical field (ex: author, title), resolved
                  to its tags with get_field_tags()
      - 'RNK:'    name of a ranking method
      - 'BIBREC:' name of a column of the bibrec table

    @param recids: the records to look at; must provide a copy() method,
        the helpers only ever receive that copy
    @param method_name: name of the method, used in the log messages and
        for the 'RNK:' lookup
    @param definition: the prefixed definition string described above
    @return: the result of the matching get_data_for_definition_* helper,
        or an empty dictionary when no tag is found or when the
        definition is not recognized
    """
    recids_copy = recids.copy()
    #if we are dealing with a MARC definition
    if definition.startswith('MARC'):
        raw_tags = definition.replace('MARC:', '').replace(' ', '')
        # split() always returns at least one (possibly empty) string:
        # drop the empty ones so that the check below can really detect
        # a definition without tags
        tags = [tag for tag in raw_tags.strip().split(',') if tag]
        if not tags:
            write_message('No MARC tags found for method %s.' \
                          %method_name, verbose=5)
            return {}
        write_message('The following MARC tags will be queried: %s' %tags, \
                      verbose=5)
        return get_data_for_definition_marc(tags, recids_copy)
    #if we are dealing with tags (ex: author, title)
    elif definition.startswith('FIELD'):
        tags = get_field_tags(definition.replace('FIELD:', '').strip())
        if not tags:
            write_message('No tags found for method %s.' \
                          %method_name, verbose=5)
            return {}
        write_message('The following tags will be queried: %s' %tags, verbose=5)
        return get_data_for_definition_marc(tags, recids_copy)
    # if we are dealing with ranking data
    elif definition.startswith('RNK'):
        rnk_name = definition.replace('RNK:', '').strip()
        return get_data_for_definition_rnk(method_name, rnk_name)
    # if we are looking into bibrec table
    elif definition.startswith('BIBREC'):
        column_name = definition.replace('BIBREC:', '').strip()
        return get_data_for_definition_bibrec(column_name, recids_copy)
    else:
        # NB: the second placeholder used to be '% c' (a lone '%' followed
        # by ' could'), which raised a TypeError instead of logging
        write_message("The definition %s for method %s could not be recognized" \
                      %(definition, method_name), stream=sys.stderr)
        return {}
 def get_facets_for_query(self, qid, limit=20, parent=None):
     """Return the most popular values of this facet's field for a query.

     @param qid: identifier of the query, handed over to self.get_recids()
     @param limit: maximum number of entries returned
     @param parent: not used by this implementation
     @return: the first C{limit} entries given by
         get_most_popular_field_values() for the tags of self.name
     """
     recids = self.get_recids(qid)
     tags = get_field_tags(self.name)
     popular_values = get_most_popular_field_values(recids, tags)
     return popular_values[0:limit]
Example #12
0
    def get_facets_for_query(self, qid, limit=20, parent=None):
        """Return the popular values of this facet's field that are years.

        Entries whose value is rejected by is_correct_year() are dropped;
        the remaining ones are ordered by value, highest first, and the
        result is cut to C{limit} entries.

        @param qid: identifier of the query, handed over to self.get_recids()
        @param limit: maximum number of entries returned
        @param parent: not used by this implementation
        @return: list of the entries given by get_most_popular_field_values()
        """
        recids = self.get_recids(qid)
        tags = get_field_tags(self.name)

        year_entries = []
        for entry in get_most_popular_field_values(recids, tags):
            if is_correct_year(entry[0]):
                year_entries.append(entry)

        # Most recent value first.
        year_entries.sort(key=lambda entry: entry[0], reverse=True)
        return year_entries[0:limit]