Exemple #1
0
def compare_va_to_ra(va_id, ra_id):
    '''
    Compares the origin names of a virtual author against the name list of
    a real author

    Every name of the real author is scored against the origin name of the
    virtual author with compare_names; the result is the arithmetic mean of
    these scores. A mean below 0.1 is reported as 0.0. If the virtual author
    has no origin name, or the real author has no names to compare against,
    0.0 is returned.

    @param va_id: ID of the virtual author
    @type va_id: int
    @param ra_id: ID of the real author
    @type ra_id: int

    @return: The probability resulting from the name comparison.
    @rtype: float
    '''

    bconfig.LOGGER.info("|-> Start of name comparison (va %s : ra %s)"
                  % (va_id, ra_id))

    ra_names = get_realauthor_names_from_set(ra_id)
    va_nameid_recs = get_virtualauthor_records(va_id, tag='orig_authorname_id')

    # -1 is handed to the name lookup when the virtual author carries no
    # origin name record; the empty-name guard below covers that case.
    authorname_id = -1
    if va_nameid_recs:
        authorname_id = va_nameid_recs[0]['value']

    authorname_strings = get_name_and_db_name_strings(authorname_id)
    va_name = authorname_strings["name"]

    if not va_name:
        return 0.0

    comparisons = []

    for ra_name in ra_names:
        comparison = compare_names(va_name, ra_name)
        bconfig.LOGGER.info("|-> %s & %s -> %s"
                            % (va_name, ra_name, comparison))
        comparisons.append(comparison)

    bconfig.LOGGER.debug("|--> Name comparisons: %s" % (comparisons))
    bconfig.LOGGER.info("|-> End of name comparison")

    if comparisons:
        ret = float(sum(comparisons)) / len(comparisons)

        # Very weak matches are treated as no match at all.
        if ret < .1:
            ret = 0.0
    else:
        # No real author names to compare against: avoid dividing by zero
        # and report "no match".
        ret = 0.0

    bconfig.LOGGER.info("|--> Resulting name probability: %s" % (ret))

    return ret
def compare_va_to_ra(va_id, ra_id):
    """
    Compares the origin names of a virtual author against the name list of
    a real author

    The origin name of the virtual author is scored against each name of the
    real author with compare_names and the scores are averaged. An average
    below 0.1 is reported as 0.0. When the virtual author has no origin name,
    or the real author has no names at all, the result is 0.0.

    @param va_id: ID of the virtual author
    @type va_id: int
    @param ra_id: ID of the real author
    @type ra_id: int

    @return: The probability resulting from the name comparison.
    @rtype: float
    """

    bconfig.LOGGER.info("|-> Start of name comparison (va %s : ra %s)" % (va_id, ra_id))

    ra_names = get_realauthor_names_from_set(ra_id)
    va_nameid_recs = get_virtualauthor_records(va_id, tag="orig_authorname_id")

    # Fall back to -1 when there is no origin name record; the empty-name
    # check below then short-circuits the comparison.
    authorname_id = -1
    if va_nameid_recs:
        authorname_id = va_nameid_recs[0]["value"]

    authorname_strings = get_name_and_db_name_strings(authorname_id)
    va_name = authorname_strings["name"]

    if not va_name:
        return 0.0

    comparisons = []

    for ra_name in ra_names:
        comparison = compare_names(va_name, ra_name)
        bconfig.LOGGER.info("|-> %s & %s -> %s" % (va_name, ra_name, comparison))
        comparisons.append(comparison)

    bconfig.LOGGER.debug("|--> Name comparisons: %s" % (comparisons))
    bconfig.LOGGER.info("|-> End of name comparison")

    if not comparisons:
        # An empty name list would otherwise trigger a ZeroDivisionError.
        ret = 0.0
    else:
        ret = float(sum(comparisons)) / len(comparisons)

        # Averages this low are not considered a match.
        if ret < 0.1:
            ret = 0.0

    bconfig.LOGGER.info("|--> Resulting name probability: %s" % (ret))

    return ret
def get_information_from_dataset(va_id, ra_id=-1):
    '''
    Looks up the 'i' subfield value (stored as "inspireid") that belongs to
    a virtual author on its record.

    Nothing is looked up when the runtime configuration flag
    "populate_aid_from_personid" is set; True is returned right away.
    Otherwise the value is either written to the real author given by ra_id
    or, if ra_id keeps its default of -1, handed back to the caller.

    @param va_id: Virtual author ID to get the information from
    @type va_id: int
    @param ra_id: Real author ID to set information for.
    @type ra_id: int

    @return: True if the config flag is set or ra_id > -1; otherwise the
        first value found, or the empty lookup result if there is none
    @rtype: True or lookup value
    '''
    if dat.RUNTIME_CONFIG["populate_aid_from_personid"]:
        return True

    bibrec_id = ""
    authorname_id = ""

    # Pick the record ID and the origin name ID out of the va records.
    for record in get_virtualauthor_records(va_id):
        tag = record['tag']
        if tag == "bibrec_id":
            bibrec_id = record['value']
        elif tag == "orig_authorname_id":
            authorname_id = record['value']

    name_strings = get_name_and_db_name_strings(authorname_id)

    bconfig.LOGGER.info("| Reading info for va %s: %s recid %s"
                        % (va_id, name_strings["name"], bibrec_id))

    inspireid = get_field_values_on_condition(bibrec_id, ['100', '700'],
                                              'i', 'a',
                                              name_strings["db_name"], "==")

    # Only the first hit is kept.
    if inspireid:
        inspireid = list(inspireid)[0]

    if ra_id > -1:
        if inspireid:
            set_realauthor_data(ra_id, "inspireid", "%s" % inspireid)
        return True

    return inspireid
def get_information_from_dataset(va_id, ra_id= -1):
    '''
    Retrieves information about the data
    of a virtual author from the data set.

    In dependency of the real author ID, the information will be written to the
    real author holding this ID. If the real author ID should be the default
    '-1', a list with all the data will be returned.

    As written, the value stored for the real author is the fixed string
    "module_value something" under the tag "module_tag".

    @param va_id: Virtual author ID to get the information from
    @type va_id: int
    @param ra_id: Real author ID to set information for.
    @type ra_id: int

    @return: True, if ra_id is set OR A list of the data
    @rtype: True if ra_id > -1 or list of strings
    '''
    va_data = get_virtualauthor_records(va_id)
    bibrec_id = ""
    # Default for virtual authors without an "orig_authorname_id" record;
    # without it the name lookup below would raise UnboundLocalError.
    # -1 mirrors the default used by the other variants of this function.
    authorname_id = -1

    for va_data_item in va_data:
        if va_data_item['tag'] == "bibrec_id":
            bibrec_id = va_data_item['value']
        elif va_data_item['tag'] == "orig_authorname_id":
            authorname_id = va_data_item['value']

    authorname_strings = get_name_and_db_name_strings(authorname_id)

    bconfig.LOGGER.info("| Reading info for va %s: %s recid %s"
                  % (va_id, authorname_strings["name"], bibrec_id))

    data = get_field_values_on_condition(
        bibrec_id, ['100', '700'], 'a', 'a',
        authorname_strings["db_name"], "!=")

    if ra_id > -1:
        formatted = "something"
        set_realauthor_data(ra_id, "module_tag", "module_value %s"
                            % (formatted))

        return True
    else:
        return data
Exemple #5
0
def get_information_from_dataset(va_id, ra_id=-1):
    '''
    Retrieves information about the data
    of a virtual author from the data set.

    In dependency of the real author ID, the information will be written to the
    real author holding this ID. If the real author ID should be the default
    '-1', a list with all the data will be returned.

    As written, the value stored for the real author is the fixed string
    "module_value something" under the tag "module_tag".

    @param va_id: Virtual author ID to get the information from
    @type va_id: int
    @param ra_id: Real author ID to set information for.
    @type ra_id: int

    @return: True, if ra_id is set OR A list of the data
    @rtype: True if ra_id > -1 or list of strings
    '''
    va_data = get_virtualauthor_records(va_id)
    bibrec_id = ""
    # Give the name ID a default so that a virtual author lacking an
    # "orig_authorname_id" record no longer raises UnboundLocalError at the
    # lookup below; -1 mirrors the other variants of this function.
    authorname_id = -1

    for va_data_item in va_data:
        if va_data_item['tag'] == "bibrec_id":
            bibrec_id = va_data_item['value']
        elif va_data_item['tag'] == "orig_authorname_id":
            authorname_id = va_data_item['value']

    authorname_strings = get_name_and_db_name_strings(authorname_id)

    bconfig.LOGGER.info("| Reading info for va %s: %s recid %s" %
                        (va_id, authorname_strings["name"], bibrec_id))

    data = get_field_values_on_condition(bibrec_id, ['100', '700'], 'a', 'a',
                                         authorname_strings["db_name"], "!=")

    if ra_id > -1:
        formatted = "something"
        set_realauthor_data(ra_id, "module_tag",
                            "module_value %s" % (formatted))

        return True
    else:
        return data
def get_information_from_dataset(va_id, ra_id=-1):
    '''
    Collects the coauthors or the collaboration attached to a virtual
    author on its record.

    With a real author ID (ra_id > -1) the findings are stored as
    "coauthor" data of that real author: the collaboration name if there is
    one, otherwise every coauthor, or a single hash of the coauthor set when
    there are more than MAX_COAUTHORS of them. With the default ra_id of -1
    nothing is stored and the findings are returned instead.

    @param va_id: Virtual author ID to get the information from
    @type va_id: int
    @param ra_id: Real author ID to set information for.
    @type ra_id: int

    @return: True, if ra_id is set OR the collaboration lookup result if it
        is non-empty OR the coauthor lookup result
    @rtype: True if ra_id > -1, otherwise the lookup result
    '''
    bibrec_id = ""
    authorname_id = -1

    # Pick the record ID and the origin name ID out of the va records.
    for record in get_virtualauthor_records(va_id):
        tag = record['tag']
        if tag == "bibrec_id":
            bibrec_id = record['value']
        elif tag == "orig_authorname_id":
            authorname_id = record['value']

    name_strings = get_name_and_db_name_strings(authorname_id)

    bconfig.LOGGER.info("| Reading coauthors for va %s: %s recid %s" %
                        (va_id, name_strings["name"], bibrec_id))

    coauthors = get_field_values_on_condition(
        bibrec_id, ['100', '700'], 'a', 'a', name_strings["db_name"], "!=")
    collaboration = get_field_values_on_condition(bibrec_id, "710", "g")

    if not (coauthors or collaboration):
        bconfig.LOGGER.info("|-> No coauthors and no collaboration found "
                            "for this author on this record")
    elif not ra_id:
        # NOTE(review): this is only true for ra_id == 0, never for the
        # default -1 -- confirm whether "no real author given" was meant.
        if collaboration:
            bconfig.LOGGER.info("|-> Collaboration found: %s" %
                                (list(collaboration)[0]))
        else:
            bconfig.LOGGER.info("|-> Coauthors found: %s" % (len(coauthors)))

    coauthor_limit = MAX_COAUTHORS

    def _store(value):
        # Attach one "<author name>;;<value>" entry to the real author.
        set_realauthor_data(ra_id, "coauthor",
                            "%s;;%s" % (name_strings["name"], value))

    if ra_id > -1:
        if collaboration:
            # A collaboration replaces the individual coauthors.
            collaboration_name = list(collaboration)[0]
            _store(create_unified_name(collaboration_name.lower()))
        elif len(coauthors) <= coauthor_limit:
            for coauthor in coauthors:
                _store(create_unified_name(coauthor.lower()))
        else:
            hashvalue = hash_coauthor_set(coauthors)
            bconfig.LOGGER.info("|--> Coauthor # > %s. To preserve"
                                " information, a hash will be stored: %s" %
                                (coauthor_limit, hashvalue))
            _store(hashvalue)

        return True

    return collaboration if collaboration else coauthors
def get_information_from_dataset(va_id, ra_id=-1):
    '''
    Collects the affiliations of a virtual author on its record, each one
    combined with a date and the author name.

    Each entry has the form "<date>;;<author name>;;<affiliation>". The date
    is built from the first two "-"-separated parts of the record date;
    "0000-00" is used when the record has no date, and "10" is used as the
    second part when the date does not split into two or three parts. A
    missing affiliation is represented by the string "None", but only if a
    real date is available; with neither affiliation nor date there are no
    entries at all.

    With a real author ID (ra_id > -1) the entries are stored as
    "affiliation" data of that real author; with the default ra_id of -1
    they are returned.

    @param va_id: Virtual author ID to get the info from
    @type va_id: int
    @param ra_id: Real author ID to set information for.
    @type ra_id: int

    @return: A list of affiliations or simply True, if ra_id is set.
    @rtype: list of strings or True if ra_id > -1
    '''

    authorname_id = -1
    bibrec_id = ""

    # Pick the record ID and the origin name ID out of the va records.
    for record in get_virtualauthor_records(va_id):
        tag = record['tag']
        if tag == "bibrec_id":
            bibrec_id = record['value']
        elif tag == "orig_authorname_id":
            authorname_id = record['value']

    name_strings = get_name_and_db_name_strings(authorname_id)
    bconfig.LOGGER.info("| Reading affiliations for va %s: %s  recid %s" %
                        (va_id, name_strings["name"], bibrec_id))

    affiliations = get_field_values_on_condition(
        bibrec_id, ['100', '700'], 'u', 'a', name_strings["db_name"])
    record_date = get_field_values_on_condition(bibrec_id, '269', 'c')

    if len(record_date) > 0:
        date_parts = list(record_date)[0].split("-")
    else:
        date_parts = ['0000', '00']

    part_count = len(date_parts)

    if part_count == 3:
        # Drop the third component, keep the first two.
        date_parts = date_parts[:2]
    elif part_count != 2:
        # Supply '10' as a second component for other shapes.
        date_parts.append('10')

    affiliation_date = "%s-%s" % (date_parts[0], date_parts[1])

    has_affiliation = bool(affiliations)

    if has_affiliation:
        bconfig.LOGGER.info("|-> Affiliation found: %s" % (affiliations))
    else:
        bconfig.LOGGER.info("|-> No Affiliation for this record. Set to None")
        affiliations = ["None"]

    has_date = affiliation_date != "0000-00"

    if has_date:
        bconfig.LOGGER.info("|-> Affiliation date: %s" % (affiliation_date))
    else:
        bconfig.LOGGER.info("|-> No Affiliation Date set to 0000-00")

    aff_collection = []

    if has_affiliation or has_date:
        for affiliation in affiliations:
            fields = (affiliation_date, name_strings["name"], affiliation)
            bconfig.LOGGER.info("|--> Found Affiliation: %s;;%s;;%s" % fields)
            aff_collection.append("%s;;%s;;%s" % fields)

    if ra_id > -1:
        for entry in aff_collection:
            set_realauthor_data(ra_id, "affiliation", entry)

        return True

    return aff_collection
def get_information_from_dataset(va_id, ra_id= -1):
    '''
    Collects the affiliations of a virtual author on its record, each one
    combined with a date and the author name.

    The field lookups are made with source "API" when bconfig.STANDALONE or
    the runtime flag "populate_aid_from_personid" is set, and with source
    "MEM" otherwise.

    Each entry has the form "<date>;;<author name>;;<affiliation>". The date
    is built from the first two "-"-separated parts of the record date;
    "0000-00" is used when the record has no date, and "10" is used as the
    second part when the date does not split into two or three parts. A
    missing affiliation is represented by the string "None", but only if a
    real date is available; with neither affiliation nor date there are no
    entries at all.

    With a real author ID (ra_id > -1) the entries are stored as
    "affiliation" data of that real author; with the default ra_id of -1
    they are returned.

    @param va_id: Virtual author ID to get the info from
    @type va_id: int
    @param ra_id: Real author ID to set information for.
    @type ra_id: int

    @return: A list of affiliations or simply True, if ra_id is set.
    @rtype: list of strings or True if ra_id > -1
    '''
    use_api = (bconfig.STANDALONE
               or dat.RUNTIME_CONFIG["populate_aid_from_personid"])
    src = "API" if use_api else "MEM"

    authorname_id = -1
    bibrec_id = ""

    # Pick the record ID and the origin name ID out of the va records.
    for record in get_virtualauthor_records(va_id):
        tag = record['tag']
        if tag == "bibrec_id":
            bibrec_id = record['value']
        elif tag == "orig_authorname_id":
            authorname_id = record['value']

    name_strings = get_name_and_db_name_strings(authorname_id)
    bconfig.LOGGER.info("| Reading affiliations for va %s: %s  recid %s"
                        % (va_id, name_strings["name"], bibrec_id))

    affiliations = get_field_values_on_condition(
        bibrec_id, ['100', '700'], 'u', 'a',
        name_strings["db_name"], source=src)
    record_date = get_field_values_on_condition(
        bibrec_id, '269', 'c', source=src)

    if len(record_date) > 0:
        date_parts = list(record_date)[0].split("-")
    else:
        date_parts = ['0000', '00']

    part_count = len(date_parts)

    if part_count == 3:
        # Drop the third component, keep the first two.
        date_parts = date_parts[:2]
    elif part_count != 2:
        # Supply '10' as a second component for other shapes.
        date_parts.append('10')

    affiliation_date = "%s-%s" % (date_parts[0], date_parts[1])

    has_affiliation = bool(affiliations)

    if has_affiliation:
        bconfig.LOGGER.info("|-> Affiliation found: %s" % (affiliations))
    else:
        bconfig.LOGGER.info("|-> No Affiliation for this record. Set to None")
        affiliations = ["None"]

    has_date = affiliation_date != "0000-00"

    if has_date:
        bconfig.LOGGER.info("|-> Affiliation date: %s" % (affiliation_date))
    else:
        bconfig.LOGGER.info("|-> No Affiliation Date set to 0000-00")

    aff_collection = []

    if has_affiliation or has_date:
        for affiliation in affiliations:
            fields = (affiliation_date, name_strings["name"], affiliation)
            bconfig.LOGGER.info("|--> Found Affiliation: %s;;%s;;%s" % fields)
            aff_collection.append("%s;;%s;;%s" % fields)

    if ra_id > -1:
        for entry in aff_collection:
            set_realauthor_data(ra_id, "affiliation", entry)

        return True

    return aff_collection
def get_information_from_dataset(va_id, ra_id= -1):
    '''
    Collects the coauthors or the collaboration attached to a virtual
    author on its record.

    With a real author ID (ra_id > -1) the findings are stored as
    "coauthor" data of that real author: the collaboration name if there is
    one, otherwise every coauthor, or a single hash of the coauthor set when
    there are more than MAX_COAUTHORS of them. With the default ra_id of -1
    nothing is stored and the findings are returned instead.

    @param va_id: Virtual author ID to get the information from
    @type va_id: int
    @param ra_id: Real author ID to set information for.
    @type ra_id: int

    @return: True, if ra_id is set OR the collaboration lookup result if it
        is non-empty OR the coauthor lookup result
    @rtype: True if ra_id > -1, otherwise the lookup result
    '''
    bibrec_id = ""
    authorname_id = -1

    # Pick the record ID and the origin name ID out of the va records.
    for record in get_virtualauthor_records(va_id):
        tag = record['tag']
        if tag == "bibrec_id":
            bibrec_id = record['value']
        elif tag == "orig_authorname_id":
            authorname_id = record['value']

    name_strings = get_name_and_db_name_strings(authorname_id)

    bconfig.LOGGER.info("| Reading coauthors for va %s: %s recid %s"
                        % (va_id, name_strings["name"], bibrec_id))

    coauthors = get_field_values_on_condition(
        bibrec_id, ['100', '700'], 'a', 'a', name_strings["db_name"], "!=")
    collaboration = get_field_values_on_condition(bibrec_id, "710", "g")

    if not (coauthors or collaboration):
        bconfig.LOGGER.info("|-> No coauthors and no collaboration found "
                            "for this author on this record")
    elif not ra_id:
        # NOTE(review): this is only true for ra_id == 0, never for the
        # default -1 -- confirm whether "no real author given" was meant.
        if collaboration:
            bconfig.LOGGER.info("|-> Collaboration found: %s"
                                % (list(collaboration)[0]))
        else:
            bconfig.LOGGER.info("|-> Coauthors found: %s" % (len(coauthors)))

    coauthor_limit = MAX_COAUTHORS

    def _store(value):
        # Attach one "<author name>;;<value>" entry to the real author.
        set_realauthor_data(ra_id, "coauthor",
                            "%s;;%s" % (name_strings["name"], value))

    if ra_id > -1:
        if collaboration:
            # A collaboration replaces the individual coauthors.
            collaboration_name = list(collaboration)[0]
            _store(create_unified_name(collaboration_name.lower()))
        elif len(coauthors) <= coauthor_limit:
            for coauthor in coauthors:
                _store(create_unified_name(coauthor.lower()))
        else:
            hashvalue = hash_coauthor_set(coauthors)
            bconfig.LOGGER.info("|--> Coauthor # > %s. To preserve"
                                " information, a hash will be stored: %s"
                                % (coauthor_limit, hashvalue))
            _store(hashvalue)

        return True

    return collaboration if collaboration else coauthors