def find_and_process_updates(process_initials):
    """
    Drains the virtual author process queue and passes every eligible
    virtual author ID on to add_virtualauthor.

    If the queue is empty on entry, init_va_process_queue is called first.
    A virtual author is eligible when process_initials is true, or when the
    third element returned by bibauthorid_utils.split_name_parts for its
    'orig_name_string' value is truthy. For eligible virtual authors the
    'updated' record is deleted before the insertion; all others are taken
    off the queue and left untouched.

    @param process_initials: If names with initials only shall be
        processed or not
    @type process_initials: boolean
    """
    if dat.VIRTUALAUTHOR_PROCESS_QUEUE.empty():
        init_va_process_queue()

    while not dat.VIRTUALAUTHOR_PROCESS_QUEUE.empty():
        va_id = dat.VIRTUALAUTHOR_PROCESS_QUEUE.get()
        name_records = bibauthorid_virtualauthor_utils.get_virtualauthor_records(va_id, tag="orig_name_string")
        va_name = name_records[0]["value"]

        # Short-circuit: the name is only split when initials-only names
        # are not accepted unconditionally.
        if not (process_initials or bibauthorid_utils.split_name_parts(va_name)[2]):
            continue

        bibauthorid_virtualauthor_utils.delete_virtualauthor_record(va_id, "updated")
        bconfig.LOGGER.log(25, "|> Inserting VA: %s Orig. name: %s" % (va_id, va_name))
        add_virtualauthor(va_id)

    bconfig.LOGGER.debug("Empty Queue. Job finished. Nothing to do.")
def remove_va_from_ra(ra_id, va_id):
    """
    Removes a selected virtual author from a real author

    Every row of dat.REALAUTHORS that links ra_id to va_id is dropped, then
    the 'connected' record of the virtual author is deleted.

    @param ra_id: id of the real author to be altered
    @type ra_id: int
    @param va_id: id of the virtual author to be removed from ra attachment
    @type va_id: int
    """
    # Filter in a single pass instead of calling list.remove() once per
    # match (each call rescans the list). The slice assignment mutates the
    # existing list object, so other references to it stay valid.
    dat.REALAUTHORS[:] = [
        row
        for row in dat.REALAUTHORS
        if not ((row["realauthorid"] == ra_id) and (row["virtualauthorid"] == va_id))
    ]

    bibauthorid_virtualauthor_utils.delete_virtualauthor_record(va_id, "connected")
def remove_va_from_ra(ra_id, va_id):
    '''
    Removes a selected virtual author from a real author

    Every row of dat.REALAUTHORS that links ra_id to va_id is dropped, then
    the 'connected' record of the virtual author is deleted.

    @param ra_id: id of the real author to be altered
    @type ra_id: int
    @param va_id: id of the virtual author to be removed from ra attachment
    @type va_id: int
    '''
    # Filter in a single pass instead of calling list.remove() once per
    # match (each call rescans the list). The slice assignment mutates the
    # existing list object, so other references to it stay valid.
    dat.REALAUTHORS[:] = [row for row in dat.REALAUTHORS
                          if not ((row['realauthorid'] == ra_id) and
                                  (row['virtualauthorid'] == va_id))]

    bibauthorid_virtualauthor_utils.delete_virtualauthor_record(va_id,
                                                                'connected')
Exemple #4
0
def find_and_process_updates(process_initials):
    '''
    Drains the virtual author process queue and passes every eligible
    virtual author ID on to add_virtualauthor.

    If the queue is empty on entry, init_va_process_queue is called first.
    A virtual author is eligible when process_initials is true, or when the
    third element returned by bibauthorid_utils.split_name_parts for its
    'orig_name_string' value is truthy. For eligible virtual authors the
    'updated' record is deleted before the insertion; all others are taken
    off the queue and left untouched.

    @param process_initials: If names with initials only shall be
        processed or not
    @type process_initials: boolean
    '''
    if dat.VIRTUALAUTHOR_PROCESS_QUEUE.empty():
        init_va_process_queue()

    while not dat.VIRTUALAUTHOR_PROCESS_QUEUE.empty():
        va_id = dat.VIRTUALAUTHOR_PROCESS_QUEUE.get()
        name_records = (
            bibauthorid_virtualauthor_utils.get_virtualauthor_records(
                va_id, tag='orig_name_string'))
        va_name = name_records[0]['value']

        # Short-circuit: the name is only split when initials-only names
        # are not accepted unconditionally.
        eligible = (process_initials or
                    bibauthorid_utils.split_name_parts(va_name)[2])

        if eligible:
            bibauthorid_virtualauthor_utils.delete_virtualauthor_record(
                va_id, 'updated')
            bconfig.LOGGER.log(
                25, "|> Inserting VA: %s Orig. name: %s" % (va_id, va_name))
            add_virtualauthor(va_id)

    bconfig.LOGGER.debug("Empty Queue. Job finished. Nothing to do.")
def add_virtualauthor(va_id, multi_va_to_ra=False):
    """
    Adds a new virtual author to the real authors system:
    the idea is to search for possibly compatible real authors, compare
    the compatibility of this virtual author with each of them and add the
    new virtualauthor to the most compatible real author.

    Outcomes, in the order they are checked:
      - the virtual author is already attached to a real author: nothing is
        matched, it is only marked as connected;
      - no candidate real author exists, or the best compatibility is below
        the adding threshold: a new real author is created;
      - exactly one real author reaches the best compatibility: the virtual
        author is attached to it;
      - several real authors tie for the best compatibility: the virtual
        author is attached to all of them if multi_va_to_ra is set,
        otherwise it is skipped and its 'connected' record is set to
        'False'. The function returns early in both tie cases.

    @param va_id: Virtualauthor ID
    @type va_id: int
    @param multi_va_to_ra: attach the virtual author to every real author
        tying for the best compatibility instead of skipping it
    @type multi_va_to_ra: boolean
    """
    addstart = time.time()
    adding_threshold = bconfig.REALAUTHOR_VA_ADD_THERSHOLD

    # NOTE(review): ["-1"] presumably marks an unset threshold in the
    # configuration; confirm against bconfig.
    if adding_threshold == ["-1"]:
        adding_threshold = 0.7

    already_existing = get_realauthors_by_virtuala_id(va_id)
    ralist = []

    if len(already_existing) <= 0:
        start = time.time()

        va_cluster = bibauthorid_virtualauthor_utils.get_cluster_va_ids_from_va_id(va_id)
        va_hash = hash(str(va_cluster))

        if va_hash in dat.RA_VA_CACHE:
            ralist_raw = dat.RA_VA_CACHE[va_hash]
            bconfig.LOGGER.debug("|-> Cache Hit for va cluster")
        else:
            bconfig.LOGGER.debug("|-> Cache Fail--Generating new hash")
            ralist_raw = update_ralist_cache(va_cluster, va_hash)

        # Candidate real authors, de-duplicated; rows belonging to this very
        # virtual author are ignored.
        ralist = [ids["ra_id"] for ids in ralist_raw if ids["va_id"] != va_id]
        ralist = list(set(ralist))

        if len(ralist) > 0:
            # One compatibility value per candidate, index-aligned with ralist.
            compatibilities = [cmp_virtual_to_real_author(va_id, ra_id) for ra_id in ralist]
            best_compatibility = max(compatibilities)

            if best_compatibility < adding_threshold:
                bconfig.LOGGER.log(
                    25,
                    "|-> Creating NEW real author for this"
                    + " virtual author (compatibility below adding threshold"
                    + " of other RAs).",
                )
                create_new_realauthor(va_id)
                update_ralist_cache(va_cluster, va_hash)

            else:
                tie_count = compatibilities.count(best_compatibility)

                if tie_count == 1:
                    index = compatibilities.index(best_compatibility)
                    add_realauthor_va(ralist[index], va_id, best_compatibility)
                    bconfig.LOGGER.log(
                        25,
                        "|-> Adding to real author #%s"
                        " with a compatability of %.2f" % (ralist[index], best_compatibility),
                    )

                elif tie_count > 1:
                    if multi_va_to_ra:
                        bconfig.LOGGER.log(25, "|-> virtual author" " comaptible with more than one realauthor.")

                        # Positions of every candidate sharing the best
                        # value. A plain scan is used because
                        # list.index(value, start) raises ValueError as soon
                        # as start lies past the last occurrence.
                        indexes = [
                            position
                            for position, value in enumerate(compatibilities)
                            if value == best_compatibility
                        ]

                        bconfig.LOGGER.log(
                            25, "|-> virtual author" " will be attached to %s real authors" % (len(indexes))
                        )

                        for position in indexes:
                            add_realauthor_va(ralist[position], va_id, best_compatibility)
                            bconfig.LOGGER.log(
                                25,
                                "|--> Adding to real author"
                                " #%s with a compatability of %.2f" % (ralist[position], best_compatibility),
                            )

                    else:
                        bconfig.LOGGER.log(
                            25, "|-> virtual author" " comaptible with more than one realauthor..." "skipped for now."
                        )
                        bconfig.LOGGER.log(
                            25,
                            "|> The (skipped) comparison "
                            "with %s real authors took %.2fs" % (len(ralist), time.time() - start),
                        )
                        bibauthorid_virtualauthor_utils.update_virtualauthor_record(va_id, "connected", "False")
                        bibauthorid_virtualauthor_utils.delete_virtualauthor_record(va_id, "updated")

                    # NOTE(review): this return also covers the
                    # multi_va_to_ra branch, so a virtual author attached to
                    # several real authors never reaches the 'connected' =
                    # 'True' update below; kept as in the original, confirm
                    # whether that is intended.
                    return
        else:
            bconfig.LOGGER.log(
                25, "|-> Creating NEW real author for this" " Virtual Author (currently, no real author exists)"
            )
            create_new_realauthor(va_id)
            update_ralist_cache(va_cluster, va_hash)

    bibauthorid_virtualauthor_utils.update_virtualauthor_record(va_id, "connected", "True")
    bibauthorid_virtualauthor_utils.delete_virtualauthor_record(va_id, "updated")

    bconfig.LOGGER.log(25, "|> The comparison with %s real authors took %.2fs" % (len(ralist), time.time() - addstart))
Exemple #6
0
def add_virtualauthor(va_id, multi_va_to_ra=False):
    '''
    Adds a new virtual author to the real authors system:
    the idea is to search for possibly compatible real authors, compare
    the compatibility of this virtual author with each of them and add the
    new virtualauthor to the most compatible real author.

    Outcomes, in the order they are checked:
      - the virtual author is already attached to a real author: nothing is
        matched, it is only marked as connected;
      - no candidate real author exists, or the best compatibility is below
        the adding threshold: a new real author is created;
      - exactly one real author reaches the best compatibility: the virtual
        author is attached to it;
      - several real authors tie for the best compatibility: the virtual
        author is attached to all of them if multi_va_to_ra is set,
        otherwise it is skipped and its 'connected' record is set to
        'False'. The function returns early in both tie cases.

    @param va_id: Virtualauthor ID
    @type va_id: int
    @param multi_va_to_ra: attach the virtual author to every real author
        tying for the best compatibility instead of skipping it
    @type multi_va_to_ra: boolean
    '''
    addstart = time.time()
    adding_threshold = bconfig.REALAUTHOR_VA_ADD_THERSHOLD

    # NOTE(review): ["-1"] presumably marks an unset threshold in the
    # configuration; confirm against bconfig.
    if adding_threshold == ["-1"]:
        adding_threshold = 0.7

    already_existing = get_realauthors_by_virtuala_id(va_id)
    ralist = []

    if len(already_existing) <= 0:
        start = time.time()

        va_cluster = (bibauthorid_virtualauthor_utils.
                      get_cluster_va_ids_from_va_id(va_id))
        va_hash = hash(str(va_cluster))

        if va_hash in dat.RA_VA_CACHE:
            ralist_raw = dat.RA_VA_CACHE[va_hash]
            bconfig.LOGGER.debug("|-> Cache Hit for va cluster")
        else:
            bconfig.LOGGER.debug("|-> Cache Fail--Generating new hash")
            ralist_raw = update_ralist_cache(va_cluster, va_hash)

        # Candidate real authors, de-duplicated; rows belonging to this very
        # virtual author are ignored.
        ralist = [ids['ra_id'] for ids in ralist_raw if ids['va_id'] != va_id]
        ralist = list(set(ralist))

        if len(ralist) > 0:
            # One compatibility value per candidate, index-aligned with
            # ralist.
            compatibilities = [cmp_virtual_to_real_author(va_id, ra_id)
                               for ra_id in ralist]
            best_compatibility = max(compatibilities)

            if best_compatibility < adding_threshold:
                bconfig.LOGGER.log(
                    25, "|-> Creating NEW real author for this" +
                    " virtual author (compatibility below adding threshold" +
                    " of other RAs).")
                create_new_realauthor(va_id)
                update_ralist_cache(va_cluster, va_hash)

            else:
                tie_count = compatibilities.count(best_compatibility)

                if tie_count == 1:
                    index = compatibilities.index(best_compatibility)
                    add_realauthor_va(ralist[index], va_id,
                                      best_compatibility)
                    bconfig.LOGGER.log(
                        25, "|-> Adding to real author #%s"
                        " with a compatability of %.2f" %
                        (ralist[index], best_compatibility))

                elif tie_count > 1:
                    if multi_va_to_ra:
                        bconfig.LOGGER.log(
                            25, "|-> virtual author"
                            " comaptible with more than one realauthor.")

                        # Positions of every candidate sharing the best
                        # value. A plain scan is used because
                        # list.index(value, start) raises ValueError as soon
                        # as start lies past the last occurrence.
                        indexes = [position for position, value
                                   in enumerate(compatibilities)
                                   if value == best_compatibility]

                        bconfig.LOGGER.log(
                            25, "|-> virtual author"
                            " will be attached to %s real authors" %
                            (len(indexes)))

                        for position in indexes:
                            add_realauthor_va(ralist[position], va_id,
                                              best_compatibility)
                            bconfig.LOGGER.log(
                                25, "|--> Adding to real author"
                                " #%s with a compatability of %.2f" %
                                (ralist[position], best_compatibility))

                    else:
                        bconfig.LOGGER.log(
                            25, "|-> virtual author"
                            " comaptible with more than one realauthor..."
                            "skipped for now.")
                        bconfig.LOGGER.log(
                            25, "|> The (skipped) comparison "
                            "with %s real authors took %.2fs" %
                            (len(ralist), time.time() - start))
                        (bibauthorid_virtualauthor_utils.
                         update_virtualauthor_record(va_id, 'connected',
                                                     'False'))
                        (bibauthorid_virtualauthor_utils.
                         delete_virtualauthor_record(va_id, 'updated'))

                    # NOTE(review): this return also covers the
                    # multi_va_to_ra branch, so a virtual author attached to
                    # several real authors never reaches the 'connected' =
                    # 'True' update below; kept as in the original, confirm
                    # whether that is intended.
                    return
        else:
            bconfig.LOGGER.log(
                25, "|-> Creating NEW real author for this"
                " Virtual Author (currently, no real author exists)")
            create_new_realauthor(va_id)
            update_ralist_cache(va_cluster, va_hash)

    (bibauthorid_virtualauthor_utils.update_virtualauthor_record(
        va_id, 'connected', 'True'))
    (bibauthorid_virtualauthor_utils.delete_virtualauthor_record(
        va_id, 'updated'))

    bconfig.LOGGER.log(
        25, "|> The comparison with %s real authors took %.2fs" %
        (len(ralist), time.time() - addstart))