Beispiel #1
0
def _get_personids_to_update_extids(papers=None):
    '''
    Collect the personids whose external ids have to be recalculated.

    The candidate papers are the ones modified since the most recent
    'PID_UPDATE' log entry of the 'daemon' user, or every valid bibrec
    when no such entry exists. A non-empty `papers` argument narrows the
    candidates down to its members.

    @param papers: papers to restrict the search to (optional)
    @type papers: set or None
    @return: personids, or None when no candidate paper is left
    @rtype: set or None
    '''
    log_entries = get_user_logs(userinfo='daemon',
                                action='PID_UPDATE',
                                only_most_recent=True)
    # Field 2 of the newest log row is what get_modified_papers_since
    # receives (presumably the time of the last daemon run -- confirm).
    candidates = (get_modified_papers_since(log_entries[0][2])
                  if log_entries else get_all_valid_bibrecs())

    if papers:
        candidates &= set(papers)
    if not candidates:
        return None

    if bconfig.LIMIT_EXTERNAL_IDS_COLLECTION_TO_CLAIMED_PAPERS:
        # Only the first field of each returned row is kept.
        claimed_rows = get_claimed_papers_from_papers(candidates)
        candidates = [row[0] for row in claimed_rows]

    personids = set()
    for paper in candidates:
        personids.update(get_authors_of_claimed_paper(paper))
    return personids
def _get_personids_to_update_extids(papers=None):
    '''
    Work out which personids need their external ids recomputed.

    Starts from the papers modified since the latest 'PID_UPDATE' log
    entry written by the 'daemon' user; if there is no such entry, all
    valid papers are considered. When `papers` is given and non-empty the
    selection is intersected with it.

    @param papers: optional restriction on the papers to look at
    @type papers: set or None
    @return: personids, or None if no paper remains to be examined
    @rtype: set or None
    '''
    newest_log = get_user_logs(userinfo='daemon',
                               action='PID_UPDATE',
                               only_most_recent=True)
    if not newest_log:
        bibrecs = get_all_valid_papers()
    else:
        # Third field of the first row is passed on as the "since" value.
        since = newest_log[0][2]
        bibrecs = get_modified_papers_since(since)

    if papers:
        bibrecs &= set(papers)

    if not bibrecs:
        return None

    if bconfig.LIMIT_EXTERNAL_IDS_COLLECTION_TO_CLAIMED_PAPERS:
        bibrecs = [entry[0]
                   for entry in get_claimed_papers_from_papers(bibrecs)]

    result = set()
    for rec in bibrecs:
        authors = set(get_authors_of_claimed_paper(rec))
        result |= authors
    return result
Beispiel #3
0
def run_tortoise(from_scratch,
                 last_names_thresholds=None,
                 single_threaded=False):
    '''
    Dispatch one of the tortoise entry points after preparing the cache.

    - With `last_names_thresholds` and `single_threaded`: call
      tortoise_last_name once per (last name, threshold) pair.
    - With `last_names_thresholds` only: build one (args, kwargs) job per
      last name and hand the whole list to tortoise_last_names.
    - Otherwise, with `from_scratch`: call tortoise_from_scratch.
    - Otherwise: run tortoise on the papers modified since the latest log
      entry of the 'tortoise' user (an empty list when there is none) and
      insert a new log entry stamped with the time read before the run.

    @param from_scratch: selects the from-scratch behaviour (see above)
    @param last_names_thresholds: mapping of last name to threshold
    @type last_names_thresholds: dict or None
    @param single_threaded: process the last names one by one
    @type single_threaded: bool
    '''
    _prepare_tortoise_cache()

    from invenio.bibauthorid_tortoise import tortoise, \
        tortoise_from_scratch, tortoise_last_name, tortoise_last_names

    if last_names_thresholds:
        if single_threaded:
            for name, wedge in last_names_thresholds.items():
                tortoise_last_name(name,
                                   wedge_threshold=wedge,
                                   from_mark=from_scratch)
            return

        # The same value is forwarded under 'from_mark' when it is truthy
        # and under 'pure' when it is falsy.
        option = 'from_mark' if from_scratch else 'pure'
        jobs = []
        for name, wedge in last_names_thresholds.items():
            # A falsy threshold is left out of the positional arguments.
            positional = (name, wedge) if wedge else (name, )
            jobs.append((positional, {option: from_scratch}))
        tortoise_last_names(jobs)
        return

    if from_scratch:
        tortoise_from_scratch()
        return

    # Incremental run: read the time first so the log entry written below
    # carries the moment the run started.
    started_at = get_db_time()
    log_user = 'tortoise'

    previous = get_user_logs(userinfo=log_user, only_most_recent=True)
    changed = get_modified_papers_since(previous[0][2]) if previous else []
    tortoise(changed)

    insert_user_log(log_user, '-1', '', '', '', timestamp=started_at)
def run_tortoise(from_scratch):
    '''
    Run tortoise either from scratch or incrementally.

    When `from_scratch` is true, tortoise_from_scratch is called and
    nothing else happens. Otherwise tortoise runs on the papers modified
    since the latest log entry of the 'tortoise' user (an empty list when
    there is no such entry), and a new log entry stamped with the time
    read before the run is inserted.

    @param from_scratch: run the from-scratch variant instead of the
        incremental one
    @type from_scratch: bool
    '''
    from invenio.bibauthorid_tortoise import tortoise, tortoise_from_scratch

    if from_scratch:
        tortoise_from_scratch()
        return

    # Read the time before doing any work so the log entry reflects the
    # start of this run.
    start_time = get_db_time()
    tortoise_db_name = 'tortoise'

    last_run = get_user_logs(userinfo=tortoise_db_name, only_most_recent=True)
    # Field 2 of the newest log row is used as the "since" value.
    modified = get_modified_papers_since(last_run[0][2]) if last_run else []
    tortoise(modified)

    # The log entry is written on the incremental path only: the names it
    # needs are bound only here, so logging after a from-scratch run used
    # to raise UnboundLocalError.
    insert_user_log(tortoise_db_name, '-1', '', '', '', timestamp=start_time)
def run_tortoise(from_scratch):
    '''
    Launch tortoise, from scratch or on recently modified papers.

    A true `from_scratch` calls tortoise_from_scratch and writes no log
    entry. Otherwise the papers modified since the latest 'tortoise' log
    entry are processed (none if there is no previous entry) and a log
    entry carrying the time read before the run is inserted.

    @param from_scratch: choose the from-scratch run over the
        incremental one
    @type from_scratch: bool
    '''
    from invenio.bibauthorid_tortoise import tortoise, tortoise_from_scratch

    if from_scratch:
        tortoise_from_scratch()
    else:
        start_time = get_db_time()
        tortoise_db_name = 'tortoise'

        last_run = get_user_logs(userinfo=tortoise_db_name,
                                 only_most_recent=True)
        if last_run:
            # Field 2 of the newest log row is used as the "since" value.
            modified = get_modified_papers_since(last_run[0][2])
        else:
            modified = []
        tortoise(modified)

        # Kept inside this branch: start_time and tortoise_db_name are
        # bound only here, so calling this after a from-scratch run
        # raised UnboundLocalError.
        insert_user_log(tortoise_db_name, '-1', '', '', '',
                        timestamp=start_time)
def run_rabbit(paperslist, all_records=False):
    '''
    Choose the set of papers and pass it to rabbit_with_log.

    - Non-empty `paperslist`: run on exactly those papers, with
      partial=True.
    - Empty `paperslist` and `all_records`: run with None as the paper
      set.
    - Empty `paperslist` otherwise: run on the papers modified since the
      latest 'PID_UPDATE' log entry of the 'daemon' user. If nothing was
      modified only a message is written; if there is no log entry at all
      the run is done with None as the paper set.

    @param paperslist: papers to run on; may be empty or None
    @param all_records: with an empty paperslist, skip the log lookup and
        run with None as the paper set
    @type all_records: bool
    '''
    if paperslist:
        rabbit_with_log(paperslist, True,
                        'bibauthorid_daemon, personid_fast_assign_papers on ' + str(paperslist),
                        partial=True)
        return

    full_run_comment = 'bibauthorid_daemon, update_personid on all papers'
    if all_records:
        rabbit_with_log(None, True, full_run_comment)
        return

    last_log = get_user_logs(userinfo='daemon', action='PID_UPDATE', only_most_recent=True)
    if len(last_log) < 1:
        # No previous run recorded, so fall back to the full run.
        rabbit_with_log(None, True, full_run_comment)
        return

    # Select only the papers modified since the last logged run.
    recently_modified = get_modified_papers_since(since=last_log[0][2])
    if not recently_modified:
        bibtask.write_message("update_personID_table_from_paper: "
                              "All person entities up to date.",
                              stream=sys.stdout, verbose=0)
        return

    bibtask.write_message("update_personID_table_from_paper: Running on: " +
                          str(recently_modified), stream=sys.stdout, verbose=0)
    run_comment = ('bibauthorid_daemon, run_personid_fast_assign_papers on '
                   + str([paperslist, all_records, recently_modified]))
    rabbit_with_log(recently_modified, True, run_comment)
Beispiel #7
0
def run_rabbit(paperslist, all_records=False):
    '''
    Run rabbit_with_log on the requested, the recent, or all papers.

    A non-empty `paperslist` is processed as given (partial=True). With
    an empty `paperslist`, `all_records` requests a run with None as the
    paper set; otherwise the papers modified since the latest
    'PID_UPDATE' log entry of the 'daemon' user are processed. When that
    log entry is missing, the run is done with None as the paper set;
    when no paper was modified, only a message is written.

    @param paperslist: papers to run on; may be empty or None
    @param all_records: with an empty paperslist, run with None as the
        paper set without consulting the log
    @type all_records: bool
    '''
    if paperslist:
        comment = ('bibauthorid_daemon, personid_fast_assign_papers on ' +
                   str(paperslist))
        rabbit_with_log(paperslist, True, comment, partial=True)
        return

    if not all_records:
        last_log = get_user_logs(userinfo='daemon',
                                 action='PID_UPDATE',
                                 only_most_recent=True)

        if len(last_log) >= 1:
            # Restrict the run to the papers modified since the last log.
            recently_modified = get_modified_papers_since(since=last_log[0][2])
            if recently_modified:
                bibtask.write_message(
                    "update_personID_table_from_paper: Running on: " +
                    str(recently_modified),
                    stream=sys.stdout,
                    verbose=0)
                rabbit_with_log(
                    recently_modified, True,
                    'bibauthorid_daemon, run_personid_fast_assign_papers on ' +
                    str([paperslist, all_records, recently_modified]))
            else:
                bibtask.write_message(
                    "update_personID_table_from_paper: "
                    "All person entities up to date.",
                    stream=sys.stdout,
                    verbose=0)
            return

    # Reached when all_records is set or when no previous log entry exists.
    rabbit_with_log(None, True,
                    'bibauthorid_daemon, update_personid on all papers')