def test_update_self_cites_tables(self):
    # Smoke test: there are no assertions, so the test passes as long as
    # update_self_cites_tables(1, {}, <author tags>) does not raise.
    from invenio.legacy.bibrank.selfcites_indexer import (
        update_self_cites_tables,
        get_authors_tags,
    )

    update_self_cites_tables(1, {}, get_authors_tags())
Example #2
0
def fill_self_cites_tables(config):
    """
    Populate the self-cites tables for every record id found in ``bibrec``.

    Meant for a website that never ran the self-cites daemon, so the tables
    have to be filled from scratch.

    :param config: mapping read for its ``'algorithm'`` key; it is also
        passed unchanged to ``update_self_cites_tables``.
    """
    algorithm = config['algorithm']
    tags = get_authors_tags()
    rows = run_sql('SELECT id FROM bibrec ORDER BY id')
    recids = [row[0] for row in rows]
    total = len(recids)
    citations_fun = get_citations_fun(algorithm)
    write_message('using %s' % citations_fun.__name__)

    def checkpoint(template, position):
        # Once every 1000 records: publish the progress message, log it,
        # and call task_sleep_now_if_required().
        if position % 1000:
            return
        msg = template % (position, total)
        task_update_progress(msg)
        write_message(msg)
        task_sleep_now_if_required()

    # The intermediary tables are only needed for the 'friends' algorithm
    # (or assimilated ones).
    if algorithm == 'friends':
        for position, recid in enumerate(recids):
            checkpoint('intermediate %d/%d', position)
            update_self_cites_tables(recid, config, tags)

    # Second pass: fill the self-cites table itself.
    for position, recid in enumerate(recids):
        checkpoint('final %d/%d', position)
        compute_and_store_self_citations(recid, tags, citations_fun)
Example #3
0
def fill_self_cites_tables(rank_method_code, config):
    """
    Populate the self-cites tables for every record id found in ``bibrec``.

    Meant for a website that never ran the self-cites daemon. This is an
    optimization for running on empty tables; the result is hoped to match
    what compute_and_store_self_citations would produce.

    :param rank_method_code: forwarded to ``intoDB`` together with the
        start timestamp and the collected dictionary.
    :param config: mapping read for its ``'algorithm'`` key; it is also
        passed unchanged to ``update_self_cites_tables``.
    """
    # Timestamp taken before any processing starts; handed to intoDB below.
    begin_date = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    algorithm = config['algorithm']
    tags = get_authors_tags()
    # Passed to every compute_and_store_self_citations call, then to
    # intoDB and store_weights_cache (presumably filled in by the former).
    selfcites_dic = {}
    all_ids = intbitset(run_sql('SELECT id FROM bibrec ORDER BY id'))
    citations_fun = get_citations_fun(algorithm)
    write_message('using %s' % citations_fun.__name__)

    def with_progress(template):
        # Yield each record id; before every 1000th one (starting with the
        # first) report progress and call task_sleep_now_if_required().
        for position, recid in enumerate(all_ids):
            if not position % 1000:
                msg = template % (position, len(all_ids))
                task_update_progress(msg)
                write_message(msg)
                task_sleep_now_if_required()
            yield recid

    # The intermediary tables are only needed for the 'friends' algorithm
    # (or assimilated ones).
    if algorithm == 'friends':
        for recid in with_progress('intermediate %d/%d'):
            update_self_cites_tables(recid, config, tags)

    # Second pass: fill the self-cites table itself.
    for recid in with_progress('final %d/%d'):
        compute_and_store_self_citations(
            recid, tags, citations_fun, selfcites_dic)

    intoDB(selfcites_dic, begin_date, rank_method_code)
    store_weights_cache(selfcites_dic)
Example #4
0
def fill_self_cites_tables(rank_method_code, config):
    """
    Fill the self-cites tables from scratch for all records in ``bibrec``.

    Intended for a website where the self-cites daemon never ran. It is an
    optimization for empty tables, and the result is hoped to be the same
    as the one from compute_and_store_self_citations.

    :param rank_method_code: handed to ``intoDB`` with the start timestamp
        and the collected dictionary.
    :param config: mapping providing the ``'algorithm'`` key; also given
        as-is to ``update_self_cites_tables``.
    """
    started_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    algorithm = config['algorithm']
    tags = get_authors_tags()
    # Shared by all compute_and_store_self_citations calls and afterwards
    # given to intoDB and store_weights_cache.
    selfcites_dic = {}
    recids = intbitset(run_sql('SELECT id FROM bibrec ORDER BY id'))
    record_count = len(recids)
    citations_fun = get_citations_fun(algorithm)
    write_message('using %s' % citations_fun.__name__)

    # Intermediary tables: only the 'friends' algorithm (or assimilated
    # ones) needs them.
    if algorithm == 'friends':
        for done, recid in enumerate(recids):
            if not done % 1000:
                progress = 'intermediate %d/%d' % (done, record_count)
                task_update_progress(progress)
                write_message(progress)
                task_sleep_now_if_required()
            update_self_cites_tables(recid, config, tags)

    # Self-cites table proper; progress is reported every 1000 records.
    for done, recid in enumerate(recids):
        if not done % 1000:
            progress = 'final %d/%d' % (done, record_count)
            task_update_progress(progress)
            write_message(progress)
            task_sleep_now_if_required()
        compute_and_store_self_citations(
            recid,
            tags,
            citations_fun,
            selfcites_dic,
        )

    intoDB(selfcites_dic, started_at, rank_method_code)
    store_weights_cache(selfcites_dic)
Example #5
0
 def test_update_self_cites_tables(self):
     # Smoke test without assertions: it succeeds when the call below
     # completes without raising for record id 1 and an empty config.
     from invenio.legacy.bibrank.selfcites_indexer import (
         get_authors_tags,
         update_self_cites_tables,
     )

     author_tags = get_authors_tags()
     empty_config = {}
     update_self_cites_tables(1, empty_config, author_tags)