Example #1
0
def fill_self_cites_tables(config):
    """
    Populate the self-cites tables for every record found in bibrec

    Meant for a site on which the self-cites daemon never ran. When the
    configured algorithm is 'friends', a first pass fills the intermediary
    tables; a second pass then computes and stores the self-citations of
    each record. Progress is reported once every 1000 records.
    """
    algorithm = config['algorithm']
    tags = get_authors_tags()
    rows = run_sql('SELECT id FROM bibrec ORDER BY id')
    all_ids = [row[0] for row in rows]
    total = len(all_ids)
    citations_fun = get_citations_fun(algorithm)
    write_message('using %s' % citations_fun.__name__)

    def checkpoint(template, position):
        # Report progress, and let the task sleep if required, once
        # per 1000 records
        if position % 1000 != 0:
            return
        text = template % (position, total)
        task_update_progress(text)
        write_message(text)
        task_sleep_now_if_required()

    # The intermediary tables are only needed by the friends algorithm
    # (or assimilated), so this pass is skipped for anything else
    if algorithm == 'friends':
        position = 0
        for recid in all_ids:
            checkpoint('intermediate %d/%d', position)
            update_self_cites_tables(recid, config, tags)
            position += 1

    # Second pass: the self-cites table itself
    position = 0
    for recid in all_ids:
        checkpoint('final %d/%d', position)
        compute_and_store_self_citations(recid, tags, citations_fun)
        position += 1
 def test_compute_friends_self_citations(self):
     """compute_friends_self_citations gives an empty set for record 1"""
     from invenio.bibrank_selfcites_indexer import (
         compute_friends_self_citations,
         get_authors_tags)
     self_cites = compute_friends_self_citations(1, get_authors_tags())
     self.assertEqual(self_cites, set())
 def test_compute_friends_self_citations(self):
     """compute_friends_self_citations gives an empty set for record 1"""
     from invenio.bibrank_selfcites_indexer import (
         compute_friends_self_citations,
         get_authors_tags)
     self_cites = compute_friends_self_citations(1, get_authors_tags())
     self.assertEqual(self_cites, set())
def fill_self_cites_tables(config):
    """
    Populate the self-cites tables for every record found in bibrec

    Meant for a site on which the self-cites daemon never ran. When the
    configured algorithm is 'friends', a first pass fills the intermediary
    tables; a second pass then computes and stores the self-citations of
    each record. Progress is reported once every 1000 records.
    """
    algorithm = config['algorithm']
    tags = get_authors_tags()
    rows = run_sql('SELECT id FROM bibrec ORDER BY id')
    all_ids = [row[0] for row in rows]
    total = len(all_ids)
    citations_fun = get_citations_fun(algorithm)
    write_message('using %s' % citations_fun.__name__)

    def checkpoint(template, position):
        # Report progress, and let the task sleep if required, once
        # per 1000 records
        if position % 1000 != 0:
            return
        text = template % (position, total)
        task_update_progress(text)
        write_message(text)
        task_sleep_now_if_required()

    # The intermediary tables are only needed by the friends algorithm
    # (or assimilated), so this pass is skipped for anything else
    if algorithm == 'friends':
        position = 0
        for recid in all_ids:
            checkpoint('intermediate %d/%d', position)
            update_self_cites_tables(recid, config, tags)
            position += 1

    # Second pass: the self-cites table itself
    position = 0
    for recid in all_ids:
        checkpoint('final %d/%d', position)
        compute_and_store_self_citations(recid, tags, citations_fun)
        position += 1
 def test_get_authors_tags(self):
     """test_get_authors_tags

     The tag values themselves are customizable, so only the number
     of entries returned (four) is checked
     """
     from invenio.bibrank_selfcites_indexer import get_authors_tags
     self.assertEqual(len(get_authors_tags()), 4)
 def test_get_authors_tags(self):
     """test_get_authors_tags

     The tag values themselves are customizable, so only the number
     of entries returned (four) is checked
     """
     from invenio.bibrank_selfcites_indexer import get_authors_tags
     self.assertEqual(len(get_authors_tags()), 4)
 def test_get_author_coauthors_list(self):
     """get_author_coauthors_list is truthy for record 1's authors

     Uses a friends_threshold of 3.
     """
     from invenio.bibrank_selfcites_indexer import (
         get_author_coauthors_list,
         get_authors_from_record,
         get_authors_tags)
     record_authors = get_authors_from_record(1, get_authors_tags())
     coauthors = get_author_coauthors_list(record_authors,
                                           {'friends_threshold': 3})
     self.assert_(coauthors)
 def test_get_author_coauthors_list(self):
     """get_author_coauthors_list is truthy for record 1's authors

     Uses a friends_threshold of 3.
     """
     from invenio.bibrank_selfcites_indexer import (
         get_author_coauthors_list,
         get_authors_from_record,
         get_authors_tags)
     record_authors = get_authors_from_record(1, get_authors_tags())
     coauthors = get_author_coauthors_list(record_authors,
                                           {'friends_threshold': 3})
     self.assert_(coauthors)
    def test_process_one(self):
        """Run process_one on record 1 once per entry of ALL_ALGORITHMS

        Nothing is asserted; the test only checks that no call raises.
        """
        from invenio.bibrank_selfcites_indexer import (ALL_ALGORITHMS,
                                                       get_authors_tags)
        from invenio.bibrank_selfcites_task import (get_citations_fun,
                                                    process_one)

        tags = get_authors_tags()
        for name in ALL_ALGORITHMS:
            process_one(1, tags, get_citations_fun(algorithm=name))
    def test_process_one(self):
        """Run process_one on record 1 once per entry of ALL_ALGORITHMS

        Nothing is asserted; the test only checks that no call raises.
        """
        from invenio.bibrank_selfcites_indexer import (ALL_ALGORITHMS,
                                                       get_authors_tags)
        from invenio.bibrank_selfcites_task import (get_citations_fun,
                                                    process_one)

        tags = get_authors_tags()
        for name in ALL_ALGORITHMS:
            process_one(1, tags, get_citations_fun(algorithm=name))
    def test_compute_and_store_self_citations(self):
        """Run compute_and_store_self_citations on record 1 per algorithm

        Every entry of ALL_ALGORITHMS is resolved to its citations
        function and run against record 1. Nothing is asserted; the
        test only checks that no algorithm raises.
        """
        from invenio.bibrank_selfcites_indexer import get_authors_tags
        from invenio.bibrank_selfcites_task import compute_and_store_self_citations
        from invenio.bibrank_selfcites_task import get_citations_fun
        from invenio.bibrank_selfcites_indexer import ALL_ALGORITHMS

        tags = get_authors_tags()
        for algorithm in ALL_ALGORITHMS:
            citation_fun = get_citations_fun(algorithm=algorithm)
            # Must run inside the loop: placed after it, only the last
            # algorithm would be exercised (and an empty ALL_ALGORITHMS
            # would leave citation_fun undefined)
            compute_and_store_self_citations(1, tags, citation_fun)
    def test_compute_and_store_self_citations(self):
        """Run compute_and_store_self_citations on record 1 per algorithm

        Every entry of ALL_ALGORITHMS is resolved to its citations
        function and run against record 1. Nothing is asserted; the
        test only checks that no algorithm raises.
        """
        from invenio.bibrank_selfcites_indexer import get_authors_tags
        from invenio.bibrank_selfcites_task import compute_and_store_self_citations
        from invenio.bibrank_selfcites_task import get_citations_fun
        from invenio.bibrank_selfcites_indexer import ALL_ALGORITHMS

        tags = get_authors_tags()
        for algorithm in ALL_ALGORITHMS:
            citation_fun = get_citations_fun(algorithm=algorithm)
            # Must run inside the loop: placed after it, only the last
            # algorithm would be exercised (and an empty ALL_ALGORITHMS
            # would leave citation_fun undefined)
            compute_and_store_self_citations(1, tags, citation_fun)
 def test_get_authors_from_record(self):
     """Call get_authors_from_record on record 1 twice

     Only the first result is asserted to be truthy; the second call
     merely has to complete without raising.
     """
     from invenio.bibrank_selfcites_indexer import get_authors_from_record
     from invenio.bibrank_selfcites_indexer import get_authors_tags
     from invenio.config import CFG_BIBRANK_SELFCITES_USE_BIBAUTHORID
     old_config = CFG_BIBRANK_SELFCITES_USE_BIBAUTHORID
     tags = get_authors_tags()
     # NOTE(review): the import above binds the setting to a name local to
     # this test, so the assignments below only rebind that local; they do
     # not modify invenio.config. Both calls therefore appear to run under
     # the same setting -- confirm how the indexer reads this flag and
     # toggle it there if the two modes are meant to be covered.
     CFG_BIBRANK_SELFCITES_USE_BIBAUTHORID = 0
     self.assert_(get_authors_from_record(1, tags))
     CFG_BIBRANK_SELFCITES_USE_BIBAUTHORID = 1
     get_authors_from_record(1, tags)
     # Puts the original value back into the local name
     CFG_BIBRANK_SELFCITES_USE_BIBAUTHORID = old_config
 def test_get_authors_from_record(self):
     """Call get_authors_from_record on record 1 twice

     Only the first result is asserted to be truthy; the second call
     merely has to complete without raising.
     """
     from invenio.bibrank_selfcites_indexer import get_authors_from_record
     from invenio.bibrank_selfcites_indexer import get_authors_tags
     from invenio.config import CFG_BIBRANK_SELFCITES_USE_BIBAUTHORID
     old_config = CFG_BIBRANK_SELFCITES_USE_BIBAUTHORID
     tags = get_authors_tags()
     # NOTE(review): the import above binds the setting to a name local to
     # this test, so the assignments below only rebind that local; they do
     # not modify invenio.config. Both calls therefore appear to run under
     # the same setting -- confirm how the indexer reads this flag and
     # toggle it there if the two modes are meant to be covered.
     CFG_BIBRANK_SELFCITES_USE_BIBAUTHORID = 0
     self.assert_(get_authors_from_record(1, tags))
     CFG_BIBRANK_SELFCITES_USE_BIBAUTHORID = 1
     get_authors_from_record(1, tags)
     # Puts the original value back into the local name
     CFG_BIBRANK_SELFCITES_USE_BIBAUTHORID = old_config
 def test_store_record(self):
     """store_record leaves a non-zero rnkRECORDSCACHE row count for record 1"""
     from invenio.bibrank_selfcites_indexer import (store_record,
                                                    get_authors_from_record,
                                                    get_authors_tags)
     from invenio.dbquery import run_sql
     record_id = 1
     record_authors = get_authors_from_record(record_id, get_authors_tags())
     params = (record_id,)
     # Remove any existing rows first so the count reflects store_record
     run_sql('DELETE FROM rnkRECORDSCACHE WHERE id_bibrec = %s', params)
     store_record(record_id, record_authors)
     rows = run_sql(
         'SELECT count(*) FROM rnkRECORDSCACHE WHERE id_bibrec = %s',
         params)
     self.assert_(rows[0][0])
 def test_store_record(self):
     """store_record leaves a non-zero rnkRECORDSCACHE row count for record 1"""
     from invenio.bibrank_selfcites_indexer import (store_record,
                                                    get_authors_from_record,
                                                    get_authors_tags)
     from invenio.dbquery import run_sql
     record_id = 1
     record_authors = get_authors_from_record(record_id, get_authors_tags())
     params = (record_id,)
     # Remove any existing rows first so the count reflects store_record
     run_sql('DELETE FROM rnkRECORDSCACHE WHERE id_bibrec = %s', params)
     store_record(record_id, record_authors)
     rows = run_sql(
         'SELECT count(*) FROM rnkRECORDSCACHE WHERE id_bibrec = %s',
         params)
     self.assert_(rows[0][0])
    def test_store_record_coauthors_with_none_deleted(self):
        """store_record_coauthors leaves rnkEXTENDEDAUTHORS rows for id 1

        The record's authors are passed as both the second and fourth
        arguments, with an empty list as the third.
        """
        from invenio.bibrank_selfcites_indexer import (
            store_record_coauthors,
            get_authors_from_record,
            get_authors_tags)
        from invenio.dbquery import run_sql
        record_id = 1
        threshold_config = {'friends_threshold': 3}
        record_authors = get_authors_from_record(record_id,
                                                 get_authors_tags())
        params = (record_id,)

        # Remove any existing rows first so the count reflects this call
        run_sql('DELETE FROM rnkEXTENDEDAUTHORS WHERE id = %s', params)
        store_record_coauthors(record_id, record_authors, [],
                               record_authors, threshold_config)
        rows = run_sql(
            'SELECT count(*) FROM rnkEXTENDEDAUTHORS WHERE id = %s',
            params)
        self.assert_(rows[0][0])
    def test_store_record_coauthors_with_none_deleted(self):
        """store_record_coauthors leaves rnkEXTENDEDAUTHORS rows for id 1

        The record's authors are passed as both the second and fourth
        arguments, with an empty list as the third.
        """
        from invenio.bibrank_selfcites_indexer import (
            store_record_coauthors,
            get_authors_from_record,
            get_authors_tags)
        from invenio.dbquery import run_sql
        record_id = 1
        threshold_config = {'friends_threshold': 3}
        record_authors = get_authors_from_record(record_id,
                                                 get_authors_tags())
        params = (record_id,)

        # Remove any existing rows first so the count reflects this call
        run_sql('DELETE FROM rnkEXTENDEDAUTHORS WHERE id = %s', params)
        store_record_coauthors(record_id, record_authors, [],
                               record_authors, threshold_config)
        rows = run_sql(
            'SELECT count(*) FROM rnkEXTENDEDAUTHORS WHERE id = %s',
            params)
        self.assert_(rows[0][0])
def fill_self_cites_tables(rank_method_code, config):
    """
    Populate the self-cites tables for every record found in bibrec

    Meant for a site on which the self-cites daemon never ran. One
    dictionary is handed to every compute_and_store_self_citations call;
    afterwards it is passed to intoDB, together with the time at which
    this function started, and to store_weights_cache.

    This is an optimization for empty tables; the result is hoped to be
    the same as the one produced by compute_and_store_self_citations.
    """
    begin_date = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    algorithm = config['algorithm']
    tags = get_authors_tags()
    selfcites_dic = {}
    all_ids = intbitset(run_sql('SELECT id FROM bibrec ORDER BY id'))
    total = len(all_ids)
    citations_fun = get_citations_fun(algorithm)
    write_message('using %s' % citations_fun.__name__)

    def checkpoint(template, position):
        # Report progress, and let the task sleep if required, once
        # per 1000 records
        if position % 1000 != 0:
            return
        text = template % (position, total)
        task_update_progress(text)
        write_message(text)
        task_sleep_now_if_required()

    # The intermediary tables are only needed by the friends algorithm
    # (or assimilated), so this pass is skipped for anything else
    if algorithm == 'friends':
        position = 0
        for recid in all_ids:
            checkpoint('intermediate %d/%d', position)
            update_self_cites_tables(recid, config, tags)
            position += 1

    # Second pass: the self-cites table itself
    position = 0
    for recid in all_ids:
        checkpoint('final %d/%d', position)
        compute_and_store_self_citations(recid, tags, citations_fun,
                                         selfcites_dic)
        position += 1

    intoDB(selfcites_dic, begin_date, rank_method_code)
    store_weights_cache(selfcites_dic)
Example #20
0
def fill_self_cites_tables(rank_method_code, config):
    """
    Populate the self-cites tables for every record found in bibrec

    Meant for a site on which the self-cites daemon never ran. One
    dictionary is handed to every compute_and_store_self_citations call;
    afterwards it is passed to intoDB, together with the time at which
    this function started, and to store_weights_cache.

    This is an optimization for empty tables; the result is hoped to be
    the same as the one produced by compute_and_store_self_citations.
    """
    begin_date = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    algorithm = config['algorithm']
    tags = get_authors_tags()
    selfcites_dic = {}
    all_ids = intbitset(run_sql('SELECT id FROM bibrec ORDER BY id'))
    total = len(all_ids)
    citations_fun = get_citations_fun(algorithm)
    write_message('using %s' % citations_fun.__name__)

    def checkpoint(template, position):
        # Report progress, and let the task sleep if required, once
        # per 1000 records
        if position % 1000 != 0:
            return
        text = template % (position, total)
        task_update_progress(text)
        write_message(text)
        task_sleep_now_if_required()

    # The intermediary tables are only needed by the friends algorithm
    # (or assimilated), so this pass is skipped for anything else
    if algorithm == 'friends':
        position = 0
        for recid in all_ids:
            checkpoint('intermediate %d/%d', position)
            update_self_cites_tables(recid, config, tags)
            position += 1

    # Second pass: the self-cites table itself
    position = 0
    for recid in all_ids:
        checkpoint('final %d/%d', position)
        compute_and_store_self_citations(recid, tags, citations_fun,
                                         selfcites_dic)
        position += 1

    intoDB(selfcites_dic, begin_date, rank_method_code)
    store_weights_cache(selfcites_dic)
Example #21
0
def process_updates(rank_method_code):
    """
    Entry point executed first when the task is started.

    Reads the "algorithm" and "friends_threshold" settings of the rank
    method. When the "quick" task option equals "no", the work is handed
    to rebuild_tables() and its result is returned.
    NOTE(review): presumably this is what --rebuild sets; confirm against
    the task's option parsing.
    Otherwise every concerned record goes through process_one() and the
    weights are stored with intoDB() and store_weights_cache().

    Returns True once the incremental run is complete.
    """
    write_message("Running rank method: %s" % rank_method_code, verbose=0)

    parser = read_configuration(rank_method_code)
    algorithm = parser.get(rank_method_code, "algorithm")
    threshold = parser.get(rank_method_code, "friends_threshold")
    config = {'algorithm': algorithm, 'friends_threshold': threshold}

    if task_get_option("quick") == "no":
        return rebuild_tables(rank_method_code, config)

    tags = get_authors_tags()
    requested_id = task_get_option("id")
    recids, end_date = fetch_concerned_records(rank_method_code,
                                               requested_id)
    citations_fun = get_citations_fun(algorithm)
    weights = fromDB(rank_method_code)

    write_message("recids %s" % str(recids))

    total = len(recids)
    done = 0
    for recid in recids:
        done += 1
        task_sleep_now_if_required(can_stop_too=True)
        progress = "Extracting for %s (%d/%d)" % (recid, done, total)
        task_update_progress(progress)
        write_message(progress)

        process_one(recid, tags, citations_fun, weights)

    intoDB(weights, end_date, rank_method_code)
    store_weights_cache(weights)

    write_message("Complete")
    return True
def process_updates(rank_method_code):
    """
    Entry point executed first when the task is started.

    Reads the "algorithm" and "friends_threshold" settings of the rank
    method. When the "quick" task option equals "no", the work is handed
    to rebuild_tables() and its result is returned.
    NOTE(review): presumably this is what --rebuild sets; confirm against
    the task's option parsing.
    Otherwise every concerned record goes through process_one() and the
    weights are stored with intoDB() and store_weights_cache().

    Returns True once the incremental run is complete.
    """
    write_message("Running rank method: %s" % rank_method_code, verbose=0)

    parser = read_configuration(rank_method_code)
    algorithm = parser.get(rank_method_code, "algorithm")
    threshold = parser.get(rank_method_code, "friends_threshold")
    config = {'algorithm': algorithm, 'friends_threshold': threshold}

    if task_get_option("quick") == "no":
        return rebuild_tables(rank_method_code, config)

    tags = get_authors_tags()
    requested_id = task_get_option("id")
    recids, end_date = fetch_concerned_records(rank_method_code,
                                               requested_id)
    citations_fun = get_citations_fun(algorithm)
    weights = fromDB(rank_method_code)

    write_message("recids %s" % str(recids))

    total = len(recids)
    done = 0
    for recid in recids:
        done += 1
        task_sleep_now_if_required(can_stop_too=True)
        progress = "Extracting for %s (%d/%d)" % (recid, done, total)
        task_update_progress(progress)
        write_message(progress)

        process_one(recid, tags, citations_fun, weights)

    intoDB(weights, end_date, rank_method_code)
    store_weights_cache(weights)

    write_message("Complete")
    return True
Example #23
0
def process_updates(rank_method_code):
    """
    Entry point executed first when the task is started.

    Reads the "algorithm" and "friends_threshold" settings of the rank
    method. When the "quick" task option equals "no", the work is handed
    to rebuild_tables() and its result is returned.
    NOTE(review): presumably this is what --rebuild sets; confirm against
    the task's option parsing.
    Otherwise every concerned record goes through process_one(), after
    which the start time of this run is passed to store_last_updated().

    Returns True once the incremental run is complete.
    """
    parser = read_configuration(rank_method_code)
    algorithm = parser.get(rank_method_code, "algorithm")
    threshold = parser.get(rank_method_code, "friends_threshold")
    config = {'algorithm': algorithm, 'friends_threshold': threshold}
    # Taken before any record is fetched; stored at the very end
    begin_date = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    if task_get_option("quick") == "no":
        return rebuild_tables(config)

    write_message("Starting")

    tags = get_authors_tags()
    recids = fetch_concerned_records(rank_method_code)
    citations_fun = get_citations_fun(algorithm)

    write_message("recids %s" % str(recids))

    total = len(recids)
    done = 0
    for recid in recids:
        done += 1
        task_sleep_now_if_required(can_stop_too=True)
        progress = "Extracting for %s (%d/%d)" % (recid, done, total)
        task_update_progress(progress)
        write_message(progress)

        process_one(recid, tags, citations_fun)

    store_last_updated(rank_method_code, begin_date)

    write_message("Complete")
    return True
def process_updates(rank_method_code):
    """
    Entry point executed first when the task is started.

    Reads the "algorithm" and "friends_threshold" settings of the rank
    method. When the "quick" task option equals "no", the work is handed
    to rebuild_tables() and its result is returned.
    NOTE(review): presumably this is what --rebuild sets; confirm against
    the task's option parsing.
    Otherwise every concerned record goes through process_one(), after
    which the start time of this run is passed to store_last_updated().

    Returns True once the incremental run is complete.
    """
    parser = read_configuration(rank_method_code)
    algorithm = parser.get(rank_method_code, "algorithm")
    threshold = parser.get(rank_method_code, "friends_threshold")
    config = {'algorithm': algorithm, 'friends_threshold': threshold}
    # Taken before any record is fetched; stored at the very end
    begin_date = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    if task_get_option("quick") == "no":
        return rebuild_tables(config)

    write_message("Starting")

    tags = get_authors_tags()
    recids = fetch_concerned_records(rank_method_code)
    citations_fun = get_citations_fun(algorithm)

    write_message("recids %s" % str(recids))

    total = len(recids)
    done = 0
    for recid in recids:
        done += 1
        task_sleep_now_if_required(can_stop_too=True)
        progress = "Extracting for %s (%d/%d)" % (recid, done, total)
        task_update_progress(progress)
        write_message(progress)

        process_one(recid, tags, citations_fun)

    store_last_updated(rank_method_code, begin_date)

    write_message("Complete")
    return True
 def test_update_self_cites_tables(self):
     """update_self_cites_tables runs on record 1 with an empty config

     Nothing is asserted; the test only checks that the call does not raise.
     """
     from invenio.bibrank_selfcites_indexer import (update_self_cites_tables,
                                                    get_authors_tags)
     update_self_cites_tables(1, {}, get_authors_tags())
 def test_get_collaborations_from_record(self):
     """get_collaborations_from_record gives a falsy result for record 1"""
     from invenio.bibrank_selfcites_indexer import (
         get_collaborations_from_record,
         get_authors_tags)
     collaborations = get_collaborations_from_record(1, get_authors_tags())
     self.assert_(not collaborations)
 def test_get_collaborations_from_record(self):
     """get_collaborations_from_record gives a falsy result for record 1"""
     from invenio.bibrank_selfcites_indexer import (
         get_collaborations_from_record,
         get_authors_tags)
     collaborations = get_collaborations_from_record(1, get_authors_tags())
     self.assert_(not collaborations)
 def test_update_self_cites_tables(self):
     """update_self_cites_tables runs on record 1 with an empty config

     Nothing is asserted; the test only checks that the call does not raise.
     """
     from invenio.bibrank_selfcites_indexer import (update_self_cites_tables,
                                                    get_authors_tags)
     update_self_cites_tables(1, {}, get_authors_tags())