Example #1
def get_db(clear=True):
    "Set up the database for testing."
    db = get_test_db()
    db.grab_session()
    if clear:
        db._clear(force=True)
    return db
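A minimal usage sketch (not from the source) of the get_db() helper above: a test grabs a freshly cleared database and then does its own inserts and assertions.

def test_with_fresh_db():
    # get_db(clear=True) wipes the test database so the test starts clean.
    db = get_db(clear=True)
    # ... insert fixtures and make assertions against `db` here ...
    assert db is not None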
Example #2
    def __init__(self, max_total_stmts):
        self.test_db = dbu.get_test_db()
        self.test_db._clear(force=True)
        with open(os.path.join(THIS_DIR, 'db_pa_test_input_1M.pkl'),
                  'rb') as f:
            self.test_data = pickle.load(f)

        # Subsample the raw statement tuples when they exceed the cap.
        if max_total_stmts < len(self.test_data['raw_statements']['tuples']):
            self.stmt_tuples = random.sample(
                self.test_data['raw_statements']['tuples'], max_total_stmts)
        else:
            self.stmt_tuples = self.test_data['raw_statements']['tuples']

        self.used_stmt_tuples = set()
        return
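The enclosing class is not shown in this snippet, so the usage below is purely illustrative; StmtSampleLoader is a hypothetical placeholder name for it.

# Hypothetical: the constructor caps the statement tuples at max_total_stmts.
loader = StmtSampleLoader(max_total_stmts=10000)
assert len(loader.stmt_tuples) <= 10000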
Example #3
def _get_background_loaded_db():
    db = db_util.get_test_db()
    db._clear(force=True)

    # get and load the provenance for the statements.
    print("\tloading background metadata...")
    db.copy('text_ref', _load_tuples('test_text_ref_tuples.pkl'),
            ('id', 'pmid', 'pmcid', 'doi'))
    tc_tuples = [
        t + (b'', ) for t in _load_tuples('test_text_content_tuples.pkl')
    ]
    db.copy('text_content', tc_tuples,
            ('id', 'text_ref_id', 'source', 'format', 'text_type', 'content'))
    r_tuples = [t + (b'', ) for t in _load_tuples('test_reading_tuples.pkl')]
    db.copy('reading', r_tuples, ('id', 'reader', 'reader_version',
                                  'text_content_id', 'format', 'bytes'))
    db.copy('db_info', _load_tuples('test_db_info_tuples.pkl'),
            ('id', 'db_name'))
    return db
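A hypothetical test (not in the source) built on _get_background_loaded_db(); db.select_all and the db.TextRef table attribute are assumptions about the DatabaseManager API, so adjust to the actual query interface.

def test_background_db_is_populated():
    db = _get_background_loaded_db()
    # select_all/TextRef are assumed DatabaseManager attributes (hypothetical).
    assert db.select_all(db.TextRef), "Expected pre-loaded text refs."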
Example #4
def test_normal_db_reading_call():
    chdir(path.expanduser('~'))
    # Put some basic stuff in the test database
    N = 6
    db = dbu.get_test_db()
    db._clear(force=True)
    db.copy('text_ref', [(i, 'PMID80945%d' % i) for i in range(N)],
            cols=('id', 'pmid'))
    text_content = [
        (i, i, 'pubmed', 'text', 'abstract',
         zip_string('MEK phosphorylates ERK in test %d.' % i))
        for i in range(N)
        ]
    text_content += [
        (N, N-1, 'pmc_oa', 'text', 'fulltext',
         zip_string('MEK phosphorylates ERK. EGFR activates SHC.'))
        ]
    db.copy('text_content', text_content,
            cols=('id', 'text_ref_id', 'source', 'format', 'text_type',
                  'content'))

    # Put an id file on s3
    basename = 'local_db_test_run'
    s3_prefix = 'reading_results/%s/' % basename
    s3.put_object(Bucket='bigmech', Key=s3_prefix + 'id_list',
                  Body='\n'.join(['tcid: %d' % i
                                  for i in range(len(text_content))]))

    # Call the reading tool
    sub = srp.DbReadingSubmitter(basename, ['sparser'])
    job_name, cmd = sub._make_command(0, len(text_content))
    cmd += ['--test']
    check_call(cmd)
    sub.produce_report()

    # Remove garbage on s3
    res = s3.list_objects(Bucket='bigmech', Prefix=s3_prefix)
    for entry in res['Contents']:
        print("Removing %s..." % entry['Key'])
        s3.delete_object(Bucket='bigmech', Key=entry['Key'])
    return
Example #5
from indra.db.client import get_content_by_refs
from indra.db.reading_manager import BulkLocalReadingManager

from .util import needs_py3, IS_PY3

if IS_PY3:
    from indra.db.content_manager import Pubmed, PmcOA, Manuscripts, Elsevier

if '-a' in argv:
    attr_str = argv[argv.index('-a') + 1]
    if any(
        [not_attr in attr_str for not_attr in ('!nonpublic', '!webservice')]):
        raise SkipTest("Every test is nonpublic and a webservice.")

try:
    get_test_db()
except Exception as e:
    raise SkipTest("Not able to start up any of the available test hosts:\n" +
                   str(e))


#==============================================================================
# The following are some helpful functions for the rest of the tests.
#==============================================================================
def assert_contents_equal(list1, list2, msg=None):
    "Check that the contenst of two lists are the same, regardless of order."
    res = set(list1) == set(list2)
    err_msg = "Contents of lists do not match:\n%s\n%s\n" % (list1, list2)
    if msg is not None:
        err_msg += msg
    assert res, err_msg
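An illustrative call to assert_contents_equal (not from the source): the comparison is set-based, so lists with the same elements in different orders pass.

assert_contents_equal([1, 2, 3], [3, 2, 1],
                      msg="Order should not affect the comparison.")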
Example #6
        logger.info("Making readings...")
        outputs = rdb.produce_readings({'trid': trids}, [reader_inst],
                                       read_mode='unread_unread',
                                       db=db,
                                       prioritize=True,
                                       verbose=self.verbose)
        logger.info("Made %d readings." % len(outputs))
        logger.info("Making statements...")
        rdb.produce_statements(outputs, n_proc=self.n_proc, db=db)
        return


if __name__ == '__main__':
    if args.test:
        db = get_test_db()
    else:
        db = get_primary_db()

    if args.method == 'local':
        bulk_managers = [
            BulkLocalReadingManager(reader_name,
                                    buffer_days=args.buffer,
                                    n_proc=args.num_procs)
            for reader_name in ['SPARSER', 'REACH']
        ]
    elif args.method == 'aws':
        bulk_managers = [
            BulkAwsReadingManager(reader_name,
                                  buffer_days=args.buffer,
                                  project_name=args.project_name)
Example #7
    id_dict = get_id_dict([line.strip() for line in id_str_list])

    # Some combinations of options don't make sense:
    forbidden_combos = [('all', 'unread'), ('none', 'unread'), ('none', 'none')]
    assert (args.read_mode, args.stmt_mode) not in forbidden_combos, \
        ("The combination of reading mode %s and statement mode %s is not "
         "allowed." % (args.reading_mode, args.stmt_mode))

    # Init some timing dicts
    starts = {}
    ends = {}

    # Get a handle for the database
    if args.test:
        from indra.db import util as dbu
        db = dbu.get_test_db()
    else:
        db = None

    s3_log_prefix = ('reading_results/%s/logs/run_db_reading_queue/%s/'
                     % (args.basename, args.job_name))

    # Read everything ========================================
    starts['reading'] = datetime.now()
    outputs = produce_readings(id_dict, readers, verbose=True,
                               read_mode=args.read_mode,
                               get_preexisting=(args.stmt_mode == 'all'),
                               force_fulltext=args.force_fulltext,
                               prioritize=args.use_best_fulltext, db=db)
    ends['reading'] = datetime.now()
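A hypothetical continuation (not in the source) that uses the starts/ends timing dicts initialized above to report how long the reading step took.

    # Hypothetical: report elapsed wall-clock time for the reading stage.
    print("Reading took %s." % (ends['reading'] - starts['reading']))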