Beispiel #1
0
    def test_singletons(self):
        """Singleton trees must produce no expert or classic recommendations.

        Entering either loop body means the recommender emitted a record
        for a singleton input, which is a failure.
        """
        tr = TreeFile(SINGLETONS)
        for recs in make_expert_rec(tr):
            # `raise "..."` raises TypeError in Python 3 (exceptions must
            # derive from BaseException); use the unittest failure API.
            self.fail("This shouldn't ever happen")

        tr = TreeFile(SINGLETONS)
        for recs in make_classic_recs(tr):
            self.fail("This shouldn't ever happen")
Beispiel #2
0
    def test_singletons(self):
        """Singleton trees must produce no expert or classic recommendations.

        Entering either loop body means the recommender emitted a record
        for a singleton input, which is a failure.
        """
        tr = TreeFile(SINGLETONS)
        for recs in make_expert_rec(tr):
            # `raise "..."` raises TypeError in Python 3 (exceptions must
            # derive from BaseException); use the unittest failure API.
            self.fail("This shouldn't ever happen")

        tr = TreeFile(SINGLETONS)
        for recs in make_classic_recs(tr):
            self.fail("This shouldn't ever happen")
Beispiel #3
0
def make_expert(args, b, t):
    """Stream expert recommendations parsed from args.tree into table *t*.

    Each record is echoed when args.verbose is set, written via the batch
    context unless args.dryrun is set, and counted on benchmark *b*.
    """
    with t.get_batch_put_context() as batch:
        # Rewind so the tree file is parsed from the top.
        args.tree.seek(0)
        stream = process_record_stream(make_expert_rec(TreeFile(args.tree)))
        for rec in stream:
            if args.verbose:
                print(rec)
            if not args.dryrun:
                batch.put_item(rec)
            b.increment()
Beispiel #4
0
def make_expert(args, b, t):
    """Write expert recommendations derived from args.tree to *t* in a batch.

    Respects args.verbose (echo records), args.dryrun (suppress writes),
    and ticks benchmark *b* once per record.
    """
    with t.get_batch_put_context() as batch:
        args.tree.seek(0)  # re-read the tree file from the beginning
        tree = TreeFile(args.tree)
        for item in process_record_stream(make_expert_rec(tree)):
            if args.verbose:
                print(item)
            if not args.dryrun:
                batch.put_item(item)
            b.increment()
        default=10)
    args = parser.parse_args()

    # NOTE(review): snippet is truncated — the argparse setup this
    # `default=10)` belongs to begins above this view.
    # LevelDB store with enlarged buffers, presumably for bulk loading.
    db = leveldb.LevelDB(
        args.db_path,
        write_buffer_size=100 << 20,  # 100MB
        block_cache_size=400 << 20)  # 400MB
    b = Benchmark(args.benchmark_freq)
    tf = TreeFile(args.infile)

    # Buffer writes in a WriteBatch when --batch-size is given;
    # otherwise write straight to the DB object.
    if args.batch_size:
        writer = leveldb.WriteBatch()
    else:
        writer = db

    # Pass 1: expert recommendations, keyed "<target_pid>|expert",
    # value is the msgpack-encoded list of recommended pids.
    for recs in make_expert_rec(tf, args.limit):
        recd = [r.pid for r in recs]
        key = recs[0].target_pid + "|expert"
        writer.Put(key.encode(), msgpack.packb(recd))
        b.increment()
        # Flush the batch to disk every args.batch_size records.
        if args.batch_size and b.count % args.batch_size == 0:
            db.Write(writer)

    # Pass 2: rewind the input and emit classic recommendations.
    args.infile.seek(0)
    tf = TreeFile(args.infile)
    for recs in make_classic_recs(tf, args.limit):
        recd = [r.pid for r in recs]
        key = recs[0].target_pid + "|classic"
        writer.Put(key.encode(), msgpack.packb(recd))
        b.increment()
        # NOTE(review): snippet is truncated below — this condition's
        # body (the flush) is cut off by the scrape.
        if args.batch_size and b.count % args.batch_size == 0:
Beispiel #6
0
 def test_expert_limit(self):
     """A limit of 5 truncates each expert answer list to 5 entries."""
     for recs in make_expert_rec(self.tr, 5):
         got = [rec.pid for rec in recs]
         expected = make_answer(EXPERT, recs[0].target_pid)[:5]
         self.assertListEqual(got, expected)
Beispiel #7
0
 def test_expert(self):
     """Expert recommendations match the known answers per target pid."""
     for recs in make_expert_rec(self.tr):
         pids = [rec.pid for rec in recs]
         self.assertListEqual(pids, make_answer(EXPERT, recs[0].target_pid))
        # NOTE(review): snippet is truncated — this `if` continues a
        # branch (apparently a table-reset option) that starts above
        # this view.
        if not args.dryrun:
            t.delete()

    # Optionally (re)create the table before loading.
    if args.create:
        logging.info("Creating table: " + t.table_name)
        if not args.dryrun:
            t.create(write=2000)

    entries = 0
    start = time.time()  # wall-clock baseline for the throughput report

    parser = TreeFile(args.tree)

    with t.get_batch_put_context() as batch:
        print("Generating expert recommendations...")
        for expert_rec in process_record_stream(make_expert_rec(parser)):
            if args.verbose:
                print(expert_rec)
            if not args.dryrun:
                batch.put_item(expert_rec)
            entries += 1
            # Progress report every 50k records.
            if entries % 50000 == 0:
                current_time = time.time()
                current_rate = entries/(current_time - start)
                print("\nProcessed {0:,} entries in {1:.0f} seconds: {2:.2f} entries/sec".format(entries, time.time()-start, entries/(time.time()-start)))
                sys.stdout.flush()

        # Reset for the second pass
        print("Generating classic recommendations...")
        args.tree.seek(0)
        parser = TreeFile(args.tree)
Beispiel #9
0
 def test_expert_limit(self):
     """A limit of 5 truncates each expert answer list to 5 entries."""
     for recs in make_expert_rec(self.tr, 5):
         observed = [rec.pid for rec in recs]
         truncated_answer = make_answer(EXPERT, recs[0].target_pid)[:5]
         self.assertListEqual(observed, truncated_answer)
Beispiel #10
0
 def test_expert(self):
     """Each expert recommendation list equals the expected answer."""
     for recs in make_expert_rec(self.tr):
         observed = [rec.pid for rec in recs]
         expected = make_answer(EXPERT, recs[0].target_pid)
         self.assertListEqual(observed, expected)
from recommenders.ef import make_classic_recs, make_expert_rec
from util.misc import Benchmark

if __name__ == "__main__":
    import argparse
    import sys

    # CLI: read a tree file (or stdin) and persist EF recommendations
    # into a shelve DBM keyed "<kind>|<target_pid>".
    cli = argparse.ArgumentParser(description="Creates EF recommendations and store them in a DBM")
    cli.add_argument('infile', nargs='?', type=argparse.FileType('r'), default=sys.stdin)
    cli.add_argument('shelf')
    cli.add_argument('--benchmark-freq', default=10000, type=int)
    cli.add_argument('--toint', help="Convert scores to integers, larger is better", action='store_true', default=False)
    cli.add_argument('-l', '--limit', type=int, help="Max number of recommendations to generate per-paper", default=10)
    args = cli.parse_args()

    tree = TreeFile(args.infile)
    bench = Benchmark(args.benchmark_freq)
    # flag='n' always creates a fresh shelf, clobbering any existing one.
    with shelve.open(args.shelf, flag='n', protocol=pickle.HIGHEST_PROTOCOL) as shelf:
        # Pass 1: expert recommendations.
        for group in make_expert_rec(tree, args.limit):
            shelf['expert|' + group[0].target_pid] = [rec.pid for rec in group]
            bench.increment()

        # Pass 2: rewind the input and emit classic recommendations.
        args.infile.seek(0)
        tree = TreeFile(args.infile)
        for group in make_classic_recs(tree, args.limit):
            shelf['classic|' + group[0].target_pid] = [rec.pid for rec in group]
            bench.increment()

    bench.print_freq()