Example #1
0
    def test_singletons(self):
        """Singleton input must produce no recommendations of either kind.

        Iterating the generators should yield nothing; any yielded record
        is a failure.
        """
        tr = TreeFile(SINGLETONS)
        for recs in make_expert_rec(tr):
            # `raise "..."` (a string) is a TypeError in Python 3 —
            # exceptions must derive from BaseException. Use the unittest
            # failure API instead.
            self.fail("make_expert_rec yielded a record for singleton input")

        tr = TreeFile(SINGLETONS)
        for recs in make_classic_recs(tr):
            self.fail("make_classic_recs yielded a record for singleton input")
Example #2
0
    def test_singletons(self):
        """Singleton input must produce no recommendations of either kind.

        Iterating the generators should yield nothing; any yielded record
        is a failure.
        """
        tr = TreeFile(SINGLETONS)
        for recs in make_expert_rec(tr):
            # `raise "..."` (a string) is a TypeError in Python 3 —
            # exceptions must derive from BaseException. Use the unittest
            # failure API instead.
            self.fail("make_expert_rec yielded a record for singleton input")

        tr = TreeFile(SINGLETONS)
        for recs in make_classic_recs(tr):
            self.fail("make_classic_recs yielded a record for singleton input")
Example #3
0
def make_classic(args, b, t):
    """Stream classic recommendations from args.tree into table t.

    Rewinds args.tree, parses it, and puts each processed record into t's
    batch writer; b counts every record seen. Honors args.verbose (echo
    records) and args.dryrun (skip writes).
    """
    with t.get_batch_put_context() as batch:
        # Always read the tree file from the beginning.
        args.tree.seek(0)
        tree_parser = TreeFile(args.tree)
        record_stream = process_record_stream(make_classic_recs(tree_parser))
        for rec in record_stream:
            if args.verbose:
                print(rec)
            if not args.dryrun:
                batch.put_item(rec)
            b.increment()
Example #4
0
def make_classic(args, b, t):
    """Generate classic recommendations from args.tree and store them in t.

    Each processed record is counted via b; writes are skipped when
    args.dryrun is set, and records are echoed when args.verbose is set.
    """
    with t.get_batch_put_context() as batch:
        # Rewind so parsing starts at the top of the tree file.
        args.tree.seek(0)
        for classic_rec in process_record_stream(
                make_classic_recs(TreeFile(args.tree))):
            if args.verbose:
                print(classic_rec)
            if not args.dryrun:
                batch.put_item(classic_rec)
            b.increment()
    # NOTE(review): this fragment's enclosing `def` is not visible in this
    # chunk; it reads args.infile / args.limit / args.batch_size and writes
    # to a LevelDB handle `db` defined elsewhere — confirm against the full
    # file before modifying.
    tf = TreeFile(args.infile)

    # Write through a WriteBatch when batching is requested; otherwise put
    # records straight into the DB (both expose a Put(key, value) method).
    if args.batch_size:
        writer = leveldb.WriteBatch()
    else:
        writer = db

    # First pass: expert recommendations, keyed "<target_pid>|expert",
    # values are msgpack-packed lists of pids.
    for recs in make_expert_rec(tf, args.limit):
        recd = [r.pid for r in recs]
        key = recs[0].target_pid + "|expert"
        writer.Put(key.encode(), msgpack.packb(recd))
        b.increment()
        # Flush the batch every args.batch_size records.
        # NOTE(review): the batch is written but never cleared here; if
        # WriteBatch retains its ops after db.Write, earlier entries are
        # re-written on every flush — confirm the intended semantics.
        if args.batch_size and b.count % args.batch_size == 0:
            db.Write(writer)

    # Second pass: rewind the input and emit "<target_pid>|classic" keys.
    args.infile.seek(0)
    tf = TreeFile(args.infile)
    for recs in make_classic_recs(tf, args.limit):
        recd = [r.pid for r in recs]
        key = recs[0].target_pid + "|classic"
        writer.Put(key.encode(), msgpack.packb(recd))
        b.increment()
        if args.batch_size and b.count % args.batch_size == 0:
            db.Write(writer)

    # Final synchronous flush for any ops still pending in the batch.
    if args.batch_size:
        db.Write(writer, sync=True)

    b.print_freq()
    print(db.GetStats())
Example #6
0
 def test_classic_limit(self):
     """A limit of 5 must truncate each classic rec list to its first five pids."""
     limit = 5
     for recs in make_classic_recs(self.tr, limit):
         expected = make_answer(CLASSIC, recs[0].target_pid)[:limit]
         got = [rec.pid for rec in recs]
         self.assertListEqual(got, expected)
Example #7
0
 def test_classic(self):
     """Unlimited classic recs must match the reference answer per target."""
     for recs in make_classic_recs(self.tr):
         expected = make_answer(CLASSIC, recs[0].target_pid)
         self.assertListEqual([rec.pid for rec in recs], expected)
            # NOTE(review): this fragment begins mid-loop; the enclosing
            # function, its `with` block, and the loop that binds
            # `expert_rec` are outside this chunk — confirm against the
            # full file before modifying.
            if args.verbose:
                print(expert_rec)
            if not args.dryrun:
                batch.put_item(expert_rec)
            entries += 1
            # Progress report every 50k entries.
            if entries % 50000 == 0:
                current_time = time.time()
                # NOTE(review): current_rate is computed but never used —
                # the print below recomputes the rate inline.
                current_rate = entries/(current_time - start)
                print("\nProcessed {0:,} entries in {1:.0f} seconds: {2:.2f} entries/sec".format(entries, time.time()-start, entries/(time.time()-start)))
                sys.stdout.flush()

        # Reset for the second pass
        print("Generating classic recommendations...")
        args.tree.seek(0)
        parser = TreeFile(args.tree)
        for classic_rec in process_record_stream(make_classic_recs(parser)):
            if args.verbose:
                print(classic_rec)
            if not args.dryrun:
                batch.put_item(classic_rec)
            entries += 1
            # Same 50k-entry progress report as the first pass.
            if entries % 50000 == 0:
                current_time = time.time()
                # NOTE(review): current_rate unused here as well.
                current_rate = entries/(current_time - start)
                print("\nProcessed {0:,} entries in {1:.0f} seconds: {2:.2f} entries/sec".format(entries, time.time()-start, entries/(time.time()-start)))
                sys.stdout.flush()
    end = time.time()
    # Final summary over both passes.
    print("\nProcessed {0:,} entries in {1:.0f} seconds: {2:.2f} entries/sec".format(entries, end-start, entries/(end-start)))

    if not args.dryrun:
        t.update_throughput()
Example #9
0
 def test_classic_limit(self):
     """Classic recs requested with limit=5 return at most the top five pids."""
     for recs in make_classic_recs(self.tr, 5):
         pids = [entry.pid for entry in recs]
         truth = make_answer(CLASSIC, recs[0].target_pid)[:5]
         self.assertListEqual(pids, truth)
Example #10
0
 def test_classic(self):
     """Each classic rec list must equal the reference answer for its target."""
     for recs in make_classic_recs(self.tr):
         pids = [entry.pid for entry in recs]
         truth = make_answer(CLASSIC, recs[0].target_pid)
         self.assertListEqual(pids, truth)
Example #11
0
from recommenders.ef import make_classic_recs, make_expert_rec
from util.misc import Benchmark

if __name__ == "__main__":
    import argparse
    import sys
    # shelve and pickle are used below but are not imported anywhere in the
    # visible part of this file; import them here so the script runs
    # standalone. (Harmless if they are also imported at module top.)
    import pickle
    import shelve

    parser = argparse.ArgumentParser(description="Creates EF recommendations and store them in a DBM")
    parser.add_argument('infile', nargs='?', type=argparse.FileType('r'), default=sys.stdin)
    parser.add_argument('shelf')
    parser.add_argument('--benchmark-freq', default=10000, type=int)
    # NOTE(review): --toint is parsed but never used in this block — confirm
    # whether downstream code reads it or it is dead.
    parser.add_argument('--toint', help="Convert scores to integers, larger is better", action='store_true', default=False)
    parser.add_argument('-l', '--limit', type=int, help="Max number of recommendations to generate per-paper", default=10)
    args = parser.parse_args()

    # NOTE(review): TreeFile is not imported in the visible source — it is
    # presumably imported above this chunk; verify against the full file.
    tf = TreeFile(args.infile)
    b = Benchmark(args.benchmark_freq)
    # flag='n' always creates a fresh shelf, clobbering any existing one.
    with shelve.open(args.shelf, flag='n', protocol=pickle.HIGHEST_PROTOCOL) as s:
        # First pass: expert recommendations, keyed "expert|<target_pid>".
        for recs in make_expert_rec(tf, args.limit):
            recd = [r.pid for r in recs]
            s['expert|'+recs[0].target_pid] = recd
            b.increment()

        # Second pass over the same input: "classic|<target_pid>" keys.
        args.infile.seek(0)
        tf = TreeFile(args.infile)
        for recs in make_classic_recs(tf, args.limit):
            recd = [r.pid for r in recs]
            s['classic|'+recs[0].target_pid] = recd
            b.increment()

    b.print_freq()