def test_singletons(self):
    """A tree of singletons must yield no expert and no classic recommendations.

    The original code did ``raise "This shouldn't ever happen"`` — raising a
    string is a TypeError in Python 3 (string exceptions were removed), so the
    test would error with a confusing message instead of failing. ``self.fail``
    reports the actual problem.
    """
    tr = TreeFile(SINGLETONS)
    for recs in make_expert_rec(tr):
        self.fail("make_expert_rec produced records for singleton input")
    # Re-open the fixture: the first pass consumed the TreeFile stream.
    tr = TreeFile(SINGLETONS)
    for recs in make_classic_recs(tr):
        self.fail("make_classic_recs produced records for singleton input")
def make_expert(args, b, t):
    """Stream expert recommendations from the tree file into the table.

    Rewinds ``args.tree``, parses it, and batch-puts each processed expert
    record unless ``--dryrun`` is set. ``b`` is a benchmark counter bumped
    once per record; ``t`` supplies the batch-put context.
    """
    with t.get_batch_put_context() as batch:
        args.tree.seek(0)
        records = process_record_stream(make_expert_rec(TreeFile(args.tree)))
        for rec in records:
            if args.verbose:
                print(rec)
            if not args.dryrun:
                batch.put_item(rec)
            b.increment()
default=10) args = parser.parse_args() db = leveldb.LevelDB( args.db_path, write_buffer_size=100 << 20, # 100MB block_cache_size=400 << 20) # 400MB b = Benchmark(args.benchmark_freq) tf = TreeFile(args.infile) if args.batch_size: writer = leveldb.WriteBatch() else: writer = db for recs in make_expert_rec(tf, args.limit): recd = [r.pid for r in recs] key = recs[0].target_pid + "|expert" writer.Put(key.encode(), msgpack.packb(recd)) b.increment() if args.batch_size and b.count % args.batch_size == 0: db.Write(writer) args.infile.seek(0) tf = TreeFile(args.infile) for recs in make_classic_recs(tf, args.limit): recd = [r.pid for r in recs] key = recs[0].target_pid + "|classic" writer.Put(key.encode(), msgpack.packb(recd)) b.increment() if args.batch_size and b.count % args.batch_size == 0:
def test_expert_limit(self):
    """Expert recommendations are truncated to the requested per-paper limit."""
    limit = 5
    for recs in make_expert_rec(self.tr, limit):
        got = [rec.pid for rec in recs]
        expected = make_answer(EXPERT, recs[0].target_pid)[:limit]
        self.assertListEqual(got, expected)
def test_expert(self):
    """Each batch of expert recommendations matches the known-good answer."""
    for recs in make_expert_rec(self.tr):
        got = [rec.pid for rec in recs]
        expected = make_answer(EXPERT, recs[0].target_pid)
        self.assertListEqual(got, expected)
# Recreate the target table unless this is a dry run.
if not args.dryrun:
    t.delete()
if args.create:
    logging.info("Creating table: " + t.table_name)
    if not args.dryrun:
        t.create(write=2000)

# First pass: stream expert recommendations from the tree file into the table,
# reporting throughput every 50k entries.
entries = 0
start = time.time()
parser = TreeFile(args.tree)
with t.get_batch_put_context() as batch:
    print("Generating expert recommendations...")
    for expert_rec in process_record_stream(make_expert_rec(parser)):
        if args.verbose:
            print(expert_rec)
        if not args.dryrun:
            batch.put_item(expert_rec)
        entries += 1
        if entries % 50000 == 0:
            # NOTE(review): current_time/current_rate are computed but the
            # print below recomputes time.time() — presumably leftover; verify.
            current_time = time.time()
            current_rate = entries/(current_time - start)
            print("\nProcessed {0:,} entries in {1:.0f} seconds: {2:.2f} entries/sec".format(entries, time.time()-start, entries/(time.time()-start)))
            sys.stdout.flush()
    # Reset for the second pass
    # (rewind the input so the classic-recommendation pass re-reads the tree;
    # assumed to stay inside the batch context — confirm against the original
    # layout, which continues past this chunk)
    print("Generating classic recommendations...")
    args.tree.seek(0)
    parser = TreeFile(args.tree)
from recommenders.ef import make_classic_recs, make_expert_rec
from util.misc import Benchmark

if __name__ == "__main__":
    import argparse
    import sys

    parser = argparse.ArgumentParser(description="Creates EF recommendations and store them in a DBM")
    parser.add_argument('infile', nargs='?', type=argparse.FileType('r'), default=sys.stdin)
    parser.add_argument('shelf')
    parser.add_argument('--benchmark-freq', default=10000, type=int)
    # NOTE(review): --toint is parsed but never used in this visible chunk.
    parser.add_argument('--toint', help="Convert scores to integers, larger is better", action='store_true', default=False)
    parser.add_argument('-l', '--limit', type=int, help="Max number of recommendations to generate per-paper", default=10)
    args = parser.parse_args()

    tf = TreeFile(args.infile)
    b = Benchmark(args.benchmark_freq)
    # flag='n' always creates a fresh shelf, discarding any existing one.
    with shelve.open(args.shelf, flag='n', protocol=pickle.HIGHEST_PROTOCOL) as s:
        # Pass 1: expert recommendations, keyed "expert|<target_pid>".
        for recs in make_expert_rec(tf, args.limit):
            recd = [r.pid for r in recs]
            s['expert|'+recs[0].target_pid] = recd
            b.increment()
        # Pass 2: rewind the input and store classic recommendations,
        # keyed "classic|<target_pid>".
        args.infile.seek(0)
        tf = TreeFile(args.infile)
        for recs in make_classic_recs(tf, args.limit):
            recd = [r.pid for r in recs]
            s['classic|'+recs[0].target_pid] = recd
            b.increment()
    b.print_freq()