Example #1
0
 def test_parse_tree(self):
     """Print each record that parse_tree yields for self.deep.

     For every record, the record itself is printed first, followed by
     the length of its second element (index 1).
     """
     for record in parse_tree(self.deep):
         print(record)
         second_item = record[1]
         print(len(second_item))
Example #2
0
    # Open the LevelDB database at args.db_path with an explicit write
    # buffer size and block cache size.
    db = leveldb.LevelDB(args.db_path,
                         write_buffer_size=100 << 20,  # 100MB
                         block_cache_size=400 << 20)  # 400MB

    with open_file(args.infile) as ifs:
        # Progress counter; incremented once per Put for leaf data and once
        # per parent record (see the note at the end of the outer loop).
        b = Benchmark(args.benchmark_freq)
        # Remove any non-wos entries
        if args.wos_only:
            # Keep only records whose pid starts with "WOS:" and contains no '.'.
            tf = filter(lambda r: r.pid.startswith("WOS:") and '.' not in r.pid, TreeFile(ifs))
        else:
            tf = TreeFile(ifs)

        # Writes go directly to the database handle.
        writer = db

        # parse_tree yields (parent cluster id, parent papers, leaf records)
        # triples; args.limit is passed straight through to it.
        for parent_cid, parent_papers, leaf_recs in parse_tree(tf, args.limit):
            for l in leaf_recs:
                for paper in l.get_papers():
                    # Register each paper's cluster_id
                    # Key: "paper|<paper>", value: the leaf's cluster_id (both encoded to bytes).
                    writer.Put(("paper|" + paper).encode(), l.cluster_id.encode())
                    b.increment()
                # Register recommendations for each leaf cluster
                # Key: "cluster|<cluster_id>", value: msgpack-packed result of get_papers().
                writer.Put(("cluster|" + l.cluster_id).encode(), msgpack.packb(l.get_papers()))
                b.increment()
            # Now register the parent (expert) recommendations
            # TODO: A hacky method to stop us from registering singleton-parents
            # Only parent ids containing ':' are written to the database.
            if ':' in parent_cid:
                writer.Put(("cluster|" + parent_cid).encode(), msgpack.packb(parent_papers))
            # NOTE(review): this increment runs for every parent, including
            # those skipped by the ':' check above -- confirm that is intended.
            b.increment()

        b.print_freq()
Example #3
0
        write_buffer_size=100 << 20,  # 100MB
        block_cache_size=400 << 20)  # 400MB

    with open_file(args.infile) as ifs:
        b = Benchmark(args.benchmark_freq)
        # Remove any non-wos entries
        if args.wos_only:
            tf = filter(
                lambda r: r.pid.startswith("WOS:") and '.' not in r.pid,
                TreeFile(ifs))
        else:
            tf = TreeFile(ifs)

        writer = db

        for parent_cid, parent_papers, leaf_recs in parse_tree(tf, args.limit):
            for l in leaf_recs:
                for paper in l.get_papers():
                    # Register each paper's cluster_id
                    writer.Put(("paper|" + paper).encode(),
                               l.cluster_id.encode())
                    b.increment()
                # Register recommendations for each leaf cluster
                writer.Put(("cluster|" + l.cluster_id).encode(),
                           msgpack.packb(l.get_papers()))
                b.increment()
            # Now register the parent (expert) recommendations
            # TODO: A hacky method to stop us from registering singleton-parents
            if ':' in parent_cid:
                writer.Put(("cluster|" + parent_cid).encode(),
                           msgpack.packb(parent_papers))
Example #4
0
 def test_parse_tree(self):
     """Print each record that parse_tree yields for self.deep.

     For every record, the record itself is printed first, followed by
     the length of its second element (index 1).
     """
     for record in parse_tree(self.deep):
         print(record)
         second_item = record[1]
         print(len(second_item))