class TestPajek(unittest.TestCase):
    """Compare the text PajekFactory writes to a stream with fixed expectations."""

    def setUp(self):
        """Start every test with empty in-memory buffers and a new factory."""
        self.output = io.StringIO()
        self._node_stream = io.StringIO()
        self.pjk = PajekFactory()

    def _written_lines(self):
        """Write the factory into ``self.output`` and return the lines read back."""
        self.pjk.write(self.output)
        # Rewind so readlines() starts from the first written character.
        self.output.seek(0)
        return self.output.readlines()

    def test_stream_write(self):
        """Four edges over three names; vertex ids are expected in first-seen order."""
        edges = (
            ("bob", "tim"),
            ("bob", "rob"),
            ("tim", "rob"),
            ("tim", "bob"),
        )
        for source, target in edges:
            self.pjk.add_edge(source, target)

        self.assertListEqual(
            self._written_lines(),
            [
                '*vertices 3\n',
                '1 "bob"\n',
                '2 "tim"\n',
                '3 "rob"\n',
                '*edges 4\n',
                '1 2\n',
                '1 3\n',
                '2 3\n',
                '2 1\n',
            ],
        )

    def test_self_cite(self):
        """An edge from a name to itself is expected as one vertex and one edge."""
        self.pjk.add_edge("bob", "bob")

        self.assertListEqual(
            self._written_lines(),
            [
                '*vertices 1\n',
                '1 "bob"\n',
                '*edges 1\n',
                '1 1\n',
            ],
        )
def pjk_writer(entries, output_file, bench_freq):
    """Consume batches of entries from a queue and write them out as a Pajek file.

    Args:
        entries: Queue-like object providing ``get()`` and ``task_done()``.
            Every item fetched is iterated as a batch of mappings that are
            read through their ``"id"`` and ``"citations"`` keys. Fetching
            stops when ``get()`` returns the string ``'STOP'``.
        output_file: Path handed to ``open_file`` in ``"w"`` mode for the
            final output.
        bench_freq: Accepted but not referenced anywhere in this function.
    """
    factory = PajekFactory()
    progress = Benchmark()

    batches = iter(entries.get, 'STOP')
    for batch in batches:
        for record in batch:
            cited = record["citations"]
            for target in cited:
                # add_edge receives (entry id, cited id) in that order.
                factory.add_edge(record["id"], target)
            progress.increment()
        # One acknowledgement for each batch fetched from the queue.
        entries.task_done()

    progress.print_freq()

    with open_file(output_file, "w") as sink:
        factory.write(sink)

    # A final task_done() once the file is written; together with the calls in
    # the loop this matches the number of get() calls, the one that returned
    # 'STOP' included.
    entries.task_done()
class TestPajek(unittest.TestCase):
    """Stream-output tests for PajekFactory."""

    def setUp(self):
        """Create the text buffers and the factory used by each test."""
        self.output = io.StringIO()
        self._node_stream = io.StringIO()
        self.pjk = PajekFactory()

    def test_stream_write(self):
        """Write three vertices and four edges, then check every output line."""
        add = self.pjk.add_edge
        add("bob", "tim")
        add("bob", "rob")
        add("tim", "rob")
        add("tim", "bob")

        self.pjk.write(self.output)
        self.output.seek(0)  # read back from the beginning of the buffer
        actual = self.output.readlines()

        self.assertListEqual(
            actual,
            [
                '*vertices 3\n',
                '1 "bob"\n',
                '2 "tim"\n',
                '3 "rob"\n',
                '*edges 4\n',
                '1 2\n',
                '1 3\n',
                '2 3\n',
                '2 1\n',
            ],
        )

    def test_self_cite(self):
        """Write a single self-referencing edge and check every output line."""
        self.pjk.add_edge("bob", "bob")

        self.pjk.write(self.output)
        self.output.seek(0)  # read back from the beginning of the buffer
        actual = self.output.readlines()

        self.assertListEqual(
            actual,
            ['*vertices 1\n', '1 "bob"\n', '*edges 1\n', '1 1\n'],
        )
from util.PajekFactory import PajekFactory
from util.misc import open_file

if __name__ == "__main__":
    import argparse

    # Command line: the output path comes first, followed by one or more
    # delimited edge-list files.
    parser = argparse.ArgumentParser(description="Creates Pajek (.net) files from an edge/link file")
    parser.add_argument('outfile')
    parser.add_argument('--delimiter', '-d', help="Field delimiter", default='\t')
    parser.add_argument('--temp-dir', help="Directory to store temporary files in", default=None)
    parser.add_argument('infile', nargs='+')
    arguments = parser.parse_args()

    pjk = PajekFactory(temp_dir=arguments.temp_dir)

    for filename in arguments.infile:
        with open_file(filename) as f:
            for line in f:
                # A blank or whitespace-only line holds no edge. Without this
                # guard the two-name unpacking below raises ValueError, e.g.
                # on an empty line at the end of a file.
                if not line.strip():
                    continue
                # Exactly two fields are expected; any other count still
                # raises ValueError from the unpacking.
                v_from, v_to = line.split(arguments.delimiter)
                # Only the second field is stripped; it is the one that
                # carries the line terminator.
                pjk.add_edge(v_from, v_to.strip())

    # All inputs are read before anything is written.
    with open_file(arguments.outfile, 'w') as f:
        pjk.write(f)
help="Directory to store temporary files in", default=None) parser.add_argument( '--subject', '-s', help= "For WoS, subject must include this. Can be a comma seperated list.") parser.add_argument( '--wos-only', help="For WoS, exclude any citations or ids that contain a dot (.)", action="store_true") parser.add_argument('infile', nargs='+') arguments = parser.parse_args() b = Benchmark() pjk = PajekFactory(temp_dir=arguments.temp_dir) subjects = None if arguments.subject: subjects = set(arguments.subject.split(",")) for filename in arguments.infile: with open_file(filename) as f: for line in f: entry = ujson.loads(line) b.increment() if arguments.wos_only and '.' in entry["id"]: continue if subjects:
def setUp(self):
    """Create the per-test fixtures: two empty text buffers and a factory."""
    self.output, self._node_stream = io.StringIO(), io.StringIO()
    self.pjk = PajekFactory()
if __name__ == "__main__": import argparse parser = argparse.ArgumentParser(description="Creates Pajek (.net) files from JSON") parser.add_argument("outfile") parser.add_argument("--temp-dir", help="Directory to store temporary files in", default=None) parser.add_argument("--subject", "-s", help="For WoS, subject must include this. Can be a comma seperated list.") parser.add_argument( "--wos-only", help="For WoS, exclude any citations or ids that contain a dot (.)", action="store_true" ) parser.add_argument("infile", nargs="+") arguments = parser.parse_args() b = Benchmark() pjk = PajekFactory(temp_dir=arguments.temp_dir) subjects = None if arguments.subject: subjects = set(arguments.subject.split(",")) for filename in arguments.infile: with open_file(filename) as f: for line in f: entry = ujson.loads(line) b.increment() if arguments.wos_only and "." in entry["id"]: continue if subjects:
#!/usr/bin/env python
from util.misc import open_file, Benchmark
from util.PajekFactory import PajekFactory
import ujson

if __name__ == "__main__":
    import argparse

    # Command line: the output path comes first, followed by one or more
    # input files that are parsed one JSON document per line.
    cli = argparse.ArgumentParser(description="Creates Pajek (.net) files from JSON")
    cli.add_argument('outfile')
    cli.add_argument('--temp-dir', help="Directory to store temporary files in", default=None)
    cli.add_argument('infile', nargs='+')
    options = cli.parse_args()

    progress = Benchmark()
    graph = PajekFactory(temp_dir=options.temp_dir)

    for path in options.infile:
        with open_file(path) as source:
            for raw_line in source:
                record = ujson.loads(raw_line)
                cited_by = record["citedBy"]
                for citing in cited_by:
                    # add_edge receives (citing id, this record's id).
                    graph.add_edge(citing, record["id"])
                # Counted once per input line.
                progress.increment()

    progress.print_freq()

    with open_file(options.outfile, "w") as sink:
        graph.write(sink)
from util.PajekFactory import PajekFactory
from util.misc import open_file

if __name__ == "__main__":
    import argparse

    # Command line: the output path comes first, followed by one or more
    # delimited edge-list files.
    parser = argparse.ArgumentParser(
        description="Creates Pajek (.net) files from an edge/link file")
    parser.add_argument('outfile')
    parser.add_argument('--delimiter', '-d',
                        help="Field delimiter",
                        default='\t')
    parser.add_argument('--temp-dir',
                        help="Directory to store temporary files in",
                        default=None)
    parser.add_argument('infile', nargs='+')
    arguments = parser.parse_args()

    pjk = PajekFactory(temp_dir=arguments.temp_dir)

    for filename in arguments.infile:
        with open_file(filename) as f:
            for line in f:
                # Skip blank or whitespace-only lines: they hold no edge and
                # would otherwise make the two-name unpacking below raise
                # ValueError (a common case is an empty last line).
                if not line.strip():
                    continue
                # Exactly two fields are expected; any other count still
                # raises ValueError from the unpacking.
                v_from, v_to = line.split(arguments.delimiter)
                # Only the second field is stripped; it is the one that
                # carries the line terminator.
                pjk.add_edge(v_from, v_to.strip())

    # All inputs are read before anything is written.
    with open_file(arguments.outfile, 'w') as f:
        pjk.write(f)