class TestPajek(unittest.TestCase):
    """Checks the text that PajekFactory.write() emits into a stream."""

    def setUp(self):
        """Create fresh in-memory streams and a new factory for each test."""
        self.output, self._node_stream = io.StringIO(), io.StringIO()
        self.pjk = PajekFactory()

    def test_stream_write(self):
        # Four directed edges over three distinct names.
        for source, target in (
            ("bob", "tim"),
            ("bob", "rob"),
            ("tim", "rob"),
            ("tim", "bob"),
        ):
            self.pjk.add_edge(source, target)

        self.pjk.write(self.output)
        self.output.seek(0)  # rewind so readlines() sees what was written

        # Expected: names numbered 1..3 in order of first use, followed by
        # the edges in the order they were added.
        expected_vertices = ['*vertices 3\n', '1 "bob"\n', '2 "tim"\n', '3 "rob"\n']
        expected_edges = ['*edges 4\n', '1 2\n', '1 3\n', '2 3\n', '2 1\n']
        self.assertListEqual(
            self.output.readlines(), expected_vertices + expected_edges)

    def test_self_cite(self):
        # An edge from a name to itself: one vertex, one edge "1 1".
        self.pjk.add_edge("bob", "bob")

        self.pjk.write(self.output)
        self.output.seek(0)

        expected = ['*vertices 1\n', '1 "bob"\n', '*edges 1\n', '1 1\n']
        self.assertListEqual(self.output.readlines(), expected)
def pjk_writer(entries, output_file, bench_freq):
    """Consume batches of entries from a queue and write them as a Pajek file.

    Batches are fetched with ``entries.get()`` until the ``'STOP'`` sentinel
    is received.  For every entry in a batch, one edge is added from
    ``entry["id"]`` to each item of ``entry["citations"]``.  Once the
    sentinel arrives the collected graph is written to ``output_file``.

    ``entries.task_done()`` is called once per batch and once more after the
    file has been written (presumably to acknowledge the sentinel item --
    confirm against the producer side).

    Args:
        entries: queue-like object providing ``get()`` and ``task_done()``.
        output_file: path handed to ``open_file`` in ``"w"`` mode.
        bench_freq: accepted but not used by this function.
    """
    graph = PajekFactory()
    progress = Benchmark()

    batches = iter(entries.get, 'STOP')
    for batch in batches:
        for record in batch:
            for cited in record["citations"]:
                graph.add_edge(record["id"], cited)
            # One increment per entry, regardless of how many citations it has.
            progress.increment()
        entries.task_done()

    progress.print_freq()
    with open_file(output_file, "w") as sink:
        graph.write(sink)
    entries.task_done()
class TestPajek(unittest.TestCase):
    """Exercises PajekFactory.write() against an in-memory text stream."""

    def setUp(self):
        """Build the per-test fixtures: two StringIO buffers and a factory."""
        self.output = io.StringIO()
        self._node_stream = io.StringIO()
        self.pjk = PajekFactory()

    def _written_lines(self):
        """Write the factory into ``self.output`` and return the lines read back."""
        self.pjk.write(self.output)
        self.output.seek(0)
        return self.output.readlines()

    def test_stream_write(self):
        # Three distinct names, four directed edges.
        edges = [("bob", "tim"), ("bob", "rob"), ("tim", "rob"), ("tim", "bob")]
        for edge in edges:
            self.pjk.add_edge(*edge)

        self.assertListEqual(self._written_lines(), [
            '*vertices 3\n',
            '1 "bob"\n',
            '2 "tim"\n',
            '3 "rob"\n',
            '*edges 4\n',
            '1 2\n',
            '1 3\n',
            '2 3\n',
            '2 1\n',
        ])

    def test_self_cite(self):
        # A self-loop is expected to yield a single vertex and the edge "1 1".
        self.pjk.add_edge("bob", "bob")

        self.assertListEqual(self._written_lines(), [
            '*vertices 1\n',
            '1 "bob"\n',
            '*edges 1\n',
            '1 1\n',
        ])
def pjk_writer(entries, output_file, bench_freq):
    """Drain a queue of entry batches into a Pajek file.

    Reads batches from ``entries`` until the ``'STOP'`` sentinel shows up,
    adding an edge ``entry["id"] -> citation`` for every citation of every
    entry, then writes the result to ``output_file``.

    Every batch is acknowledged with ``entries.task_done()``; one further
    ``task_done()`` follows the final write (presumably for the sentinel --
    verify against the code that fills the queue).

    ``bench_freq`` is part of the signature but is not read here.
    """
    pajek = PajekFactory()
    bench = Benchmark()

    while True:
        batch = entries.get()
        if 'STOP' == batch:
            break
        for item in batch:
            for cited_id in item["citations"]:
                pajek.add_edge(item["id"], cited_id)
            bench.increment()  # counted per entry, not per edge
        entries.task_done()

    bench.print_freq()
    with open_file(output_file, "w") as handle:
        pajek.write(handle)
    entries.task_done()
Exemple #5
0
from util.PajekFactory import PajekFactory
from util.misc import open_file

def iter_edges(lines, delimiter='\t'):
    """Yield ``(source, target)`` pairs parsed from delimited text lines.

    Args:
        lines: iterable of text lines (for example an open text file).
        delimiter: string separating the two fields on each line.

    Yields:
        ``(v_from, v_to)`` tuples.  ``v_from`` is the first field exactly as
        read; ``v_to`` is the second field with surrounding whitespace
        (including the line terminator) stripped.

    Raises:
        ValueError: if a non-empty line does not split into exactly two
            fields.  The message names the offending line number.
    """
    for line_number, line in enumerate(lines, start=1):
        # An empty line (typically a trailing newline at the end of the file)
        # carries no edge; unpacking it would fail with an opaque error.
        if not line.rstrip("\r\n"):
            continue
        fields = line.split(delimiter)
        if len(fields) != 2:
            raise ValueError(
                "line %d: expected 2 fields separated by %r, found %d"
                % (line_number, delimiter, len(fields)))
        v_from, v_to = fields
        yield v_from, v_to.strip()


# Script entry point: read edge/link files and write one Pajek file.
if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser(description="Creates Pajek (.net) files from an edge/link file")
    parser.add_argument('outfile')
    parser.add_argument('--delimiter', '-d', help="Field delimiter", default='\t')
    parser.add_argument('--temp-dir', help="Directory to store temporary files in", default=None)
    parser.add_argument('infile', nargs='+')
    arguments = parser.parse_args()

    pjk = PajekFactory(temp_dir=arguments.temp_dir)

    # Every input file contributes its edges to the same graph.
    for filename in arguments.infile:
        with open_file(filename) as f:
            for v_from, v_to in iter_edges(f, arguments.delimiter):
                pjk.add_edge(v_from, v_to)

    with open_file(arguments.outfile, 'w') as f:
        pjk.write(f)
                        help="Directory to store temporary files in",
                        default=None)
    # NOTE(review): the start of this script (the __main__ guard and the
    # creation of `parser`) is not visible in this excerpt.
    # Optional filter: comma-separated subject list (see the help text).
    parser.add_argument(
        '--subject',
        '-s',
        help=
        "For WoS, subject must include this. Can be a comma seperated list.")
    # Boolean flag; when set, entries whose id contains a dot are skipped below.
    parser.add_argument(
        '--wos-only',
        help="For WoS, exclude any citations or ids that contain a dot (.)",
        action="store_true")
    # One or more input files.
    parser.add_argument('infile', nargs='+')
    arguments = parser.parse_args()

    b = Benchmark()
    pjk = PajekFactory(temp_dir=arguments.temp_dir)

    # Turn the --subject value into a set of names; None when the option
    # was not given (or was given as an empty string).
    subjects = None
    if arguments.subject:
        subjects = set(arguments.subject.split(","))

    for filename in arguments.infile:
        with open_file(filename) as f:
            # Each input line is parsed as one JSON document.
            for line in f:
                entry = ujson.loads(line)
                # Called once per parsed line, before any filtering.
                b.increment()

                # With --wos-only, skip entries whose own id contains a dot.
                if arguments.wos_only and '.' in entry["id"]:
                    continue

                # NOTE(review): the body of this branch is missing from
                # this excerpt.
                if subjects:
 def setUp(self):
     """Prepare two empty StringIO buffers and a new PajekFactory."""
     self.output, self._node_stream = io.StringIO(), io.StringIO()
     self.pjk = PajekFactory()
# Script entry point.  Per its argparse description it creates Pajek (.net)
# files from JSON; the excerpt is cut off before any edges are added.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Creates Pajek (.net) files from JSON")
    parser.add_argument("outfile")
    parser.add_argument("--temp-dir", help="Directory to store temporary files in", default=None)
    # Optional filter: comma-separated subject list (see the help text).
    parser.add_argument("--subject", "-s", help="For WoS, subject must include this. Can be a comma seperated list.")
    # Boolean flag; when set, entries whose id contains a dot are skipped below.
    parser.add_argument(
        "--wos-only", help="For WoS, exclude any citations or ids that contain a dot (.)", action="store_true"
    )
    # One or more input files.
    parser.add_argument("infile", nargs="+")
    arguments = parser.parse_args()

    b = Benchmark()
    pjk = PajekFactory(temp_dir=arguments.temp_dir)

    # Turn the --subject value into a set of names; None when the option
    # was not given (or was given as an empty string).
    subjects = None
    if arguments.subject:
        subjects = set(arguments.subject.split(","))

    for filename in arguments.infile:
        with open_file(filename) as f:
            # Each input line is parsed as one JSON document.
            for line in f:
                entry = ujson.loads(line)
                # Called once per parsed line, before any filtering.
                b.increment()

                # With --wos-only, skip entries whose own id contains a dot.
                if arguments.wos_only and "." in entry["id"]:
                    continue

                # NOTE(review): the body of this branch is missing from
                # this excerpt.
                if subjects:
Exemple #9
0
#!/usr/bin/env python
from util.misc import open_file, Benchmark
from util.PajekFactory import PajekFactory
import ujson


# Script entry point: turn line-delimited JSON entries into one Pajek file.
if __name__ == "__main__":
    import argparse

    cli = argparse.ArgumentParser(
        description="Creates Pajek (.net) files from JSON")
    cli.add_argument('outfile')
    cli.add_argument(
        '--temp-dir',
        help="Directory to store temporary files in",
        default=None)
    cli.add_argument('infile', nargs='+')
    options = cli.parse_args()

    progress = Benchmark()
    graph = PajekFactory(temp_dir=options.temp_dir)

    for path in options.infile:
        with open_file(path) as source:
            # One JSON document per line.
            for raw in source:
                record = ujson.loads(raw)
                # Edges point from each citing item to this entry's id.
                for citing in record["citedBy"]:
                    graph.add_edge(citing, record["id"])
                progress.increment()

    progress.print_freq()
    with open_file(options.outfile, "w") as sink:
        graph.write(sink)
Exemple #10
0
from util.PajekFactory import PajekFactory
from util.misc import open_file

def iter_edges(lines, delimiter='\t'):
    """Yield ``(source, target)`` pairs parsed from delimited text lines.

    Args:
        lines: iterable of text lines (for example an open text file).
        delimiter: string separating the two fields on each line.

    Yields:
        ``(v_from, v_to)`` tuples.  ``v_from`` is the first field exactly as
        read; ``v_to`` is the second field with surrounding whitespace
        (including the line terminator) stripped.

    Raises:
        ValueError: if a non-empty line does not split into exactly two
            fields.  The message names the offending line number.
    """
    for line_number, line in enumerate(lines, start=1):
        # An empty line (typically a trailing newline at the end of the file)
        # carries no edge; unpacking it would fail with an opaque error.
        if not line.rstrip("\r\n"):
            continue
        fields = line.split(delimiter)
        if len(fields) != 2:
            raise ValueError(
                "line %d: expected 2 fields separated by %r, found %d" %
                (line_number, delimiter, len(fields)))
        v_from, v_to = fields
        yield v_from, v_to.strip()


# Script entry point: read edge/link files and write one Pajek file.
if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser(
        description="Creates Pajek (.net) files from an edge/link file")
    parser.add_argument('outfile')
    parser.add_argument('--delimiter',
                        '-d',
                        help="Field delimiter",
                        default='\t')
    parser.add_argument('--temp-dir',
                        help="Directory to store temporary files in",
                        default=None)
    parser.add_argument('infile', nargs='+')
    arguments = parser.parse_args()

    pjk = PajekFactory(temp_dir=arguments.temp_dir)

    # Every input file contributes its edges to the same graph.
    for filename in arguments.infile:
        with open_file(filename) as f:
            for v_from, v_to in iter_edges(f, arguments.delimiter):
                pjk.add_edge(v_from, v_to)

    with open_file(arguments.outfile, 'w') as f:
        pjk.write(f)
 def setUp(self):
     """Reset the fixtures: empty text buffers plus a fresh PajekFactory."""
     for attribute in ("output", "_node_stream"):
         setattr(self, attribute, io.StringIO())
     self.pjk = PajekFactory()