Example #1
0
    def test_find_path(self):
        '''Check that a short path is found between two known pages.

        Searches from 'Tom Hanks' to 'Will I Am' and verifies that the
        returned path is truthy, reports the requested start and end
        titles, and has a length below 6.
        '''
        wiki_graph = WikiGraph()
        start, end = ('Tom Hanks', 'Will I Am')
        path = wiki_graph.find_path(start, end)

        # Confirm a path was found before inspecting its attributes, so a
        # failed search is reported as "Path not found" rather than as an
        # unrelated endpoint mismatch or attribute error.
        self.assertTrue(path, "Path not found")
        self.assertEqual(path.start, start)
        self.assertEqual(path.end, end)
        # assertLess reports both operands on failure, in addition to msg.
        self.assertLess(len(path), 6, "Path too long len=%d" % len(path))
Example #2
0
def main():
    """Command-line entry point: find a link path between two wiki pages.

    Parses the required ``--start`` and ``--end`` page titles, runs
    ``WikiGraph.find_path`` between them and prints the path's ``info``
    when the returned path is truthy, or "Failed Search." otherwise.
    """
    # The text must be passed as ``description``: the first positional
    # parameter of ArgumentParser is ``prog`` (the program name shown in the
    # usage line), not the description.
    parser = argparse.ArgumentParser(
        description="Find a path between two Wikipedia pages via their links.")
    parser.add_argument("--start",
                        help="Title of valid wikipedia page to start from.",
                        type=str,
                        required=True)
    parser.add_argument("--end",
                        help="Title of valid wikipedia page to reach.",
                        type=str,
                        required=True)
    args = parser.parse_args()

    wiki_graph = WikiGraph()
    print("Searching:  '%s' -> '%s'" % (args.start, args.end))
    path = wiki_graph.find_path(args.start, args.end)
    if path:
        print(path.info)
    else:
        print("Failed Search.")
Example #3
0
    def test_find_path_benchmark(self):
        '''Benchmark find_path from every sample page to "Homunculus".

        Runs one search per entry of ``samplepages``, collects the paths
        whose ``degree`` is -1 as failures, and prints the failure list
        together with total and per-path averages of the ``requests`` and
        ``time`` values reported by each path.
        '''
        wiki_graph = WikiGraph(print_requests=True)
        total_requests = 0
        total_time = 0
        failures = []
        searched = 0
        # Loop through and test if paths exist
        for page in samplepages:
            (start, end) = (page, "Homunculus")
            path = wiki_graph.find_path(start, end)
            searched += 1
            if path.degree == -1:
                failures.append(path)
            total_requests += path.requests
            total_time += path.time

        # Average over the searches actually performed (the loop iterates
        # samplepages, not some other page list) and avoid dividing by zero
        # when the sample is empty.
        divisor = searched or 1

        print("Total Failures:", len(failures))
        print(failures)
        print("Total requests:", total_requests)
        print("Avg number of requests per path: %.2f" %
              (total_requests / divisor))
        print("Total time: ", total_time)
        print("Avg time per path: %.2f" % (total_time / divisor))
Example #4
0
def main():
    """Search from each sample article to a central article and save results.

    The sample is read with ``load_sample`` from ``--sample_source`` when
    that option is given; otherwise ``--sample_size`` pages are requested
    from ``WikiGraph.random_sample``. For every page a path to ``--center``
    is searched, its ``info`` is printed and its ``data`` is written as one
    CSV row (columns: start, end, path, degree) to ``--outfile``. A summary
    with the sample size, elapsed seconds and total requests is printed at
    the end.

    NOTE(review): the default outfile name ends in ``.json`` although the
    content written is CSV; the default is kept for backward compatibility.
    """
    # initialise args from cli
    # The text must be passed as ``description``: the first positional
    # parameter of ArgumentParser is ``prog``, not the description.
    parser = argparse.ArgumentParser(
        description="For a given sample of articles find a path from each to"
        " a central end article. Write the output to a given csv file.")
    parser.add_argument("-o",
                        "--outfile",
                        help="Filename to save the results to.",
                        type=str,
                        default="wikiresults.json")
    parser.add_argument("-x",
                        "--center",
                        help="Title of valid wiki page to center all nodes on",
                        type=str,
                        default="Homunculus")
    parser.add_argument("-k",
                        "--sample_size",
                        help="Sample size of k pages to search from. "
                        "(Only applies when sample source is not given)",
                        type=int,
                        default=1)
    parser.add_argument(
        "-s",
        "--sample_source",
        help="Filename containing newline delimited list of valid "
        "wiki article titles if not specified sample defaults "
        "to random selection from wikimedia api. ",
        type=str)
    parser.add_argument("-v",
                        action='store_true',
                        help="add to display titles of page requests made.")
    args = parser.parse_args()

    # Honour the -v flag instead of always printing page requests.
    wiki_graph = WikiGraph(print_requests=args.v)

    # resolve any issues with search sample source.
    if args.sample_source:
        # The option is stored as ``sample_source``; there is no ``set``
        # attribute on the parsed namespace.
        sample = load_sample(args.sample_source)
        size = len(sample)
    else:
        sample = wiki_graph.random_sample(args.sample_size)
        size = args.sample_size

    # newline='' lets the csv module control line endings itself, which
    # prevents blank rows between records on platforms that translate "\n".
    with open(args.outfile, mode='w', newline='') as outfile:
        writer = csv.DictWriter(outfile, ["start", "end", "path", "degree"])
        writer.writeheader()

        started_at = datetime.now()
        total_requests = 0

        for i, page in enumerate(sample):
            print("%d/%d Searching: '%s' -> '%s'" %
                  (i + 1, size, page, args.center))
            path = wiki_graph.find_path(page, args.center)
            print(path.info)
            writer.writerow(path.data)
            total_requests += path.requests

        elapsed_seconds = (datetime.now() - started_at).total_seconds()
        # Report the size of the sample actually searched, which differs
        # from --sample_size when the sample was loaded from a file.
        print("Finished Totals: "
              "N={}. Time={}. Requests={}.".format(size, elapsed_seconds,
                                                   total_requests))