def test_pagerank_calculation(self):
    """Run pagerank and compare the written scores with known-good values.

    Runs ``Command.do_pagerank`` with ``chown=False``, parses the resulting
    ``external_pagerank`` file (one ``key=value`` pair per line) and checks
    every expected item against reference values calculated in Gephi.
    """
    # Calculate pagerank of the documents under test.
    comm = Command()
    # NOTE(review): this sets verbosity on the test case rather than on
    # `comm`; confirm whether `comm.verbosity` was intended.
    self.verbosity = 1
    comm.do_pagerank(chown=False)

    # Read the pagerank file back in, converting it to a dict.
    pr_values_from_file = {}
    data_path = get_data_dir('collection1') + "external_pagerank"
    with open(data_path) as f:
        for line in f:
            pk, value = line.split('=')
            pr_values_from_file[pk] = float(value.strip())

    # Expected values, based on calculations in Gephi.
    answers = {
        '1': 0.369323534954,
        '2': 0.204581549974,
        '3': 0.378475867453,
    }
    for key, expected in answers.items():
        # Fail (rather than error out with a KeyError) when an item is
        # missing from the generated file.
        self.assertIn(
            key,
            pr_values_from_file,
            msg="Item %s is missing from the pagerank file %s"
                % (key, data_path),
        )
        actual = pr_values_from_file[key]
        # Floating point comparison with an absolute tolerance.
        self.assertAlmostEqual(
            actual,
            expected,
            delta=0.0001,
            msg="The answer for item %s was %s when it should have been "
                "%s" % (key, actual, expected),
        )
def test_pagerank_calculation(self):
    """Run pagerank and compare the written scores with known-good values.

    Runs ``Command.do_pagerank`` with ``chown=False``, parses the resulting
    ``external_pagerank`` file (one ``key=value`` pair per line) and checks
    every expected item against reference values calculated in Gephi.
    """
    # Calculate pagerank of the documents under test.
    comm = Command()
    # NOTE(review): this sets verbosity on the test case rather than on
    # `comm`; confirm whether `comm.verbosity` was intended.
    self.verbosity = 1
    comm.do_pagerank(chown=False)

    # Read the pagerank file back in, converting it to a dict.
    pr_values_from_file = {}
    data_path = get_data_dir('collection1') + "external_pagerank"
    with open(data_path) as f:
        for line in f:
            pk, value = line.split('=')
            pr_values_from_file[pk] = float(value.strip())

    # Expected values, based on calculations in Gephi.
    answers = {
        '1': 0.369323534954,
        '2': 0.204581549974,
        '3': 0.378475867453,
    }
    for key, expected in answers.items():
        # Fail (rather than error out with a KeyError) when an item is
        # missing from the generated file.
        self.assertIn(
            key,
            pr_values_from_file,
            msg="Item %s is missing from the pagerank file %s"
                % (key, data_path),
        )
        actual = pr_values_from_file[key]
        # Floating point comparison with an absolute tolerance.
        self.assertAlmostEqual(
            actual,
            expected,
            delta=0.0001,
            msg="The answer for item %s was %s when it should have been "
                "%s" % (key, actual, expected),
        )
def handle(self, *args, **options):
    """Calculate pagerank, write the sorted result file and report its path.

    Delegates to the parent ``handle`` first, then writes the output of
    ``do_pagerank`` to ``settings.SOLR_PAGERANK_DEST_DIR`` and prints a
    reminder naming the location the file may need to be copied to.
    """
    super(Command, self).handle(*args, **options)

    scores = self.do_pagerank()
    written_to = settings.SOLR_PAGERANK_DEST_DIR
    make_sorted_pr_file(scores, written_to)

    # Suggested final location, shown to the operator only; nothing is
    # copied there by this method.
    suggested_home = get_data_dir('collection1') + "external_pagerank"
    message = (
        "Pagerank file created at %s. Because of distributed servers, "
        "you may need to copy it to its final destination. Somewhere "
        "like: %s."
    )
    print(message % (written_to, suggested_home))
class Command(BaseCommand):
    """Command that calculates pagerank values and writes them to a file."""

    args = '<args>'
    help = 'Calculate pagerank value for every case'
    # Path of the sorted pagerank file; evaluated once, when the class body
    # is executed.
    RESULT_FILE_PATH = get_data_dir('collection1') + "external_pagerank"

    def do_pagerank(self, chown=True):
        """Compute pagerank and publish the result file.

        Builds the graph, writes its pagerank values to
        ``RESULT_FILE_PATH``, reloads the external file cache and finally
        copies the file to the bulk directory.

        :param chown: Passed through unchanged to
            ``cp_pr_file_to_bulk_dir``.
        :return: None.
        """
        target = self.RESULT_FILE_PATH
        citation_graph = make_and_populate_nx_graph()
        make_sorted_pr_file(citation_graph.pagerank(), target)
        reload_pagerank_external_file_cache()
        cp_pr_file_to_bulk_dir(target, chown)

    def handle(self, *args, **options):
        """Entry point: run the pagerank calculation with default settings.

        The positional arguments and options are accepted but not used.
        """
        self.do_pagerank()