output_path = params["OUTPUT_PATH"] tmp_output_dir = params["TMP_OUTPUT_DIR"] except: print "Usage: mortar baconbits:[local_]run pagerank " + "-p INPUT_PATH=<...> -p OUTPUT_PATH=<...> -p TMP_OUTPUT_DIR=<...> " damping_factor = 0.85 if "DAMPING_FACTOR" in params: damping_factor = float(params["DAMPING_FACTOR"]) convergence_threshold = 0.001 if "CONVERGENCE_THRESHOLD" in params: convergence_threshold = float(params["CONVERGENCE_THRESHOLD"]) max_num_iterations = 10 if "MAX_NUM_ITERATIONS" in params: max_num_iterations = int(params["MAX_NUM_ITERATIONS"]) id_name_map = None if "ID_NAME_MAP" in params: id_name_map = params["ID_NAME_MAP"] Pagerank.run_pagerank( input_path, output_path, tmp_output_dir, damping_factor=damping_factor, convergence_threshold=convergence_threshold, max_num_iterations=max_num_iterations, id_name_map=id_name_map, )
"""Example control script: PageRank over a patent-organization citation graph.

Configures a ``Pagerank`` job from ``pagerank_lib`` with the constants defined
below and runs it when this file is executed as a script.
"""

from pagerank_lib import Pagerank

# Input edge list: a directed graph, tab-delimited, with the schema
# "from, to, weight".
EDGES_INPUT = "s3n://mortar-example-data/patents-pagerank/patent_organization_citation_graph"

# Iteration parameters -- README.md describes each of them.
DAMPING_FACTOR = 0.7
CONVERGENCE_THRESHOLD = 0.0001
MAX_NUM_ITERATIONS = 20

# Intermediate data is kept in HDFS for better performance.
TEMPORARY_OUTPUT_PREFIX = "hdfs:///patents-pagerank"

# No output location is configured here: by default the final output is sent
# to the S3 bucket mortar-example-output-data, in a special directory
# permissioned for your account. See my-pagerank.py for an example of
# outputting to your own S3 bucket.

if __name__ == "__main__":
    # Gather the tuning knobs in one place, then hand them to the job as
    # keyword arguments alongside the positional edge-list path.
    job_options = {
        "damping_factor": DAMPING_FACTOR,
        "convergence_threshold": CONVERGENCE_THRESHOLD,
        "max_num_iterations": MAX_NUM_ITERATIONS,
        "temporary_output_prefix": TEMPORARY_OUTPUT_PREFIX,
    }
    pagerank = Pagerank(EDGES_INPUT, **job_options)
    pagerank.run_pagerank()