Example #1
0
        output_path = params["OUTPUT_PATH"]
        tmp_output_dir = params["TMP_OUTPUT_DIR"]
    except:
        print "Usage: mortar baconbits:[local_]run pagerank " + "-p INPUT_PATH=<...> -p OUTPUT_PATH=<...> -p TMP_OUTPUT_DIR=<...> "

    damping_factor = 0.85
    if "DAMPING_FACTOR" in params:
        damping_factor = float(params["DAMPING_FACTOR"])

    convergence_threshold = 0.001
    if "CONVERGENCE_THRESHOLD" in params:
        convergence_threshold = float(params["CONVERGENCE_THRESHOLD"])

    max_num_iterations = 10
    if "MAX_NUM_ITERATIONS" in params:
        max_num_iterations = int(params["MAX_NUM_ITERATIONS"])

    id_name_map = None
    if "ID_NAME_MAP" in params:
        id_name_map = params["ID_NAME_MAP"]

    Pagerank.run_pagerank(
        input_path,
        output_path,
        tmp_output_dir,
        damping_factor=damping_factor,
        convergence_threshold=convergence_threshold,
        max_num_iterations=max_num_iterations,
        id_name_map=id_name_map,
    )
from pagerank_lib import Pagerank

# Input edge list: a directed graph stored as tab-delimited
# "from, to, weight" records.
EDGES_INPUT = "s3n://mortar-example-data/patents-pagerank/patent_organization_citation_graph"

# Tuning knobs for the iteration (README.md describes each one in detail).
DAMPING_FACTOR = 0.7
CONVERGENCE_THRESHOLD = 0.0001
MAX_NUM_ITERATIONS = 20

# Intermediate results are kept in HDFS, which performs better for
# temporary data.
TEMPORARY_OUTPUT_PREFIX = "hdfs:///patents-pagerank"

# By default, final output is sent to the S3 bucket mortar-example-output-data,
# in a special directory permissioned for your account.
# See my-pagerank.py for an example of outputting to your own S3 bucket.

if __name__ == "__main__":
    # Gather the tuning options first so the constructor call stays short.
    run_options = {
        "damping_factor": DAMPING_FACTOR,
        "convergence_threshold": CONVERGENCE_THRESHOLD,
        "max_num_iterations": MAX_NUM_ITERATIONS,
        "temporary_output_prefix": TEMPORARY_OUTPUT_PREFIX,
    }
    # No output path is passed here; the constructor's default applies.
    pagerank = Pagerank(EDGES_INPUT, **run_options)
    pagerank.run_pagerank()