# Read the Data Format as input
    # NOTE(review): this block is indented, so it sits inside an enclosing
    # scope (likely a main() function or a __main__ guard) that starts
    # before the visible region -- Python 2 code (print statements).
    if len(sys.argv) != 3:
        # Abort with a usage message unless exactly two CLI args were given.
        print "usage: python ranked_list_generation.py <EXPERIMENT_NAME> <REGION (e.g. chicago)>"
        exit(1)

    # Positional CLI arguments: experiment identifier and region name.
    EXPERIMENT_NAME = sys.argv[1]
    REGION = sys.argv[2]

    # Directory layout rooted at "data/": partitioned input data per region,
    # and per-experiment recommendation output under experiments/<name>/<region>.
    DATA_DIR = "data"
    PARTITIONED_DATA_DIR = path.join(DATA_DIR, "partitioned_data", REGION)
    EXPERIMENT_DATA_DIR = path.join(DATA_DIR, "experiments", EXPERIMENT_NAME)
    RECOMMENDATION_DATA_DIR = path.join(EXPERIMENT_DATA_DIR, REGION,
                                        "recommendations")

    # Reading the Experiment Attributes
    # read_experiment_atts is defined elsewhere; presumably returns a dict
    # containing at least a 'partitions' key -- TODO confirm against its def.
    EXPERIMENT_ATTS = read_experiment_atts(EXPERIMENT_DATA_DIR)

    # Iterate over the EXPERIMENT partitions
    for part in EXPERIMENT_ATTS['partitions']:
        print "Partition %s" % part

        # Define the DB partition dir and the RESULT partition dir
        db_partition_dir = path.join(PARTITIONED_DATA_DIR,
                                     "partition_%s" % part)
        result_partition_dir = path.join(RECOMMENDATION_DATA_DIR,
                                         "partition_%s" % part)

        # Define the partition ranks directory
        partition_rank_dir = path.join(result_partition_dir, "ranks")

        print "\tReading the mapping files..."
        # NOTE(review): the snippet is truncated here by a scrape separator;
        # the loop body continues in the original source file.
# ---- Esempio n. 2 (Italian: "Example no. 2") -- scraped-snippet separator;
# the stray "0" below it was pagination junk from the scrape, not code. ----
    # Read the Data Format as input
    # NOTE(review): near-duplicate of the previous snippet (same script,
    # longer excerpt); indented body of an enclosing scope outside this view.
    if len(sys.argv) != 3:
        # Abort with a usage message unless exactly two CLI args were given.
        print "usage: python ranked_list_generation.py <EXPERIMENT_NAME> <REGION (e.g. chicago)>"
        exit(1)

    # Positional CLI arguments: experiment identifier and region name.
    EXPERIMENT_NAME = sys.argv[1]
    REGION = sys.argv[2]

    # Directory layout rooted at "data/" (same convention as the snippet above).
    DATA_DIR = "data"
    PARTITIONED_DATA_DIR = path.join(DATA_DIR, "partitioned_data", REGION)
    EXPERIMENT_DATA_DIR = path.join(DATA_DIR, "experiments", EXPERIMENT_NAME)
    RECOMMENDATION_DATA_DIR = path.join(EXPERIMENT_DATA_DIR, REGION, "recommendations")

    # Reading the Experiment Attributes
    # read_experiment_atts is defined elsewhere; presumably returns a dict
    # with a 'partitions' key -- TODO confirm.
    EXPERIMENT_ATTS = read_experiment_atts(EXPERIMENT_DATA_DIR)

    # Iterate over the EXPERIMENT partitions
    for part in EXPERIMENT_ATTS['partitions']:
        print "Partition %s" % part

        # Define the DB partition dir and the RESULT partition dir
        db_partition_dir = path.join(PARTITIONED_DATA_DIR, "partition_%s" % part)
        result_partition_dir = path.join(RECOMMENDATION_DATA_DIR, "partition_%s" % part)

        # Define the partition ranks directory
        partition_rank_dir = path.join(result_partition_dir, "ranks")

        print "\tReading the mapping files..."
        # Read the mapping user_id files
        # read_map_event_ids is defined elsewhere; name suggests it loads an
        # event-id mapping from the partition dir -- TODO confirm.
        map_event_ids = read_map_event_ids(db_partition_dir)
        # NOTE(review): snippet truncated here by the scrape separator below.
# ---- Esempio n. 3 (Italian: "Example no. 3") -- scraped-snippet separator;
# the stray "0" below it was pagination junk from the scrape, not code. ----
    # NOTE(review): this snippet's head is missing (ARGS, EXPERIMENT_NAME and
    # LOGGER are assigned/created before the visible region); presumably ARGS
    # is an argparse Namespace -- TODO confirm.
    REGION = ARGS.region
    ALGORITHMS = ARGS.algorithms
    MAX_PARALLEL = ARGS.max_parallel


    # Directory layout rooted at "data/"; hybrid-model recommendations go
    # under experiments/<name>/<region>/recommendations/hybrid_models.
    DATA_DIR = "data"
    PARTITIONED_REGION_DATA_DIR = os.path.join(DATA_DIR, "partitioned_data", REGION)
    EXPERIMENT_DIR = os.path.join(DATA_DIR, "experiments", EXPERIMENT_NAME)
    REC_REGION_DATA_DIR = os.path.join(EXPERIMENT_DIR, REGION, "recommendations")
    REC_RESULT_DIR_NAME = "hybrid_models"

    LOGGER.info("HYBRID Algorithms")
    LOGGER.info(ALGORITHMS)

    # Read the experiment attributes
    PARTITIONS = read_experiment_atts(EXPERIMENT_DIR)["partitions"]

    # Helpers defined elsewhere: maps algorithm name -> rank file(s) for the
    # region -- TODO confirm semantics of get_dict_alg_files.
    DICT_ALG_RANKFILE = get_dict_alg_files(REGION)

    # Sorted so the ensemble order is deterministic across runs.
    ENSEMBLE_LIST = sorted(DICT_ALG_RANKFILE.keys())

    if MAX_PARALLEL > 1:
        # Define the Multiprocessing Pool (with size equals to CPU_COUNT -1)
        # NOTE(review): comment disagrees with code -- the pool size is
        # MAX_PARALLEL, not cpu_count()-1; also the pool is never
        # close()d/join()ed in the visible span.
        EXPERIMENT_POOL = multiprocessing.Pool(MAX_PARALLEL)
        # Starts the multiple processes
        EXPERIMENT_POOL.map(create_models_and_recommend,
                            get_models_to_experiment(PARTITIONS, ALGORITHMS,
                                                     PARTITIONED_REGION_DATA_DIR, REC_REGION_DATA_DIR, REC_RESULT_DIR_NAME))
    else:
        for experiment_data in get_models_to_experiment(PARTITIONS, ALGORITHMS,
                                                        PARTITIONED_REGION_DATA_DIR, REC_REGION_DATA_DIR, REC_RESULT_DIR_NAME):
            # NOTE(review): the loop body is missing -- the scrape cut the
            # snippet here (the next lines belong to a different excerpt);
            # as pasted this `for` is syntactically incomplete.
    # NOTE(review): tail of a separate excerpt; its head (argparse setup that
    # builds ARGS, and LOGGER creation) is outside the visible region.
    EXPERIMENT_NAME = ARGS.experiment_name
    REGION = ARGS.region
    ALGORITHMS = ARGS.algorithms
    # Double negative: --not_parallel flag disables parallel execution.
    PARALLEL_EXECUTION = not ARGS.not_parallel

    # Directory layout rooted at "data/" (this variant uses `path` rather
    # than `os.path`, matching the earlier snippets' import style).
    DATA_DIR = "data"
    PARTITIONED_REGION_DATA_DIR = path.join(DATA_DIR, "partitioned_data",
                                            REGION)
    EXPERIMENT_DIR = path.join(DATA_DIR, "experiments", EXPERIMENT_NAME)
    EXPERIMENT_REGION_DATA_DIR = path.join(EXPERIMENT_DIR, REGION)

    LOGGER.info("Content-Based Algorithms")
    LOGGER.info(ALGORITHMS)

    # Read the experiment attributes
    PARTITIONS = read_experiment_atts(EXPERIMENT_DIR)["partitions"]

    if PARALLEL_EXECUTION:
        # Define the Multiprocessing Pool (with size equals to CPU_COUNT -1)
        # NOTE(review): pool is never close()d/join()ed in the visible span.
        EXPERIMENT_POOL = multiprocessing.Pool(multiprocessing.cpu_count() - 1)
        # Starts the multiple processes
        EXPERIMENT_POOL.map(
            create_models_and_recommend,
            get_models_to_experiment(PARTITIONS, ALGORITHMS,
                                     PARTITIONED_REGION_DATA_DIR,
                                     EXPERIMENT_REGION_DATA_DIR))
    else:
        # Sequential fallback: run each experiment configuration in-process.
        for experiment_data in get_models_to_experiment(
                PARTITIONS, ALGORITHMS, PARTITIONED_REGION_DATA_DIR,
                EXPERIMENT_REGION_DATA_DIR):
            create_models_and_recommend(experiment_data)