Example #1
def execute_evaluation_markov_all_users_vs_one(train_tags, test_tags, user_id, execution_id, input_data_version,
                                               is_distributive, random_dummy_mode, dir_name, save_result):

    test_data = test_markov(train_tags, test_tags, is_distributive=is_distributive, random_dummy_mode=random_dummy_mode)

    test_data["algorithm"] = "markov"
    test_data["trained_with"] = "all_other_users"
    test_data["train_size"] = len(train_tags)
    test_data["test_size"] = len(test_tags)

    # The run is a dummy baseline only when a random_dummy_mode is actually set
    # (same convention as the cluster and CTW evaluations).
    if random_dummy_mode is None:
        test_data["is_dummy"] = False
    else:
        test_data["is_dummy"] = True

    test_data["method"] = "all_users_vs_one"
    test_data["user_id"] = user_id

    test_data["is_distributive"] = is_distributive
    test_data["input_data_version"] = input_data_version

    test_data["test_id"] = execution_id

    if save_result:
        experiments_dao.save_execution_test_data(result_dict=test_data,
                                                 filename=dir_name + "/" + test_data["test_id"])
Example #2
def evaluation_ctw_single_partition_light_mem(tags_sequence,
                                              user_id,
                                              input_data_version,
                                              predict_choice_method,
                                              dir_name,
                                              depth,
                                              repeats_n=3,
                                              save_result=True):

    if len(tags_sequence) <= 1:
        print("sr_group size: {} \n skipping".format(len(tags_sequence)))
        raise exceptions.TooShortStopRegionGroup()

    execution_id = str(uuid.uuid4())

    for repeat_i in range(repeats_n):

        test_data = test_ctw(tags_sequence,
                             depth=depth,
                             predict_choice_method=predict_choice_method)

        test_data["algorithm"] = "ctw"
        test_data["trained_with"] = "same_user"
        test_data["train_size"] = len(tags_sequence)
        test_data["test_size"] = len(tags_sequence)

        if predict_choice_method == "random_dummy":
            test_data["is_dummy"] = True
        else:
            test_data["is_dummy"] = False

        test_data["pred_choice_method"] = predict_choice_method

        test_data["method"] = "single_partition"

        test_data["k"] = None
        test_data["iteration"] = repeat_i

        test_data["user_id"] = user_id

        test_data["is_distributive"] = False
        test_data["input_data_version"] = input_data_version

        test_data["test_id"] = execution_id

        if save_result:
            experiments_dao.save_execution_test_data(result_dict=test_data,
                                                     filename=dir_name + "/" +
                                                     test_data["test_id"] +
                                                     "_i_{}".format(repeat_i))
Example #3
def evaluation_markov_cluster_light_mem(cluster, test_user_id, input_data_version, cluster_version,
                                        dir_name, repeats_n=3, is_distributive=False,
                                        random_dummy_mode=None, save_result=True):

    execution_id = str(uuid.uuid4())

    if len(cluster) <= 1:
        raise exceptions.ClusterSizeInadequate()

    user_tags, rest_cluster = partition_dict_by_keys_one_vs_all(a_dict=cluster, split_key=test_user_id)

    for repeat_i in range(repeats_n):

        test_data = test_markov_cluster(train_cluster=list(rest_cluster.values()),
                                        test=user_tags,
                                        is_distributive=is_distributive,
                                        random_dummy_mode=random_dummy_mode)

        test_data["algorithm"] = "markov"
        test_data["trained_with"] = "cluster"
        test_data["train_size"] = sum([len(rest_cluster[train_user_id]) for train_user_id in rest_cluster.keys()])
        test_data["test_size"] = len(user_tags)

        if random_dummy_mode is None:
            test_data["is_dummy"] = False
        else:
            test_data["is_dummy"] = True

        test_data["method"] = "cluster"
        test_data["cluster_size"] = len(cluster)

        test_data["k"] = None
        test_data["iteration"] = repeat_i

        test_data["user_id"] = test_user_id
        test_data["multi_trip"] = False

        test_data["is_distributive"] = is_distributive
        test_data["input_data_version"] = input_data_version
        test_data["cluster_version"] = cluster_version

        test_data["test_id"] = execution_id

        if save_result:
            experiments_dao.save_execution_test_data(result_dict=test_data,
                                                     filename=dir_name + "/" + test_data["test_id"] + "_i_{}".format(repeat_i))
Example #4
def evaluation_markov_k_fold_light_mem(tags_sequence, user_id, input_data_version, dir_name,
                                       k=5, is_distributive=False, random_dummy_mode=None,
                                       save_result=True):
    if len(tags_sequence) <= 1:
        print("sr_group size: {} \n skipping".format(len(tags_sequence)))
        raise exceptions.TooShortStopRegionGroup()

    k_fold_partitions = k_fold_iteration(tags_sequence, k)

    execution_id = str(uuid.uuid4())

    for i, partition in enumerate(k_fold_partitions):
        train = partition["train"]
        test = partition["test"]

        test_data = test_markov(train, test, is_distributive=is_distributive, random_dummy_mode=random_dummy_mode)

        test_data["algorithm"] = "markov"
        test_data["trained_with"] = "same_user"
        test_data["train_size"] = len(train)
        test_data["test_size"] = len(test)

        # The run is a dummy baseline only when a random_dummy_mode is actually set
        # (same convention as the cluster and CTW evaluations).
        if random_dummy_mode is None:
            test_data["is_dummy"] = False
        else:
            test_data["is_dummy"] = True

        test_data["method"] = "k_fold"

        test_data["k"] = k
        test_data["iteration"] = i

        test_data["user_id"] = user_id

        test_data["is_distributive"] = is_distributive
        test_data["input_data_version"] = input_data_version

        test_data["test_id"] = execution_id

        if save_result:
            experiments_dao.save_execution_test_data(result_dict=test_data,
                                                     filename=dir_name + "/" + test_data["test_id"] + "_i_{}".format(i))
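
As an illustration only, one way the k-fold evaluation above might be called. The tag sequence, user id, version string, and directory name are made-up placeholders; k_fold_iteration, test_markov, experiments_dao, exceptions and the uuid import are assumed to come from the surrounding project.

# Hypothetical call; random_dummy_mode=None runs the real markov predictor (is_dummy=False),
# and save_result=False skips writing per-fold result files.
evaluation_markov_k_fold_light_mem(tags_sequence=["home", "work", "lunch", "work", "home",
                                                  "gym", "home", "work", "lunch", "home"],
                                   user_id="user_042",
                                   input_data_version="v1",
                                   dir_name="results/markov_k_fold",
                                   k=5,
                                   is_distributive=False,
                                   random_dummy_mode=None,
                                   save_result=False)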