Ejemplo n.º 1
0
def main(args):
    """Merge YAML config layers, seed all RNGs, and launch a tuning run."""
    # Load the three config layers plus the tuning grid.
    general_params = load_yaml(args.general)
    data_params = load_yaml(args.data)
    default_params = load_yaml(args.default)
    tune_params = load_yaml(args.tune)

    # Command-line flags override the YAML values.
    general_params['verbose'] = args.verbose
    general_params['cuda'] = torch.cuda.is_available()
    general_params['train_val'] = args.train_val
    if args.trick:
        default_trick[args.trick] = True
    general_params['trick'] = default_trick

    final_default_params = SimpleNamespace(
        **general_params, **data_params, **default_params)

    time_start = time.time()
    print(final_default_params)
    print()

    # Seed every RNG so runs are reproducible.
    seed = final_default_params.seed
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    if final_default_params.cuda:
        torch.cuda.manual_seed(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

    # Kick off the tuning runs.
    multiple_run_tune_separate(final_default_params, tune_params,
                               args.save_path)
Ejemplo n.º 2
0
def main(args):
    """Run one critiquing experiment and persist its results.

    Loads the train/test rating matrices and keyphrase frequency
    matrices, looks up the best tuned hyperparameters (by NDCG) for the
    selected model, runs the critiquing simulation, and writes the
    result table to CSV.
    """
    # Progress bar
    progress = WorkSplitter()

    # Show hyperparameter settings
    progress.section("Parameter Setting")
    print("Data Directory: {}".format(args.data_dir))
    print("Number of Users Sampled: {}".format(args.num_users_sampled))
    print("Number of Items Sampled: {}".format(args.num_items_sampled))
    print("Number of Max Allowed Iterations: {}".format(
        args.max_iteration_threshold))
    print("Critiquing Model: {}".format(args.critiquing_model_name))

    R_train = load_numpy(path=args.data_dir, name=args.train_set)
    print("Train U-I Dimensions: {}".format(R_train.shape))

    R_test = load_numpy(path=args.data_dir, name=args.test_set)
    print("Test U-I Dimensions: {}".format(R_test.shape))

    R_train_keyphrase = load_numpy(path=args.data_dir,
                                   name=args.train_keyphrase_set).toarray()
    print("Train Item Keyphrase U-I Dimensions: {}".format(
        R_train_keyphrase.shape))

    R_train_item_keyphrase = load_numpy(
        path=args.data_dir, name=args.train_item_keyphrase_set).toarray()

    # Loaded once and reused when saving below (previously the same YAML
    # was parsed a second time before save_dataframe_csv).
    table_path = load_yaml('config/global.yml', key='path')['tables']
    parameters = find_best_hyperparameters(table_path + args.dataset_name,
                                           'NDCG')
    parameters_row = parameters.loc[parameters['model'] == args.model]

    # NOTE(review): yelp's item-keyphrase matrix appears to be stored
    # transposed relative to the other datasets — confirm with the data prep.
    if args.dataset_name == "yelp/":
        R_train_item_keyphrase = R_train_item_keyphrase.T

    start_time = time.time()

    results = critiquing(
        matrix_Train=R_train,
        matrix_Test=R_test,
        keyphrase_freq=R_train_keyphrase,
        item_keyphrase_freq=R_train_item_keyphrase,
        num_users_sampled=args.num_users_sampled,
        num_items_sampled=args.num_items_sampled,
        max_iteration_threshold=args.max_iteration_threshold,
        dataset_name=args.dataset_name,
        model=models[args.model],
        parameters_row=parameters_row,
        critiquing_model_name=args.critiquing_model_name,
        keyphrase_selection_method=args.keyphrase_selection_method,
        topk=args.topk,
        lamb=args.lamb)

    print("Final Time Elapsed: {}".format(inhour(time.time() - start_time)))

    save_dataframe_csv(results, table_path, args.save_path)
Ejemplo n.º 3
0
def main(args):
    """Run a critiquing experiment with a fixed hyperparameter row.

    Same flow as the tuned variant, but the model hyperparameters are
    hard-coded instead of being read from the tuning tables.
    """
    # Progress bar
    progress = WorkSplitter()

    # Show hyperparameter settings
    progress.section("Parameter Setting")
    print("Data Directory: {}".format(args.data_dir))
    print("Number of Users Sampled: {}".format(args.num_users_sampled))
    print("Number of Items Sampled: {}".format(args.num_items_sampled))
    print("Number of Max Allowed Iterations: {}".format(args.max_iteration_threshold))
    print("Critiquing Model: {}".format(args.critiquing_model_name))

    R_train = load_numpy(path=args.data_dir, name=args.train_set)
    print("Train U-I Dimensions: {}".format(R_train.shape))

    R_test = load_numpy(path=args.data_dir, name=args.test_set)
    print("Test U-I Dimensions: {}".format(R_test.shape))

    R_train_keyphrase = load_numpy(path=args.data_dir, name=args.train_keyphrase_set).toarray()
    print("Train Item Keyphrase U-I Dimensions: {}".format(R_train_keyphrase.shape))

    R_train_item_keyphrase = load_numpy(path=args.data_dir, name=args.train_item_keyphrase_set).toarray()

    # Fixed hyperparameters (the commented tuning-table lookup and its
    # unused table_path load were removed; the table path is loaded
    # below where it is actually needed).
    parameters_row = {
        'iter': 10,
        'lambda': 200,
        'rank': 200
    }

    keyphrases_names = load_dataframe_csv(
        path=args.data_dir, name="Keyphrases.csv")['Phrases'].tolist()

    results = critiquing(matrix_Train=R_train,
                         matrix_Test=R_test,
                         keyphrase_freq=R_train_keyphrase,
                         item_keyphrase_freq=R_train_item_keyphrase.T,
                         num_users_sampled=args.num_users_sampled,
                         num_items_sampled=args.num_items_sampled,
                         max_iteration_threshold=args.max_iteration_threshold,
                         dataset_name=args.dataset_name,
                         model=models[args.model],
                         parameters_row=parameters_row,
                         critiquing_model_name=args.critiquing_model_name,
                         lamb=args.lambdas,
                         keyphrases_names=keyphrases_names,
                         keyphrase_selection_method=args.keyphrase_selection_method)

    table_path = load_yaml('config/global.yml', key='path')['tables']
    save_dataframe_csv(results, table_path, args.save_path)
Ejemplo n.º 4
0
def run(options):
    """Define and execute the Apache Beam pipeline.

    :param options: ``apache_beam.options.pipeline_options.PipelineOptions``
        object containing arguments that should be used for running the
        Beam job.
    """
    with beam.Pipeline(options=options) as pipeline:

        # Load configs
        metadata = io.load_yaml(filename='configs/metadata.yml')

        # Build the query for the configured date window.
        hist_trans_query = io.load_sql_query('sql/pull_hist_trans.sql').format(
            start_date=metadata['input_start_date'],
            end_date=metadata['input_end_date'])

        source = beam.io.BigQuerySource(query=hist_trans_query,
                                        use_standard_sql=True)
        sink = io.DumpToCSV(header=metadata['output_headers'],
                            file_path_prefix=metadata['output_file'],
                            file_name_suffix='',
                            compression_type=CompressionTypes.AUTO,
                            shard_name_template='')

        # Pull the query results and write them to CSV.
        _ = (pipeline
             | 'Pull data for historical transactions' >> beam.io.Read(source)
             | 'Write data to CSV' >> sink)
Ejemplo n.º 5
0
def main(args):
    """Evaluate the best tuned models (by NDCG) on the test set and save metrics."""
    table_path = load_yaml('config/global.yml', key='path')['tables']

    best_params = find_best_hyperparameters(
        table_path + args.tuning_result_path, 'NDCG')

    R_train = load_numpy(path=args.data_dir, name=args.train_set)
    R_valid = load_numpy(path=args.data_dir, name=args.valid_set)
    R_test = load_numpy(path=args.data_dir, name=args.test_set)

    # Fold validation interactions into training for the final evaluation.
    R_train = R_train + R_valid

    topK = [5, 10, 15, 20, 50]

    frame = []
    for _, row in best_params.iterrows():
        start = timeit.default_timer()
        settings = row.to_dict()
        settings['metric'] = ['R-Precision', 'NDCG', 'Precision', 'Recall', "MAP"]
        settings['topK'] = topK
        result = general(R_train,
                         R_test,
                         settings,
                         models[settings['model']],
                         measure=settings['similarity'],
                         gpu_on=args.gpu,
                         model_folder=args.model_folder)
        stop = timeit.default_timer()
        print('Time: ', stop - start)
        frame.append(result)

    save_dataframe_csv(pd.concat(frame), table_path, args.save_path)
Ejemplo n.º 6
0
def show_uncertainty(df,
                     x,
                     y,
                     hue='model',
                     folder='unknown',
                     name='uncertainty_analysis',
                     save=True):
    """Plot a lowess-smoothed uncertainty curve for every model.

    One regression line is drawn per distinct value in the ``hue``
    column. When ``save`` is True the figure is written as both PDF and
    PNG under the configured figures directory, otherwise it is shown.
    """
    fig, ax = plt.subplots(figsize=(6, 4))
    for model_name in df[hue].unique():
        subset = df[df[hue] == model_name]
        sns.regplot(x=x,
                    y=y,
                    data=subset,
                    lowess=True,
                    scatter=False,
                    ax=ax,
                    label=model_name)
    plt.ylabel("Model Uncertainty \n (Standard Derivation)")
    plt.xlabel("Number of Ratings")
    plt.legend(loc='center right')
    plt.tight_layout()
    if not save:
        plt.show()
        return
    fig_path = load_yaml('config/global.yml', key='path')['figs']
    for ext in ('pdf', 'png'):
        fig.savefig('{0}/analysis/{1}/{2}.{3}'.format(fig_path, folder, name, ext),
                    bbox_inches="tight",
                    pad_inches=0,
                    format=ext)
Ejemplo n.º 7
0
 def load_config(cls, config_file_uri, environment):
     """
     Fetch the config file, parse it as YAML, and return the section
     for the requested environment.
     """
     local_path = io_utils.fetch_file(config_file_uri)
     parsed_config = io_utils.load_yaml(local_path)
     return parsed_config[environment]
Ejemplo n.º 8
0
def main(args):
    """Evaluate the best tuned models (by NDCG) and save the metric table."""
    table_path = load_yaml('config/global.yml', key='path')['tables']

    best_params = find_best_hyperparameters(table_path + args.problem, 'NDCG')

    R_train = load_numpy(path=args.path, name=args.train)
    R_valid = load_numpy(path=args.path, name=args.valid)
    R_test = load_numpy(path=args.path, name=args.test)

    # Fold validation interactions into training for the final evaluation.
    R_train = R_train + R_valid

    topK = [5, 10, 15, 20, 50]

    frame = []
    for _, row in best_params.iterrows():
        start = timeit.default_timer()
        settings = row.to_dict()
        settings['metric'] = ['R-Precision', 'NDCG', 'Precision', 'Recall', "MAP"]
        settings['topK'] = topK
        result = execute(R_train,
                         R_test,
                         settings,
                         models[settings['model']],
                         gpu_on=args.gpu)
        stop = timeit.default_timer()
        print('Time: ', stop - start)
        frame.append(result)

    save_dataframe_csv(pd.concat(frame), table_path, args.name)
Ejemplo n.º 9
0
def main(args):
    """Load data and run keyphrase explanation with saved parameters."""
    params = load_yaml(args.parameters)

    # Count distinct users/items from their id columns.
    num_users = pd.read_csv(
        args.data_dir + args.user_col + '.csv')[args.user_col].nunique()
    num_items = pd.read_csv(
        args.data_dir + args.item_col + '.csv')[args.item_col].nunique()

    # Keep only positive interactions and parse keyphrase vectors.
    df_train = pd.read_csv(args.data_dir + args.train_set)
    df_train = df_train[df_train[args.rating_col] == 1]
    df_train[args.keyphrase_vector_col] = df_train[
        args.keyphrase_vector_col].apply(ast.literal_eval)

    df_test = pd.read_csv(args.data_dir + args.test_set)

    keyphrase_names = pd.read_csv(
        args.data_dir + args.keyphrase_set)[args.keyphrase_col].values

    explain(num_users,
            num_items,
            args.user_col,
            args.item_col,
            args.rating_col,
            args.keyphrase_vector_col,
            df_train,
            df_test,
            keyphrase_names,
            params,
            load_path=args.load_path,
            save_path=args.save_path)
Ejemplo n.º 10
0
def main(args):
    """Evaluate the best models (by MAP@10) on the held-out test set."""
    table_path = load_yaml('config/global.yml', key='path')['tables']

    best_params = find_best_hyperparameters(
        table_path + args.tuning_result_path, 'MAP@10')

    R_train = load_numpy(path=args.path, name=args.train)
    R_valid = load_numpy(path=args.path, name=args.valid)
    R_test = load_numpy(path=args.path, name=args.test)

    # Fold validation interactions into training for the final evaluation.
    R_train = R_train + R_valid

    topK = [5, 10, 15, 20, 50]

    frame = []
    for _, row in best_params.iterrows():
        start = timeit.default_timer()
        settings = row.to_dict()
        settings['metric'] = ['R-Precision', 'NDCG', 'Precision', 'Recall', "MAP"]
        settings['topK'] = topK
        result = execute(R_train, R_test, settings, models[settings['model']])
        stop = timeit.default_timer()
        print('Time: ', stop - start)
        frame.append(result)

    save_dataframe_csv(pd.concat(frame), table_path, args.name)
Ejemplo n.º 11
0
def pandas_ridge_plot(df,
                      model,
                      pop,
                      k,
                      folder='figures',
                      name='personalization',
                      save=True):
    """Draw an overlapping ridge (joy) plot of item popularity per model.

    Parameters
    ----------
    df : DataFrame with one row per recommended item.
    model : column name holding the model label (one ridge per model).
    pop : column name holding the popularity values to densify.
    k : recommendation cutoff, used only in the x-axis label.
    folder, name : output subdirectory and file stem.
    save : write PDF/PNG under the configured figures path when True,
        otherwise display interactively.
    """
    sns.set(style="white", rc={"axes.facecolor": (0, 0, 0, 0)})
    # NOTE(review): reads the literal ``df.model`` column while the grid
    # facets on the ``model`` argument — confirm callers always pass 'model'.
    num_models = len(df.model.unique())

    # Initialize the FacetGrid object
    pal = sns.cubehelix_palette(num_models, rot=-.25, light=.7)
    g = sns.FacetGrid(df,
                      row=model,
                      hue=model,
                      aspect=10,
                      height=1,
                      palette=pal)

    # Draw the densities in a few steps
    g.map(sns.kdeplot, pop, clip_on=False, shade=True, alpha=1, lw=1.5, bw=50)
    g.map(sns.kdeplot, pop, clip_on=False, color="w", lw=1.5, bw=50)
    g.map(plt.axhline, y=0, lw=2, clip_on=False)

    # Define and use a simple function to label the plot in axes coordinates
    def label(x, color, label):
        ax = plt.gca()
        ax.text(-0.1,
                .1,
                label,
                fontweight="bold",
                color=color,
                ha="left",
                va="center",
                transform=ax.transAxes)

    g.map(label, pop)

    # Set the subplots to overlap
    g.fig.subplots_adjust(hspace=-0.8)

    # Remove axes details that don't play well with overlap
    g.set_xlabels(
        "Popularity Distribution of The Top-{0} Recommended Items".format(k))
    g.set_titles("")
    g.set(yticks=[])
    g.despine(bottom=True, left=True)
    if save:
        # (a stray ``pass`` that made this branch look disabled was removed)
        fig_path = load_yaml('config/global.yml', key='path')['figs']
        plt.savefig("{2}/{0}/{1}.pdf".format(folder, name, fig_path),
                    format="pdf")
        plt.savefig("{2}/{0}/{1}.png".format(folder, name, fig_path),
                    format="png")
    else:
        plt.show()
    plt.close()
Ejemplo n.º 12
0
def main(args):
    """Tune hyperparameters for a single model on one dataset."""
    progress = WorkSplitter()
    progress.section("Tune Parameters")

    params = load_yaml(args.grid)
    # Restrict the grid to the single requested model class.
    model_name = params['models']
    params['models'] = {model_name: models[model_name]}

    train = load_numpy(path=args.path, name=args.dataset + args.train)
    unif_train = load_numpy(path=args.path, name=args.dataset + args.unif_train)
    valid = load_numpy(path=args.path, name=args.dataset + args.valid)

    hyper_parameter_tuning(train, valid, params,
                           unif_train=unif_train,
                           save_path=args.dataset + args.name,
                           gpu_on=args.gpu,
                           seed=args.seed,
                           way=args.way,
                           dataset=args.dataset)
Ejemplo n.º 13
0
def main(args):
    """Tune hyperparameters for the model named in the grid config."""
    params = load_yaml(args.grid)
    # Restrict the grid to the single requested model class.
    model_name = params['models']
    params['models'] = {model_name: models[model_name]}

    R_train = load_numpy(path=args.path, name=args.train)
    R_valid = load_numpy(path=args.path, name=args.valid)

    hyper_parameter_tuning(R_train, R_valid, params,
                           save_path=args.name,
                           measure=params['similarity'],
                           gpu_on=args.gpu)
Ejemplo n.º 14
0
def read(conf, path, func, write=True, update=False):
    """Return a cached YAML value, computing it when missing or stale.

    If a cached file exists and ``update`` is False, load and return it.
    Otherwise the value comes from ``func``: persisted through ``_write``
    when ``write`` is True, or returned directly when it is False.
    """
    full_path = os.path.join(conf.data_folder, f"{path}.yaml")

    use_cache = not update and os.path.exists(full_path)
    if use_cache:
        logger.debug(f"Load value of '{path}'", prefix=conf.logger_prefix)
        return io.load_yaml(full_path)

    if write:
        return _write(conf, path, func)
    return func()
Ejemplo n.º 15
0
def main(args):
    """Plot convergence (epoch vs NDCG) for the best tuned models."""
    table_path = load_yaml('config/global.yml', key='path')['tables']

    best_params = find_best_hyperparameters(table_path + args.param, 'NDCG')

    R_train = load_numpy(path=args.path, name=args.train)
    R_valid = load_numpy(path=args.path, name=args.valid)

    progress_df = converge(R_train, R_valid, best_params, table_path,
                           args.name, epochs=500, gpu_on=args.gpu)

    show_training_progress(progress_df, hue='model', metric='NDCG',
                           name="epoch_vs_ndcg")
Ejemplo n.º 16
0
def pandas_bar_plot(df,
                    x,
                    y,
                    hue,
                    x_name,
                    y_name,
                    folder='figures',
                    name='unknown',
                    save=True):
    """Draw a grouped bar plot with per-group hatching.

    When ``save`` is True, writes <fig_path>/<folder>/<name>_bar.{pdf,png}
    plus a standalone legend figure; otherwise shows the plot.
    """
    fig, ax = plt.subplots(figsize=(6, 3))
    # White-faced bars with black edges so hatching carries the grouping.
    sns.barplot(ax=ax,
                x=x,
                y=y,
                hue=hue,
                data=df,
                errwidth=1,
                edgecolor='black',
                facecolor=(1, 1, 1, 0))  #, errwidth=0.5

    # Bars are laid out hue-group by hue-group, ``num_category`` bars per
    # group, so the hatch advances once per group.
    num_category = len(df[x].unique())
    hatch = None
    hatches = itertools.cycle([
        '//', '**', '////', '----', 'xxxx', '\\\\\\\\', ' ', '\\', '...',
        'OOO', "++++++++"
    ])
    for i, bar in enumerate(ax.patches):
        if i % num_category == 0:
            hatch = next(hatches)
        bar.set_hatch(hatch)

    plt.xlabel(x_name)
    plt.ylabel(y_name)
    #plt.xticks(rotation=15)
    # The legend is created (so its handles exist for the standalone
    # legend figure below) and then removed from the main axes.
    plt.legend(loc='upper left', ncol=5)
    # if 'Precision' not in y:
    ax.legend_.remove()
    plt.tight_layout()
    if save:
        fig_path = load_yaml('config/global.yml', key='path')['figs']
        plt.savefig("{2}/{0}/{1}_bar.pdf".format(folder, name, fig_path),
                    format="pdf")
        plt.savefig("{2}/{0}/{1}_bar.png".format(folder, name, fig_path),
                    format="png")

        # Standalone legend figure shared across bar plots.
        # NOTE(review): saved to a hardcoded 'figs/' path rather than
        # fig_path — confirm whether this is intentional.
        fig_leg = plt.figure(figsize=(12, 0.7))
        ax_leg = fig_leg.add_subplot(111)
        ax_leg.legend(*ax.get_legend_handles_labels(), loc='center', ncol=10)
        ax_leg.axis('off')
        fig_leg.savefig('figs/bar_legend.pdf', format='pdf')

    else:
        plt.show()
    plt.close()
def main(args):
    """Merge YAML configs, seed all RNGs, and launch the agent runs."""
    general_params = load_yaml(args.general)
    data_params = load_yaml(args.data)
    agent_params = load_yaml(args.agent)

    # Command-line flags override the YAML values.
    general_params['verbose'] = args.verbose
    general_params['cuda'] = torch.cuda.is_available()

    final_params = SimpleNamespace(**general_params, **data_params,
                                   **agent_params)
    time_start = time.time()
    print(final_params)

    # Seed every RNG so runs are reproducible.
    seed = final_params.seed
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    if final_params.cuda:
        torch.cuda.manual_seed(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

    # Launch the runs.
    multiple_run(final_params)
Ejemplo n.º 18
0
def hyper_parameter_tuning(train, validation, params, save_path):
    """Grid-search ``k`` for each model and append results to a CSV table.

    Resumes from an existing results table when present, so completed
    (model, k) pairs are not re-run; the table is saved after every
    evaluated combination.
    """
    progress = WorkSplitter()
    table_path = load_yaml('config/global.yml', key='path')['tables']

    try:
        df = load_dataframe_csv(table_path, save_path)
    except Exception:  # narrowed from a bare except so Ctrl-C still works
        df = pd.DataFrame(columns=['model', 'k', 'topK'])

    for algorithm in params['models']:

        for k in params['k']:

            # Skip combinations that already have a row in the table.
            if ((df['model'] == algorithm) & (df['k'] == k)).any():
                continue

            # Renamed: the original local ``format`` shadowed the builtin.
            header = "model: {}, k: {}"
            progress.section(header.format(algorithm, k))

            progress.subsection("Training")
            model = params['models'][algorithm]()
            model.train(train)

            progress.subsection("Prediction")
            prediction_score = model.predict(train, k=k)

            prediction = predict(prediction_score=prediction_score,
                                 topK=params['topK'][-1],
                                 matrix_Train=train)

            progress.subsection("Evaluation")
            result = evaluate(prediction, validation, params['metric'],
                              params['topK'])

            result_dict = {'model': algorithm, 'k': k}

            # Store [mean, spread] rounded to 4 decimals per metric.
            for name in result.keys():
                result_dict[name] = [
                    round(result[name][0], 4),
                    round(result[name][1], 4)
                ]

            # DataFrame.append was removed in pandas 2.0; concat is the
            # drop-in equivalent for appending a single dict row.
            df = pd.concat([df, pd.DataFrame([result_dict])],
                           ignore_index=True)

            save_dataframe_csv(df, table_path, save_path)
Ejemplo n.º 19
0
def show_training_progress(df,
                           hue='model',
                           metric='NDCG',
                           name="epoch_vs_ndcg",
                           save=True):
    """Line-plot a training metric over epochs, one line per model."""
    fig, ax = plt.subplots(figsize=(8, 4))
    ax = sns.lineplot(x='epoch', y=metric, hue=hue, style=hue, data=df)
    plt.tight_layout()
    if not save:
        plt.show()
        return
    fig_path = load_yaml('config/global.yml', key='path')['figs']
    fig.savefig('{0}/train/progress/{1}.png'.format(fig_path, name),
                bbox_inches="tight",
                pad_inches=0,
                format='png')
def main(args):
    """Tune a keyphrase recommendation model (or its explanation head)."""
    params = load_yaml(args.parameters)

    # Restrict the grid to the single requested model class.
    model_name = params['models']
    params['models'] = {model_name: models[model_name]}

    # Count distinct users/items from their id columns.
    num_users = pd.read_csv(args.data_dir + args.user_col +
                            '.csv')[args.user_col].nunique()
    num_items = pd.read_csv(args.data_dir + args.item_col +
                            '.csv')[args.item_col].nunique()

    # Keep only positive interactions and parse keyphrase vectors.
    df_train = pd.read_csv(args.data_dir + args.train_set)
    df_train = df_train[df_train[args.rating_col] == 1]
    df_train[args.keyphrase_vector_col] = df_train[
        args.keyphrase_vector_col].apply(ast.literal_eval)

    df_valid = pd.read_csv(args.data_dir + args.valid_set)

    keyphrase_names = pd.read_csv(args.data_dir + args.keyphrase_set)[
        args.keyphrase_col].values

    # Both tuners take the same arguments; only the entry point differs.
    if args.explanation:
        tuner = explanation_parameter_tuning
    else:
        tuner = hyper_parameter_tuning
    tuner(num_users,
          num_items,
          args.user_col,
          args.item_col,
          args.rating_col,
          args.keyphrase_vector_col,
          df_train,
          df_valid,
          keyphrase_names,
          params,
          save_path=args.save_path)
Ejemplo n.º 21
0
def precision_recall_curve(df,
                           k,
                           folder='figures',
                           name='precison_recall',
                           save=True,
                           reloaded=False):
    """Plot one precision-recall curve per model row of ``df``.

    When ``reloaded`` is True the metric cells are strings (read back
    from CSV) and are parsed with ``literal_eval`` first.
    """
    fig, ax = plt.subplots(figsize=(4, 3))
    precisions = ["Precision@" + str(x) for x in k]
    recalls = ["Recall@" + str(x) for x in k]

    markers = ['o', '^', 's', '*', 'P', 'h', 'd', '3', 'X', 'v', '+']
    for i in range(len(df)):
        p_cells = df[precisions].iloc[i].tolist()
        r_cells = df[recalls].iloc[i].tolist()
        if reloaded:
            p_cells = [literal_eval(c) for c in p_cells]
            r_cells = [literal_eval(c) for c in r_cells]
        precision = [c[0] for c in p_cells]
        recall = [c[0] for c in r_cells]
        ax.plot(recall,
                precision,
                label=df['model'].iloc[i],
                marker=markers[i])

    plt.xlabel("Recall")
    plt.ylabel("Precision")
    plt.tight_layout()
    if save:
        fig_path = load_yaml('config/global.yml', key='path')['figs']
        plt.savefig("{2}/{0}/{1}_curve.pdf".format(folder, name, fig_path),
                    format="pdf")
        plt.savefig("{2}/{0}/{1}_curve.png".format(folder, name, fig_path),
                    format="png")

        # Save the legend by itself so figures can share a single legend.
        fig_leg = plt.figure(figsize=(12, 0.7))
        ax_leg = fig_leg.add_subplot(111)
        ax_leg.legend(*ax.get_legend_handles_labels(), loc='center', ncol=10)
        ax_leg.axis('off')
        fig_leg.savefig('figs/legend.pdf', format='pdf')

    else:
        plt.show()
    plt.close()
Ejemplo n.º 22
0
    def test_inject_token_auth(self):
        """An auth token passed to the factory ends up in the endpoint URI."""
        auth_token = "abc123456"
        expected_endpoint = "https://test.com/?token={0}".format(auth_token)
        target_env = "dev"

        # Stub load_config so it returns the parsed fixture for target_env.
        fixture_path = fetch_file(resource_uri("test_config_with_auth_token.yaml"))
        fixture_yaml = load_yaml(fixture_path)
        stub_utils = ConfigUtilsDummy({'load_config': fixture_yaml[target_env]})

        config = ConfigFactory.create_from_file(
            environment=target_env,
            config_file_uri="some dummy uri",
            auth_token=auth_token,
            validate_contract_settings=False,
            config_utils=stub_utils,
        )
        self.assertEqual(config.auth_token, auth_token)
        self.assertEqual(config.eth_provider_args['endpoint_uri'], expected_endpoint)
Ejemplo n.º 23
0
def show_samples(images, row, col, image_shape, name="Unknown", save=True, shift=False):
    """Render a ``row`` x ``col`` grid of images and save or show it.

    Parameters
    ----------
    images : sequence of flattened images; the first row*col are used.
    row, col : grid dimensions.
    image_shape : shape each flat image is reshaped to before display.
    name : output file stem (saved to <figs>/train/grid/<name>.png).
    save : write to disk when True, otherwise show interactively.
    shift : rescale pixel values from [-1, 1] to [0, 1] first.
    """
    num_images = row*col
    if shift:
        images = (images+1.)/2.
    fig = plt.figure(figsize=(col, row))
    grid = ImageGrid(fig, 111,
                     nrows_ncols=(row, col),
                     axes_pad=0.)
    # BUG FIX: ``xrange`` is Python 2 only and raises NameError on
    # Python 3; ``range`` is the equivalent.
    for i in range(num_images):
        im = images[i].reshape(image_shape)
        axis = grid[i]
        axis.axis('off')
        axis.imshow(im)
    plt.axis('off')
    plt.tight_layout()
    if save:
        fig_path = load_yaml('config/global.yml', key='path')['figs']
        fig.savefig('{0}/train/grid/{1}.png'.format(fig_path, name), bbox_inches="tight", pad_inches=0, format='png')
    else:
        plt.show()
Ejemplo n.º 24
0
def multi_modes_count(df, y='modes', save=True):
    """Bar-count users by how many preference modes they have.

    Derives a human-readable 'modes' column from the numeric 'y' column,
    then draws a hatched count plot and saves or shows it.
    """
    def udf(row):
        # NOTE(review): reads the literal 'y' column, not the ``y``
        # argument — appears intentional since ``y`` names the label column.
        labels = {1.0: "One Mode", 2.0: "Two Modes",
                  3.0: "Three Modes", 4.0: "Four Modes"}
        return labels.get(row['y'], "Five Modes")

    df = df.sort_values(by=['y'])
    df['modes'] = df.apply(udf, axis=1)

    fig, ax = plt.subplots(figsize=(3, 2.5))

    sns.countplot(x=y, data=df)

    # Cycle hatches so bars remain distinguishable in grayscale print.
    hatches = itertools.cycle(['//', 'xx', '\\\\\\'])
    for bar in ax.patches:
        bar.set_hatch(next(hatches))

    plt.xlabel("Number of User Preferences")
    plt.ylabel("Number of Users")
    plt.xticks(rotation=15)

    plt.legend()
    plt.tight_layout()

    if save:
        fig_path = load_yaml('config/global.yml', key='path')['figs']
        plt.savefig("{0}/modes_count.pdf".format(fig_path), format="pdf")
        plt.savefig("{0}/modes_count.png".format(fig_path), format="png")

    else:
        plt.show()
    plt.close()
def main(args):
    """Tune a keyphrase model on binarized keyphrase matrices."""
    params = load_yaml(args.parameters)

    # Restrict the grid to the single requested model class.
    model_name = params['models']
    params['models'] = {model_name: models[model_name]}

    R_train = load_numpy(path=args.data_dir, name=args.train_set)
    R_valid = load_numpy(path=args.data_dir, name=args.valid_set)
    R_train_keyphrase = load_numpy(path=args.data_dir,
                                   name=args.train_keyphrase_set)
    R_valid_keyphrase = load_numpy(path=args.data_dir,
                                   name=args.valid_keyphrase_set)

    # Binarize keyphrase frequencies: any nonzero count becomes 1.
    R_train_keyphrase[R_train_keyphrase != 0] = 1
    R_valid_keyphrase[R_valid_keyphrase != 0] = 1

    hyper_parameter_tuning(R_train,
                           R_valid,
                           R_train_keyphrase.todense(),
                           R_valid_keyphrase,
                           params,
                           save_path=args.save_path,
                           tune_explanation=args.tune_explanation)
Ejemplo n.º 26
0
def show_training_progress(df,
                           hue='model',
                           metric='NDCG',
                           name="epoch_vs_ndcg",
                           save=True):
    """Line-plot a training metric over epochs, one line per model."""
    fig, ax = plt.subplots(figsize=(6, 3))
    # Sort so models appear in a stable legend order.
    df = df.sort_values(by=['model'])
    ax = sns.lineplot(x='epoch', y=metric, hue=hue, style=hue, data=df, ci=68)
    ax.set_xlabel("Epoch")
    # Drop seaborn's legend title entry (the first handle/label pair).
    handles, labels = ax.get_legend_handles_labels()
    ax.legend(handles=handles[1:], labels=labels[1:])
    plt.tight_layout()
    if not save:
        plt.show()
        return
    fig_path = load_yaml('config/global.yml', key='path')['figs']
    fig.savefig('{0}/{1}.pdf'.format(fig_path, name),
                bbox_inches="tight",
                pad_inches=0,
                format='pdf')
Ejemplo n.º 27
0
def main(args):
    """Break down evaluation metrics by user category."""
    table_path = load_yaml('config/global.yml', key='path')['tables']
    best_params = find_best_hyperparameters(table_path + args.problem, 'NDCG')

    R_train = load_numpy(path=args.path, name=args.train)
    R_valid = load_numpy(path=args.path, name=args.valid)
    R_test = load_numpy(path=args.path, name=args.test)

    # Fold validation interactions into training for the final evaluation.
    R_train = R_train + R_valid

    topK = [5, 10, 15, 20, 25, 30, 35, 40, 45, 50]
    metric = ['R-Precision', 'NDCG', 'Clicks', 'Recall', 'Precision', 'MAP']

    usercategory(R_train,
                 R_test,
                 best_params,
                 topK,
                 metric,
                 args.problem,
                 args.model_folder,
                 gpu_on=args.gpu)
Ejemplo n.º 28
0
def main(args):
    """Measure personalization of the best tuned models."""
    table_path = load_yaml('config/global.yml', key='path')['tables']

    best_params = find_best_hyperparameters(table_path + args.problem, 'NDCG')

    R_train = load_numpy(path=args.path, name=args.train)
    R_valid = load_numpy(path=args.path, name=args.valid)
    R_test = load_numpy(path=args.path, name=args.test)

    # Fold validation interactions into training for the final evaluation.
    R_train = R_train + R_valid

    topK = [1, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50]

    personalization(R_train,
                    R_test,
                    best_params,
                    topK,
                    args.problem,
                    args.model_folder,
                    gpu_on=args.gpu)
Ejemplo n.º 29
0
def multi_modes_histogram(df, x='x', y='y', save=True):
    """Overlay interaction-count histograms for 1-, 2-, and 3-mode users."""
    fig, ax = plt.subplots(figsize=(4, 3))
    for mode_value, mode_label in ((1.0, "One Mode"),
                                   (2.0, "Two Modes"),
                                   (3.0, "Three Modes")):
        sns.distplot(df[df[y] == mode_value][x], kde=False, label=mode_label)

    plt.xlabel("Number of Observed Interactions")
    plt.ylabel("Frequency")

    plt.legend()
    plt.tight_layout()

    if save:
        fig_path = load_yaml('config/global.yml', key='path')['figs']
        plt.savefig("{0}/multi_mode.pdf".format(fig_path), format="pdf")
        plt.savefig("{0}/multi_mode.png".format(fig_path), format="png")

    else:
        plt.show()
    plt.close()
Ejemplo n.º 30
0
def latent_distribution_ellipse(means, stds, keep_rate, lim=6, name="Unknown", save=True):
    """Draw one ellipse per latent mean/std pair and save or show the figure.

    Each row i of ``means``/``stds`` becomes an ellipse centered at
    means[i] with axis widths stds[i]; legend labels show
    keep_rate**(i+1). Saved to <figs>/train/grid/<name>.png when
    ``save`` is True.
    """
    fig, ax = plt.subplots(figsize=(4, 4))
    # (an unused ``patches = []`` local was removed)
    m, _ = means.shape
    colors = sns.color_palette("Blues", 10).as_hex()
    plt.axis('equal')
    handles = []
    for i in range(m):
        ellipse = mpatches.Ellipse(means[i], stds[i][0], stds[i][1],
                                   edgecolor=colors[i], lw=3, facecolor='none', label="{:10.4f}".format(keep_rate**(i+1)))
        handles.append(ellipse)
        ax.add_artist(ellipse)

    ax.set(xlim=[-lim, lim], ylim=[-lim, lim])
    plt.legend(handles=handles)
    plt.tight_layout()
    if save:
        fig_path = load_yaml('config/global.yml', key='path')['figs']
        fig.savefig('{0}/train/grid/{1}.png'.format(fig_path, name), bbox_inches="tight", pad_inches=0, format='png')
    else:
        plt.show()