Example #1
def set_trainer(config):

    # load a checkpoint
    if config.checkpoint is not None:
        # load data
        train_loader = load_data(config, 'train', False)
        model, optimizer, word_map, start_epoch = load_checkpoint(config.checkpoint, device)
        print('\nLoaded checkpoint from epoch %d.\n' % (start_epoch - 1))
    
    # or initialize model
    else:
        start_epoch = 0

        # load data
        train_loader, embeddings, emb_size, word_map, n_classes, vocab_size = load_data(config, 'train', True)

        model = models.setup(
            config = config, 
            n_classes = n_classes, 
            vocab_size = vocab_size,
            embeddings = embeddings, 
            emb_size = emb_size
        )

        optimizer = optim.Adam(
            params = filter(lambda p: p.requires_grad, model.parameters()), 
            lr = config.lr
        )

    # loss functions
    loss_function = nn.CrossEntropyLoss()

    # move to device
    model = model.to(device)
    loss_function = loss_function.to(device)

    trainer = Trainer(
        num_epochs = config.num_epochs,
        start_epoch = start_epoch,
        train_loader = train_loader,
        model = model, 
        model_name = config.model_name,
        loss_function = loss_function, 
        optimizer = optimizer,
        lr_decay = config.lr_decay,
        word_map = word_map,
        grad_clip = config.grad_clip, 
        print_freq = config.print_freq,
        checkpoint_path = config.checkpoint_path, 
        checkpoint_basename = config.checkpoint_basename
    )

    return trainer
Example #2
def main() -> None:
    
    parser = argparse.ArgumentParser(description="Flower")
    parser.add_argument(
        "--server_address",
        type=str,
        default=DEFAULT_SERVER_ADDRESS,
        help=f"gRPC server address (default: {DEFAULT_SERVER_ADDRESS})",
    )
    parser.add_argument(
        "--cid", type=str, required=True, help="Client CID (no default)"
    )
    parser.add_argument(
        "--log_host", type=str, help="Logserver address (no default)",
    )
    args = parser.parse_args()

    # Configure logger
    fl.common.logger.configure(f"client_{args.cid}", host=args.log_host)

    # Load model and data
    model = models.load_model(model_name=glb.MODEL, framework="PT")
    model.to(DEVICE)
    trainset, testset = datasets.load_data(dataset_name=glb.DATASET, framework="PT")

    # Start client
    client = PyTorchClient(args.cid, model, trainset, testset)
    try:
        fl.client.start_client(args.server_address, client)
    except:
        print("Either something went wrong or server finished execution!!")
Example #3
def main():

    model1 = 'vae'
    model2 = 'dcgan'
    dataset1 = ''
    epoch = 200
    batchsize = 50
    output1 = 'output'
    zdims = 256

    # Make output directory if it does not exist
    if not os.path.isdir(output1):
        os.mkdir(output1)

    datasets = load_data(dataset1)

    # Construct model
    if model1 not in models:
        raise Exception('Unknown model:', model1)

    model = models[model1](input_shape=datasets.shape[1:],
                           z_dims=zdims,
                           output=output1)

    # Training loop
    datasets = datasets.images * 2.0 - 1.0
    samples = np.random.normal(size=(100, zdims)).astype(np.float32)
    model.main_loop(datasets,
                    samples,
                    epochs=epoch,
                    batchsize=batchsize,
                    reporter=['loss', 'g_loss', 'd_loss', 'g_acc', 'd_acc'])
Example #4
    def train(self,
              tabular_path: str,
              join_result_path: str,
              model_path: str,
              model_weights_path=None,
              histogram_path=None) -> None:
        """
        Train a ranking model for the spatial join cost estimator, then save the trained model to a file
        """

        # Extract train and test data, but only use train data
        X_train, y_train = datasets.load_data(tabular_path,
                                              RankingModel.TARGET,
                                              RankingModel.DROP_COLUMNS)
        X_train, X_val, y_train, y_val = train_test_split(X_train,
                                                          y_train,
                                                          test_size=0.2,
                                                          random_state=1)

        query_train = [X_train.shape[0]]
        query_val = [X_val.shape[0]]

        gbm = lgb.LGBMRanker()
        model = gbm.fit(X_train,
                        y_train,
                        group=query_train,
                        eval_set=[(X_val, y_val)],
                        eval_group=[query_val],
                        eval_at=[1, 2],
                        early_stopping_rounds=50)

        # Fit and save the model
        # model = self.rnk_model.fit(X_train, y_train)
        pickle.dump(model, open(model_path, 'wb'))
Example #5
def load_data(data_type):
    if data_type == 'mnist':
        return dsets.mnist.load_data()
    elif data_type == 'svhn':
        return dsets.svhn.load_data()
    else:
        return dsets.load_data(data_type)
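
A minimal usage sketch for the dispatcher above; what each call returns depends entirely on the underlying dsets loaders, so the variable names below are illustrative only:

mnist_data = load_data('mnist')    # routed to dsets.mnist.load_data()
svhn_data = load_data('svhn')      # routed to dsets.svhn.load_data()
other_data = load_data('cifar10')  # any other key falls through to dsets.load_data('cifar10')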
Example #6
    def train(self,
              tabular_path: str,
              join_result_path: str,
              model_path: str,
              model_weights_path=None,
              histogram_path=None) -> None:
        """
        Train a classification model for the spatial join cost estimator, then save the trained model to a file
        """

        # Extract train and test data, but only use train data
        X_train, y_train, join_df = datasets.load_data(
            tabular_path, ClassificationModel.TARGET,
            ClassificationModel.DROP_COLUMNS,
            ClassificationModel.SELECTED_COLUMNS)

        # Fit and save the model
        model = self.clf_model.fit(X_train, y_train)
        pickle.dump(model, open(model_path, 'wb'))

        # Feature importances
        importances = model.feature_importances_

        output_f = open('data/temp/feature_importances.csv', 'w')
        output_f.writelines('feature_name,importance_score\n')

        for fname, fscore in zip(ClassificationModel.SELECTED_COLUMNS,
                                 importances):
            print('{},{}'.format(fname, fscore))
            output_f.writelines('{},{}\n'.format(fname, fscore))

        output_f.close()
Example #7
def plot_1d_experiment(params, output_path):
    gp, gp_list, s_list, loss_list = pickle_data(
        get_folder_name(params, output_path))

    data, X_range = datasets.load_data(params["data_name"])

    print(X_range.shape)
    h, w = X_range.shape
    for i in range(len(gp_list)):
        gpi = gp_list[i]
        gpi.Ytrain = -gpi.Ytrain
        gpi.Ytrain_original = -gpi.Ytrain_original
        gpi.mean = -gpi.mean
        X_new, Y_new = s_list[i]
        Y_new = -Y_new
        plt.title("Iteration: " + str((i + 1) * params["save_every"]))
        plt.xlabel('x')
        plt.ylabel('y')
        mean, cov = gpi.predict(X_range)
        var = cov.diagonal()[:, np.newaxis]
        plt.scatter(gpi.Xtrain,
                    gpi.Ytrain_original,
                    color='green',
                    marker='x',
                    s=50)  # training data
        plt.plot(X_range, mean, color='blue')  # GP mean
        plt.plot(X_range, mean + var, color='red')  # GP mean
        plt.plot(X_range, mean - var, color='red')  # GP mean
        plt.plot(X_range, 5 + 3 * data, color='black')  # GP mean - std
        plt.scatter(X_new, Y_new, color='purple', marker='*', s=100)  # data
        plt.xlim(-3 * np.pi, 3 * np.pi)
        plt.show()

        print(X_new, Y_new)
Example #8
def run_experiment(params, output_path):
    folder_name = get_folder_name(params, output_path)
    if not os.path.exists(folder_name):
        os.makedirs(folder_name)

    print "Started EXPERIMENTS for", folder_name

    k = 0
    for p in params["first_points"]:
        curr_time = current_milli_time() % 100000000000

        out_file = open(folder_name + str(curr_time) + "_stdout", 'w')
        save_stdout = sys.stdout
        sys.stdout = out_file

        data, X_range, X, Y, Z, is_us, goal_func = datasets.load_data(params["data_name"], True)

        if params["acqui_name"] == "lookahead":
            w_s, w_e = params["widths_range"]
            a_s, a_e = params["amps_range"]
            n_s, n_e = params["noise_range"]
            widths = np.logspace(np.log(w_s), np.log(w_e), num=10, base=np.e)
            amps = np.logspace(np.log(a_s), np.log(a_e), num=5, base=np.e)
            noises = np.logspace(np.log(n_s), np.log(n_e), num=5, base=np.e)

        # initialize gaussian process
        gp = gaussian_process.GP_Regressor(params["dimensions"], params["noise"], params["cov_func"],
                                           params["cov_grad"], [params["width"], params["amp"]])

        # print 'Max location', np.unravel_index(np.argmax(data), data.shape)

        #add first point
        X_new = X_range[p, :][np.newaxis, :]
        Y_new = goal_func(X_new)

        gp.update(X_new, Y_new)

        if params["acqui_name"] == "lookahead":
            goal_func = partial(goal_func, sign=-1)
            gp, gp_list, s_list, aux_list = GP_lookahead.lookahead_optimization(
                gp,
                params["iterations"],
                goal_func,
                X_range,
                [widths, amps, noises],
                lookahead_steps=params["steps"],
                save_every=params["save_every"])
        else:
            acqui_func = acquisition_functions.get_function(params["acqui_name"], kappa=params["kappa"], gamma=params["gamma"])
            # execute bayesian optimization
            gp, gp_list, s_list, aux_list = bayesian_optimization.bayesian_optimization(gp, params["iterations"], goal_func,
                                                                              acqui_func, X_range, params["bounds"],
                                                                              save_every=params["save_every"])

        pickle_data(folder_name, data=[gp, gp_list, s_list, aux_list], curr_time=curr_time)
        sys.stdout = save_stdout
        out_file.close()

        k += 1
        print "Experiment", k, "of", len(params["first_points"]), "done"
Example #9
    def train(self,
              tabular_path: str,
              join_result_path: str,
              model_path: str,
              model_weights_path=None,
              histogram_path=None) -> None:
        """
        Train a regression model for the spatial join cost estimator, then save the trained model to a file
        """

        # Extract train and test data, but only use train data
        # X_train, y_train, X_test, y_test = datasets.load_tabular_features_hadoop(RegressionModel.DISTRIBUTION, RegressionModel.MATCHED, RegressionModel.SCALE, RegressionModel.MINUS_ONE)
        # X_train, y_train, X_test, y_test = datasets.load_tabular_features(join_result_path, tabular_path, RegressionModel.NORMALIZE, RegressionModel.MINUS_ONE, RegressionModel.TARGET)
        X_train, y_train, join_df = datasets.load_data(
            tabular_path, RegressionModel.TARGET, RegressionModel.DROP_COLUMNS,
            RegressionModel.SELECTED_COLUMNS)
        # X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=1)
        # query_val = [X_val.shape[0]]
        #
        # Fit and save the model
        model = self.reg_model.fit(X_train, y_train)
        # model = LGBMRanker()
        # model.fit(X_train, y_train, eval_set=[(X_val, y_val)], eval_group=[query_val], eval_at=[1, 2],
        #           early_stopping_rounds=50)

        pickle.dump(model, open(model_path, 'wb'))
Example #10
    def test(self,
             tabular_path: str,
             join_result_path: str,
             model_path: str,
             model_weights_path=None,
             histogram_path=None) -> (float, float, float, float):
        """
        Evaluate the accuracy metrics of a trained model for the spatial join cost estimator
        :return mean_squared_error, mean_absolute_percentage_error, mean_squared_logarithmic_error, mean_absolute_error
        """

        # Extract train and test data, but only use test data
        # X_train, y_train, X_test, y_test = datasets.load_tabular_features_hadoop(RegressionModel.DISTRIBUTION, RegressionModel.MATCHED, RegressionModel.SCALE, RegressionModel.MINUS_ONE)
        # X_train, y_train, X_test, y_test = datasets.load_tabular_features(join_result_path, tabular_path, RegressionModel.NORMALIZE, RegressionModel.MINUS_ONE, RegressionModel.TARGET)
        X_test, y_test = datasets.load_data(tabular_path, RankingModel.TARGET,
                                            RankingModel.DROP_COLUMNS)

        # Load the model and use it for prediction
        loaded_model = pickle.load(open(model_path, 'rb'))
        y_pred = loaded_model.predict(X_test)

        # TODO: delete this dumping action. This is just for debugging
        test_df = pd.DataFrame()
        test_df['y_test'] = y_test
        test_df['y_pred'] = y_pred
        test_df.to_csv('data/temp/test_df.csv')

        # Compute accuracy metrics
        # ndcg = metrics.ndcg_score(y_test, y_pred)
        # acc = metrics.accuracy_score(y_test, y_pred)
        # print('Accuracy:', metrics.accuracy_score(y_test, y_pred))
        ndcg = 0
        return ndcg, ndcg, ndcg, ndcg
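
For reference, the commented-out NDCG computation above needs 2-D inputs: sklearn.metrics.ndcg_score expects arrays of shape (n_queries, n_items). A self-contained sketch with hypothetical relevance labels and scores, assuming the metrics module used above is sklearn.metrics:

import numpy as np
from sklearn import metrics

y_true = np.asarray([[3, 2, 1, 0]])           # hypothetical relevance labels for one query
y_score = np.asarray([[2.4, 0.7, 1.9, 0.2]])  # hypothetical predicted scores for the same items
print('NDCG@2:', metrics.ndcg_score(y_true, y_score, k=2))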
Example #11
def main(args):
    # load data
    x, y = load_data(args.dataset)
    n_clusters = len(np.unique(y))

    # set parameters
    if args.dataset == 'mnist' or args.dataset == 'fmnist':
        args.update_interval = 140
        args.pretrain_epochs = 301
        ae_weights_init = tf.variance_scaling_initializer(scale=1. / 3., mode='fan_in', distribution='uniform')
    # add feature dimension size to the beginning of hidden_dims
    feature_dim = x.shape[1]
    args.encoder_dims = [feature_dim] + args.encoder_dims
    print(args.encoder_dims)
    if args.pretrain == True:
        # pretraining
        print('Begin Pretraining')
        t0 = time()
        pretrainer = Pretrainer(args, ae_weights_init)
        saver = pretrainer(x, y)
        # print(saver)
        print('Pretraining time: %ds' % round(time() - t0))
    # reset the computation graph
    tf.reset_default_graph()
    # model training
    print('Begin Model training')
    t1 = time()
    trainer = Trainer(args, ae_weights_init, n_clusters)
    trainer(x, y)
    print('Model training time: %ds' % round(time() - t1))
Example #12
def eval(config):
    pl.seed_everything(config["seed"])
    config["comment"] = f"{config['comment']}-seed-{config['seed']}"
    config["n_splits"] = 99999 if config["times"]<0 and config["mode"]=="leave one" else config["n_splits"]
    config["times"] = config["times"] if config["times"]>=0 else config["n_splits"]
    dataset = load_data(**config)
    predicts = []
    labels = []
    edges_index = []
    split_idxs = []
    for i, data in zip(range(config["times"]), dataset):
        if config["times"]!=config["n_splits"] and config["times"]!=i+1:
            continue
        model, log_dir = build_model(config, data)
        predict, label, edge_index = model.test_step(data, from_neighbor=config["from_neighbor"])
        predicts.append(predict)
        labels.append(label)
        edges_index.append(edge_index)
        split_idxs.append(torch.ones(len(predict), dtype=torch.int)*i)
        anaylse_result(predict, label, edge_index,
                       dataset, split_idxs[-1], model, log_dir,
                       save=config.get("save", False),
                       tag=f"split_{i}")
        model.info(f"split {i} end")
    model.info(f"{model}")
    model.info(f"{config}")
    predicts = torch.cat(predicts)
    labels = torch.cat(labels)
    edges_index = torch.cat(edges_index, dim=-1)
    split_idxs = torch.cat(split_idxs)
Example #13
def main():
    # Parsing arguments
    parser = argparse.ArgumentParser(description='Training GANs or VAEs')
    parser.add_argument('--model', type=str, required=True)
    parser.add_argument('--dataset', type=str, required=True)
    parser.add_argument('--epoch', type=int, default=200)
    parser.add_argument('--batchsize', type=int, default=50)
    parser.add_argument('--datasize', type=int, default=-1)
    parser.add_argument('--output', default='output')
    parser.add_argument('--zdims', type=int, default=256)
    parser.add_argument('--gpu', type=int, default=0)
    parser.add_argument('--resume', type=str, default=None)
    parser.add_argument('--testmode', action='store_true')

    args = parser.parse_args()

    # Select GPU
    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu)

    # Make output directory if it does not exist
    if not os.path.isdir(args.output):
        os.mkdir(args.output)

    # Load datasets
    if args.dataset == 'mnist':
        datasets = mnist.load_data()
    elif args.dataset == 'svhn':
        datasets = svhn.load_data()
    elif args.dataset == 'hair':
        datasets = hairdata.load_data()
    elif args.dataset == 'hair_4tags':
        datasets = hairdata_4tags.load_data()
    else:
        datasets = load_data(args.dataset)
    print('aa')
    # Construct model
    if args.model not in models:
        raise Exception('Unknown model:', args.model)

    model = models[args.model](input_shape=datasets.images.shape[1:],
                               num_attrs=len(datasets.attr_names),
                               z_dims=args.zdims,
                               output=args.output)

    if args.resume is not None:
        model.load_model(args.resume)

    # Training loop
    datasets.images = datasets.images * 2.0 - 1.0
    samples = np.random.normal(size=(10, args.zdims)).astype(np.float32)
    model.main_loop(datasets,
                    samples,
                    datasets.attr_names,
                    epochs=args.epoch,
                    batchsize=args.batchsize,
                    reporter=[
                        'loss', 'g_loss', 'd_loss', 'g_acc', 'd_acc', 'c_loss',
                        'ae_loss'
                    ])
Example #14
    def test(self,
             tabular_path: str,
             join_result_path: str,
             model_path: str,
             model_weights_path=None,
             histogram_path=None) -> (float, float, float, float):
        """
        Evaluate the accuracy metrics of a trained model for the spatial join cost estimator
        :return mean_squared_error, mean_absolute_percentage_error, mean_squared_logarithmic_error, mean_absolute_error
        """

        # Extract train and test data, but only use test data
        # X_train, y_train, X_test, y_test = datasets.load_tabular_features_hadoop(RegressionModel.DISTRIBUTION, RegressionModel.MATCHED, RegressionModel.SCALE, RegressionModel.MINUS_ONE)
        # X_train, y_train, X_test, y_test = datasets.load_tabular_features(join_result_path, tabular_path, RegressionModel.NORMALIZE, RegressionModel.MINUS_ONE, RegressionModel.TARGET)
        X_test, y_test, join_df = datasets.load_data(
            tabular_path, ClassificationModel.TARGET,
            ClassificationModel.DROP_COLUMNS,
            ClassificationModel.SELECTED_COLUMNS)

        # Load the model and use it for prediction
        loaded_model = pickle.load(open(model_path, 'rb'))
        y_pred = loaded_model.predict(X_test)

        # TODO: delete this dumping action. This is just for debugging
        test_df = pd.DataFrame()
        test_df['dataset1'] = join_df['dataset1']
        test_df['dataset2'] = join_df['dataset2']
        test_df['y_test'] = y_test
        test_df['y_pred'] = y_pred
        test_df.to_csv('data/temp/test_df.csv', index=None)

        # Compute accuracy metrics
        acc = metrics.accuracy_score(y_test, y_pred)
        print('Accuracy:', metrics.accuracy_score(y_test, y_pred))

        # Plot non-normalized confusion matrix
        titles_options = [
            ("figures/confusion_matrix_without_normalization.png", None),
            ("figures/confusion_matrix_with_normalization.png", 'true')
        ]
        class_names = ['BNLJ', 'PBSM', 'DJ', 'RepJ']
        for title, normalize in titles_options:
            plt.rcParams.update({'font.size': 14})
            disp = plot_confusion_matrix(loaded_model,
                                         X_test,
                                         y_test,
                                         display_labels=class_names,
                                         cmap=plt.cm.Blues,
                                         normalize=normalize)
            disp.ax_.set_title("")

            print(title)
            print(disp.confusion_matrix)
            plt.xlabel('Predicted algorithm', fontsize=16)
            plt.ylabel('Actual best algorithm', fontsize=16)
            plt.savefig(title)

        return acc, acc, acc, acc
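
Note that plot_confusion_matrix was deprecated in scikit-learn 1.0 and removed in 1.2; ConfusionMatrixDisplay.from_estimator accepts the same estimator/X/y arguments. A self-contained sketch on synthetic data (not the spatial-join features used above):

import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import ConfusionMatrixDisplay

# toy 4-class problem standing in for the join-algorithm labels
X, y = make_classification(n_samples=200, n_classes=4, n_informative=6, random_state=0)
clf = LogisticRegression(max_iter=1000).fit(X, y)
disp = ConfusionMatrixDisplay.from_estimator(clf, X, y, cmap=plt.cm.Blues, normalize='true')
disp.ax_.set_title("")
plt.savefig("confusion_matrix_normalized.png")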
Example #15
def main() -> None:

    parser = argparse.ArgumentParser(description="Flower")
    parser.add_argument(
        "--server_address",
        type=str,
        default=DEFAULT_SERVER_ADDRESS,
        help=f"gRPC server address (default: {DEFAULT_SERVER_ADDRESS})",
    )
    parser.add_argument("--cid",
                        type=str,
                        required=True,
                        help="Client CID (no default)")
    parser.add_argument(
        "--model",
        type=str,
        default="simple-cnn",
        help="Model to use for training (default: simple-cnn)",
    )
    parser.add_argument(
        "--dataset",
        type=str,
        default="cifar-10",
        help="Dataset to use fro training (default: cifar-10)",
    )
    parser.add_argument(
        "--device",
        type=str,
        default="CPU",
        help="Device to run the model on (default: CPU)",
    )
    parser.add_argument(
        "--log_host",
        type=str,
        help="Logserver address (no default)",
    )
    args = parser.parse_args()

    # Configure logger
    fl.common.logger.configure(f"client_{args.cid}", host=args.log_host)

    # check for runnable device
    global DEVICE
    DEVICE = torch.device("cuda:0" if args.device == "GPU" else "cpu")

    # Load model and data
    model = models.load_model(model_name=args.model, framework="PT")
    model.to(DEVICE)
    trainset, testset = datasets.load_data(dataset_name=args.dataset,
                                           framework="PT")

    # Start client
    client = PyTorchClient(args.cid, model, trainset, testset)
    try:
        fl.client.start_client(args.server_address, client)
    except:
        print("Either something went wrong or server finished execution!!")
Example #16
def plot_experiment(params, output_path):
    folder_name = get_folder_name(params, output_path)
    all_experiments = set([f[:11] for f in os.listdir(folder_name) if utils.check_int(f[:11])])
    all_experiments = sorted(all_experiments)
    for experiment in all_experiments:
        gp, gp_list, s_list = pickle_data(folder_name, file_prefix=experiment)

        data, X_range, X, Y, Z, is_us, goal_func = datasets.load_data(params["data_name"], True)

        print(X_range.shape)
        h, w = X_range.shape
        for i in range(len(gp_list)):

            gpi = gp_list[i]
            X_new, Y_new = s_list[i]
            print(X_new.shape)
            mean = np.zeros((X_range.shape[0], 1))
            var = np.zeros((X_range.shape[0], 1))
            for j in range(X_range.shape[0]):
                mean[j], var[j] = gpi.predict(X_range[j, :][np.newaxis, :])
            print(gpi.Xtrain.shape)
            print(gpi.Ytrain.shape)
            print(Z.shape, is_us.shape, mean.flatten().shape, var.flatten().shape)
            Z[is_us] = mean.flatten()
            p.contourf(X, Y, Z)  # GP mean
            p.colorbar()
            p.scatter(gpi.Xtrain[:, 0], gpi.Xtrain[:, 1], color='green', marker='x', s=50)  # training data
            p.scatter(X_new[:, 0], X_new[:, 1], color='purple', marker='*', s=100)  # test data
            p.show()
            Z[is_us] = var.flatten() ** 0.5
            p.contourf(X, Y, Z)  # GP mean
            p.colorbar()
            p.scatter(gpi.Xtrain[:, 0], gpi.Xtrain[:, 1], color='green', marker='x', s=50)  # training data
            p.scatter(X_new[:, 0], X_new[:, 1], color='purple', marker='*', s=100)  # test data
            p.show()
            Z[is_us] = data.flatten()
            p.contourf(X, Y, Z)  # GP mean
            p.colorbar()
            p.scatter(gpi.Xtrain[:, 0], gpi.Xtrain[:, 1], color='green', marker='x', s=50)  # training data
            p.scatter(X_new[:, 0], X_new[:, 1], color='purple', marker='*', s=100)  # test data
            p.show()

            Z[is_us] = mean.flatten()
            fig = p.figure()
            ax = fig.gca(projection='3d')
            # surf = ax.plot_surface(X,Y,Z, rstride=1, cstride=1, cmap=cm.coolwarm,
            #                   linewidth=0, antialiased=False)
            ax.contourf(X, Y, Z)
            # fig.colorbar(surf, shrink=0.5, aspect=5)
            ax.set_zlim(-15, 90)
            # p.colorbar()
            ax.scatter(gpi.Xtrain[:, 0], gpi.Xtrain[:, 1], gpi.Ytrain_original, color='green', marker='x',
                       s=50)  # training data
            # p.scatter(X_new[:,0],X_new[:,1],color='purple',marker='*', s=100)   # test data
            p.show()
            print(X_new, Y_new)
Example #17
def main():

    print('Prepare dataset')
    # Dataset
    data_train, data_valid, data_test = datasets.load_data(
        args.dataset, args.data_path, args.representation, args.normalization)

    print('Create model')
    if args.representation == 'adj':
        print('\t* Discrete Edges')
        net = models.MpnnGGNN(in_size=2,
                              e=[1],
                              hidden_state_size=args.hidden_size,
                              message_size=args.hidden_size,
                              n_layers=args.nlayers,
                              discrete_edge=True,
                              out_type='regression',
                              target_size=data_train.getTargetSize())
    elif args.representation == 'feat':
        print('\t* Feature Edges')
        net = models.MpnnGGNN(in_size=2,
                              e=2,
                              hidden_state_size=args.hidden_size,
                              message_size=args.hidden_size,
                              n_layers=args.nlayers,
                              discrete_edge=False,
                              out_type='regression',
                              target_size=data_train.getTargetSize())
    else:
        raise NameError('Representation ' + args.representation +
                        ' not implemented!')

    print('Check CUDA')
    if args.cuda and args.ngpu > 1:
        print('\t* Data Parallel **NOT TESTED**')
        net = torch.nn.DataParallel(net, device_ids=list(range(args.ngpu)))

    if args.cuda:
        print('\t* CUDA')
        net = net.cuda()

    if args.load is not None:
        print('Loading model')
        checkpoint = load_checkpoint(args.load)
        net.load_state_dict(checkpoint['state_dict'])
        start_epoch = checkpoint['epoch']
        best_acc = checkpoint['best_acc']
    else:
        raise NameError('Load path must be set!')

    # Train
    plot_dataset(data_train, net, args.ngpu > 0)
    # Validation
    plot_dataset(data_valid, net, args.ngpu > 0)
    # Test
    plot_dataset(data_test, net, args.ngpu > 0)
Example #18
def experiment(options):
    dataset_name = options['dataset_name']
    urep_class = options['urep_class']
    urep_ratio = options['urep_ratio']
    use_validation_step = options['use_validation_step']
    train_size = options['train_size']
    test_size = options['test_size']
    n_splits = options['n_splits']

    X, y = datasets.load_data(dataset_name)
    y = y.astype(int)
    n_classes = y.max() + 1

    if urep_class is not None:
        test_data_imbalance(y, urep_class)

    eval = [
        0,
        np.zeros(n_classes),
        np.zeros(n_classes), 0,
        np.zeros(n_classes),
        np.zeros(n_classes)
    ]
    if use_validation_step:
        sss = StratifiedShuffleSplit(n_splits=1,
                                     test_size=test_size,
                                     train_size=train_size)
        train_index, test_index = next(sss.split(X, y))
        val_index = np.delete(np.arange(X.shape[0]),
                              np.concatenate((train_index, test_index)))
        X_train, y_train = shuffle(X[train_index], y[train_index])
        X_val, y_val = shuffle(X[val_index], y[val_index])
        X_test, y_test = shuffle(X[test_index], y[test_index])
        if urep_class is not None and urep_ratio is not None:
            X_train, y_train = force_data_imbalance(X_train, y_train,
                                                    urep_class, urep_ratio)
            test_data_imbalance(y_train, urep_class)
        eval = train(X_train, y_train, X_val, y_val, X_test, y_test, options)
    else:
        skf = StratifiedKFold(n_splits=n_splits)
        for train_index, test_index in skf.split(X, y):
            X_train, y_train = shuffle(X[train_index], y[train_index])
            X_test, y_test = shuffle(X[test_index], y[test_index])
            if urep_class is not None and urep_ratio is not None:
                X_train, y_train = force_data_imbalance(
                    X_train, y_train, urep_class, urep_ratio)
                test_data_imbalance(y_train, urep_class)
            split_eval = train(X_train, y_train, None, None, X_test, y_test,
                               options)
            for i in range(6):
                eval[i] += split_eval[i] / n_splits

    for perf in eval:
        print(perf)

    return eval
Example #19
def _get_data_and_model(args):
    # prepare dataset
    if args.method in ['FcDEC', 'FcIDEC', 'FcDEC-DA', 'FcIDEC-DA']:
        x, y = load_data(args.dataset, args.subset_key)
    elif args.method in ['ConvDEC', 'ConvIDEC', 'ConvDEC-DA', 'ConvIDEC-DA']:
        x, y = load_data_conv(args.dataset, args.subset_key)
    else:
        raise ValueError(
            "Invalid value for method, which can only be in ['FcDEC', 'FcIDEC', 'ConvDEC', 'ConvIDEC', "
            "'FcDEC-DA', 'FcIDEC-DA', 'ConvDEC-DA', 'ConvIDEC-DA']")

    # prepare optimizer
    if args.optimizer in ['sgd', 'SGD']:
        optimizer = SGD(args.lr, 0.9)
    else:
        optimizer = Adam()

    # prepare the model
    if y is None:
        n_clusters = args.n_clusters
    else:
        n_clusters = len(np.unique(y))

    if 'FcDEC' in args.method:
        model = FcDEC(dims=[x.shape[-1], 500, 500, 2000, 10],
                      n_clusters=n_clusters)
        model.compile(optimizer=optimizer, loss='kld')
    elif 'FcIDEC' in args.method:
        model = FcIDEC(dims=[x.shape[-1], 500, 500, 2000, 10],
                       n_clusters=n_clusters)
        model.compile(optimizer=optimizer,
                      loss=['kld', 'mse'],
                      loss_weights=[0.1, 1.0])
    elif 'ConvDEC' in args.method:
        model = ConvDEC(input_shape=x.shape[1:],
                        filters=[32, 64, 128, 10],
                        n_clusters=n_clusters)
        model.compile(optimizer=optimizer, loss='kld')
    elif 'ConvIDEC' in args.method:
        model = ConvIDEC(input_shape=x.shape[1:],
                         filters=[32, 64, 128, 10],
                         n_clusters=n_clusters)
        model.compile(optimizer=optimizer,
                      loss=['kld', 'mse'],
                      loss_weights=[0.1, 1.0])
    else:
        raise ValueError(
            "Invalid value for method, which can only be in ['FcDEC', 'FcIDEC', 'ConvDEC', 'ConvIDEC', "
            "'FcDEC-DA', 'FcIDEC-DA', 'ConvDEC-DA', 'ConvIDEC-DA']")

    # for -DA methods, force aug_pretrain and aug_cluster to True
    if '-DA' in args.method:
        args.aug_pretrain = True
        args.aug_cluster = True

    return (x, y), model
Example #20
def divide_data(options):
    data, _, _, _ = load_data((options.dim, options.dim), options.data_path)
    random.shuffle(data)
    length = len(data)
    bound = int(length * options.data_prop)
    for i in range(bound):
        scipy.misc.imsave('../data/train/train_{0}.jpg'.format(i), data[i])
    for i in range(bound, length):
        scipy.misc.imsave('../data/test/test_{0}.jpg'.format(i - bound),
                          data[i])
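
scipy.misc.imsave, used above, was removed in SciPy 1.2; imageio.imwrite is the usual drop-in replacement. A minimal sketch with a synthetic image (the output path is illustrative):

import numpy as np
import imageio.v2 as imageio  # older imageio releases use plain "import imageio" instead

img = (np.random.rand(64, 64) * 255).astype(np.uint8)  # synthetic grayscale image
imageio.imwrite('../data/train/train_0.jpg', img)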
Example #21
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument("dataset")
    parser.add_argument(
        "--t_zero",
        default=None,
        type=int,
        help="Last task identifier before the first evaluation task identifier"
    )
    parser.add_argument("--history", default=0, type=int, help="History size")
    parser.add_argument("--backend",
                        default="dgl",
                        type=str,
                        choices=["dgl", "geometric"])
    parser.add_argument(
        "--basedir",
        help=
        "Basedir for preprocessed dataset, else create subdirectory in input")

    args = parser.parse_args()
    graph_or_edge_index, features, labels, years = load_data(
        args.dataset, backend=args.backend)
    basedir = args.basedir if args.basedir else args.dataset

    outdir = os.path.join(
        basedir,
        lifelong_nodeclf_identifier(args.dataset, args.t_zero, args.history,
                                    args.backend))

    # Cast to torch tensors
    features = torch.as_tensor(features, dtype=torch.float)
    labels = torch.as_tensor(labels, dtype=torch.long)
    years = torch.as_tensor(years, dtype=torch.long)

    if args.backend == "geometric":
        dataset = make_lifelong_nodeclf_dataset(outdir,
                                                years,
                                                features,
                                                labels,
                                                edge_index=graph_or_edge_index,
                                                t_zero=args.t_zero,
                                                cumulate=args.history)
    elif args.backend == 'dgl':
        dataset = make_lifelong_nodeclf_dataset(outdir,
                                                years,
                                                features,
                                                labels,
                                                dgl_graph=graph_or_edge_index,
                                                t_zero=args.t_zero,
                                                cumulate=args.history)
    else:
        raise ValueError("Unknown backend")

    print(dataset)
Example #22
def main() -> None:

    parser = argparse.ArgumentParser(description="Flower")
    parser.add_argument(
        "--server_address",
        type=str,
        default=DEFAULT_SERVER_ADDRESS,
        help=f"gRPC server address (default: {DEFAULT_SERVER_ADDRESS})",
    )
    parser.add_argument("--cid",
                        type=str,
                        required=True,
                        help="Client CID (no default)")
    parser.add_argument(
        "--model",
        type=str,
        default="simple-cnn",
        help="Model to use for training (default: simple-cnn)",
    )
    parser.add_argument(
        "--dataset",
        type=str,
        default="cifar-10",
        help="Dataset to use fro training (default: cifar-10)",
    )
    parser.add_argument(
        "--device",
        type=str,
        default="CPU",
        help="Device to run the model on (default: CPU)",
    )
    parser.add_argument(
        "--log_host",
        type=str,
        help="Logserver address (no default)",
    )
    args = parser.parse_args()

    # Configure logger
    fl.common.logger.configure(f"client_{args.cid}", host=args.log_host)

    # Load model and data
    model = models.load_model(model_name=args.model, framework="TF")
    xy_train, xy_test = datasets.load_data(dataset_name=args.dataset,
                                           framework="TF")

    # Start client
    keras_client = TfKerasClient(args.cid, model, xy_train, xy_test)
    client = fl.client.keras_client.KerasClientWrapper(keras_client)

    try:
        fl.client.start_client(args.server_address, client)
    except:
        print("Either something went wrong or server finished execution!!")
Example #23
def run_exp(dbs, da_s1, da_s2, expdir, ae_weights_dir, trials=5, verbose=0,
            pretrain_epochs=50, finetune_epochs=50, use_multiprocessing=True):
    # Log files
    if not os.path.exists(expdir):
        os.makedirs(expdir)
    logfile = open(expdir + '/results.csv', 'a')
    logwriter = csv.DictWriter(logfile, fieldnames=['trials', 'acc', 'nmi', 'time'])
    logwriter.writeheader()

    # Begin training on different datasets
    for db in dbs:
        logwriter.writerow(dict(trials=db, acc='', nmi='', time=''))

        # load dataset
        x, y = load_data(db)

        # setting parameters
        n_clusters = len(np.unique(y))
        dims = [x.shape[-1], 500, 500, 2000, 10]

        # Training
        results = np.zeros(shape=[trials, 3], dtype=float)  # init metrics before finetuning
        for i in range(trials):  # base
            t0 = time()
            save_dir = os.path.join(expdir, db, 'trial%d' % i)
            if not os.path.exists(save_dir):
                os.makedirs(save_dir)

            # prepare model
            model = ASPC(dims, n_clusters)
            model.compile(optimizer=Adam(0.0001), loss='mse')

            # pretraining
            ae_weights = 'ae_weights.h5'
            if ae_weights_dir is None:
                model.pretrain(x, y, optimizer=SGD(1.0, 0.9), epochs=pretrain_epochs,
                               save_dir=save_dir, da_s1=da_s1, verbose=verbose, use_multiprocessing=use_multiprocessing)
                ae_weights = os.path.join(save_dir, ae_weights)
            else:
                ae_weights = os.path.join(ae_weights_dir, db, 'trial%d' % i, ae_weights)

            # finetuning
            results[i, :2] = model.fit(x, y, epochs=finetune_epochs if db!='fmnist' else 10, 
                                       da_s2=da_s2, save_dir=save_dir, ae_weights=ae_weights,
                                       use_multiprocessing=use_multiprocessing)
            results[i, 2] = time() - t0

        for t, line in enumerate(results):
            logwriter.writerow(dict(trials=t, acc=line[0], nmi=line[1], time=line[2]))
        mean = np.mean(results, 0)
        logwriter.writerow(dict(trials='avg', acc=mean[0], nmi=mean[1], time=mean[2]))
        logfile.flush()

    logfile.close()
Example #24
def main(_):
    # Parsing arguments
    parser = argparse.ArgumentParser(description='Training GANs or VAEs')
    parser.add_argument('--model', type=str, required=True)
    parser.add_argument('--dataset', type=str, required=True)
    parser.add_argument('--datasize', type=int, default=-1)
    parser.add_argument('--epoch', type=int, default=200)
    parser.add_argument('--batchsize', type=int, default=50)
    parser.add_argument('--output', default='output')
    parser.add_argument('--zdims', type=int, default=256)
    parser.add_argument('--gpu', type=int, default=0)
    parser.add_argument('--resume', type=str, default=None)
    parser.add_argument('--testmode', action='store_true')

    args = parser.parse_args()

    # select gpu
    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu)

    # Make output directory if it does not exist
    if not os.path.isdir(args.output):
        os.mkdir(args.output)

    # Load datasets
    if args.dataset == 'mnist':
        datasets = mnist.load_data()
    elif args.dataset == 'svhn':
        datasets = svhn.load_data()
    else:
        datasets = load_data(args.dataset, args.datasize)

    # Construct model
    if args.model not in models:
        raise Exception('Unknown model:', args.model)

    model = models[args.model](
        batchsize=args.batchsize,
        input_shape=datasets.shape[1:],
        attr_names=None or datasets.attr_names,
        z_dims=args.zdims,
        output=args.output,
        resume=args.resume
    )

    if args.testmode:
        model.test_mode = True

    tf.set_random_seed(12345)

    # Training loop
    datasets.images = datasets.images.astype('float32') * 2.0 - 1.0
    model.main_loop(datasets,
                    epochs=args.epoch)
Example #25
def main():

    print('Prepare dataset')
    # Dataset
    data_train, data_valid, data_test = datasets.load_data(
        args.dataset, args.data_path, args.representation, args.normalization)

    # Data Loader
    train_loader = torch.utils.data.DataLoader(
        data_train,
        collate_fn=datasets.collate_fn_multiple_size,
        batch_size=args.batch_size,
        num_workers=args.prefetch,
        pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(
        data_valid,
        batch_size=1,
        collate_fn=datasets.collate_fn_multiple_size,
        num_workers=args.prefetch,
        pin_memory=True)
    test_loader = torch.utils.data.DataLoader(
        data_test,
        batch_size=1,
        collate_fn=datasets.collate_fn_multiple_size,
        num_workers=args.prefetch,
        pin_memory=True)

    print('Create model')
    if args.distance == 'SoftHd':
        net = GraphEditDistance.SoftHd()
    else:
        net = GraphEditDistance.Hd()

    print('Loss & optimizer')
    evaluation = knn

    print('Check CUDA')
    if args.cuda and args.ngpu > 1:
        print('\t* Data Parallel **NOT TESTED**')
        net = torch.nn.DataParallel(net, device_ids=list(range(args.ngpu)))

    if args.cuda:
        print('\t* CUDA')
        net = net.cuda()

    print('Validation')
    acc_valid = test(valid_loader, train_loader, net, args.ngpu > 0,
                     evaluation)

    # Evaluate best model in Test
    print('Test:')
    acc_test = test(test_loader, train_loader, net, args.ngpu > 0, evaluation)
Example #26
def main(_):
    """config.py の CLASSES、引数で処理を実施."""

    datasets = load_data(one_hot=True)

    if _.train:
        train(datasets, epochs=120)
    else:
        print(datasets.test.labels[:10])
        print(predict(datasets.test.images[:10]))
        print(predict([datasets.test.images[0]]))
        print(predict([datasets.test.images[0]], dtype='int'))
        print(predict([datasets.test.images[0]], dtype='argmax'))
Example #27
def evaluate_experiment(params, output_path):
    if params["data_name"] == "1d":
        data, X_range = datasets.load_data(params["data_name"])
    else:
        data, X_range, X, Y, Z, is_us = datasets.load_data(
            params["data_name"], True)

    gp, gp_list, s_list, loss_list = pickle_data(
        get_folder_name(params, output_path))

    if params["data_name"] == "ozone":
        goal_func = partial(utils.usa_goal_func,
                            data=data,
                            X_range=X_range,
                            sign=-1)
    elif params["data_name"] == "1d":
        goal_func = partial(utils.d1_goal_func, sign=-1)
    else:
        raise NameError("Data set name unknown.")

    y_opt = data[np.argmax(data)]
    x_opt = X_range[np.argmax(data), :]

    for i in range(len(gp_list)):
        gpi = gp_list[i]
        gpi.update_hyperparams_ml(params["bounds"])
        mean = np.zeros((X_range.shape[0], 1))
        var = np.zeros((X_range.shape[0], 1))
        for j in range(X_range.shape[0]):
            mean[j], var[j] = gpi.predict(X_range[j, :][np.newaxis, :])

        x_best = X_range[np.argmax(mean), :]
        x_first = gpi.Xtrain[0, :]

        print "Iteration:", (i + 1) * params["save_every"]
        print "Gap measure:", utils.gap_measure(goal_func, x_first, x_best,
                                                y_opt)
        print "Closeness measure:", utils.closeness_measure(x_best, x_opt)
Example #28
def main():
  # Parse hyperparams
  hparams = rebar.default_hparams
  hparams.parse(FLAGS.hparams)
  print(hparams.values())

  train_xs, valid_xs, test_xs = datasets.load_data(hparams)
  mean_xs = np.mean(train_xs, axis=0)  # Compute mean centering on training

  training_steps = 2000000
  model = getattr(rebar, hparams.model)
  sbn = model(hparams, mean_xs=mean_xs)

  scores = train(sbn, train_xs, valid_xs, test_xs,
                 training_steps=training_steps, debug=False)
Example #29
def do_fid_inception(args):
    args_json = json.load(open(os.path.join(args.dir, 'hps.txt')))
    vars(args).update(args_json)
    dset = load_data(args.dataset, True)
    ckpt_dir = tf.train.get_checkpoint_state(args.dir).model_checkpoint_path
    val_images = dset.test.images
    G = Graph(args, val_images.shape[1], val_images.shape[3])
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())
    saver = tfv1.train.Saver(keep_checkpoint_every_n_hours=2, max_to_keep=1)
    print('RESTORING WEIGHTS FROM', ckpt_dir)
    saver.restore(sess, ckpt_dir)
    score = compute_fid_inception(dset, sess, G)
    print('FID SCORE = {}'.format(score))
Example #30
def main(args):
    model = models.get_model(args.model, args.dataset)

    dataset = model.dataset
    train, test = datasets.load_data(dataset)
    train, validation = datasets.split_data(train, fractions=[0.8, 0.2])

    trainer.train(model, train, validation, args.epochs, args.batch_size,
                  args.learning_rate,
                  dataset_update=args.dataset_update, increments=args.splits,
                  use_ewc=args.ewc, ewc_lambda=args.ewc_lambda,
                  ewc_samples=args.ewc_samples,
                  use_fim=args.fim, fim_threshold=args.fim_threshold,
                  fim_samples=args.fim_samples,
                  use_incdet=args.incdet,
                  incdet_threshold=args.incdet_threshold)
Example #31
def process_dataset_iris(pkl_dataset_dir):
    dataset_load = load_data(pkl_dataset_dir)
    data = dataset_load[0]
    data = trans_complex_to_real(data)
    target = dataset_load[1]
    return Bunch(data=data, target=target)
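
A hypothetical call against process_dataset_iris above; a Bunch (presumably sklearn.utils.Bunch) exposes its keys as attributes, mirroring scikit-learn's built-in dataset loaders. The pickle path below is illustrative:

iris = process_dataset_iris('data/iris_dataset.pkl')  # hypothetical path to the pickled dataset
X, y = iris.data, iris.target
print(X.shape, y.shape)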
Example #32
__author__ = 'lizuyao'
import pickle
import datasets
import sys
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from unbalanced_dataset import UnderSampler
from mpl_toolkits.mplot3d import Axes3D

X_reduced=pickle.load(open(sys.argv[1], "rb"))
fileName = sys.argv[2]
X, Y = datasets.load_data(fileName)

# Generate the new dataset using under-sampling method
verbose = False
# 'Random under-sampling'
# ratio of majority elements to sample with respect to the number of minority cases.
US = UnderSampler(ratio=1.,verbose=verbose)
X_reduced, Y = US.fit_transform(X_reduced, Y)

# To getter a better understanding of interaction of the dimensions
# plot the first three tsne dimensions
fig = plt.figure(1, figsize=(8, 6))
ax = Axes3D(fig, elev=-150, azim=110)
ax.scatter(X_reduced[:, 0], X_reduced[:, 1], X_reduced[:, 2], c=Y, cmap=plt.cm.Paired)
ax.set_title("First three tsne directions")
ax.set_xlabel("1st eigenvector")
ax.w_xaxis.set_ticklabels([])
ax.set_ylabel("2nd eigenvector")
ax.w_yaxis.set_ticklabels([])
    datasets = []
    for dataset_id in 'ABC':
        datasets.append([url_pattern % (dataset_id, train_or_test)
                         for train_or_test in ['train', 'test']])

    models = ['LDA', 'LogitReg', 'LinearReg', 'QDA']
    n_models = len(models)
    n_datasets = len(datasets)
    scores = np.ndarray((n_datasets * 2, n_models))
    for dataset, j in zip(datasets, range(n_datasets)):
        train, test = dataset

        # load the data
        dataset_name = os.path.basename(train.replace(".train", ""))
        print ">" * 80, "Begin (%s)" % dataset_name
        train = load_data(train, data_dir=os.getcwd())
        test = load_data(test, data_dir=os.getcwd())
        X_train = train[..., :-1]
        Y_train = train[..., -1]
        X_test = test[..., :-1]
        Y_test = test[..., -1]

        # fit models
        for model_id, i in zip(models, range(n_models)):
            print("\nRunning %s ..." % model_id)
            model = eval(model_id)(verbose=0).fit(X_train, Y_train)
            print("... done.")
            model.print_params()

            mistrain = (model.predict(X_train) != Y_train
                        ).sum() / (1. * len(Y_train))
            self.means_ = self.labels_.dot(X)  # uncorrected
            counts = self.labels_.sum(axis=1)  # number of points in each class
            for k in range(self.n_classes):
                # update proportions
                self.priors_[k] = counts[k] / N

                # update means
                self.means_[k] /= counts[k]

                # update covariance matrices
                X_k = X - self.means_[k]
                D = np.diag(self.labels_[k])
                self.covariance_matrices_[k] = X_k.T.dot(D.dot(
                        X_k)) / counts[k]

        return self

if __name__ == '__main__':
    import matplotlib.pyplot as plt

    X = load_data(
        "http://www.di.ens.fr/~fbach/courses/fall2013/EMGaussian.data")

    # fit
    em = EM(4).fit(X, max_iter=50)

    # plot results
    plt.scatter(*X.T, c=em.labels_.argmax(axis=0))

    plt.show()