Example #1
File: main.py  Project: taojt/dcn
def run_base_model_dcn(dfTrain, dfTest, folds, dcn_params):
    fd = FeatureDictionary(dfTrain,
                           dfTest,
                           numeric_cols=config.NUMERIC_COLS,
                           ignore_cols=config.IGNORE_COLS,
                           category_cols=config.CATEGORICAL_COLS)

    print(fd.feat_dim)
    print(fd.feat_dict)

    data_parser = DataParser(feat_dict=fd)
    cate_Xi_train, cate_Xv_train, numeric_Xv_train, y_train = data_parser.parse(
        df=dfTrain, has_label=True)
    cate_Xi_test, cate_Xv_test, numeric_Xv_test, ids_test = data_parser.parse(
        df=dfTest)

    dcn_params["cate_feature_size"] = fd.feat_dim
    dcn_params["field_size"] = len(cate_Xi_train[0])
    dcn_params['numeric_feature_size'] = len(config.NUMERIC_COLS)

    _get = lambda x, l: [x[i] for i in l]

    for i, (train_idx, valid_idx) in enumerate(folds):
        cate_Xi_train_, cate_Xv_train_, numeric_Xv_train_, y_train_ = _get(
            cate_Xi_train, train_idx), _get(cate_Xv_train, train_idx), _get(
                numeric_Xv_train, train_idx), _get(y_train, train_idx)
        cate_Xi_valid_, cate_Xv_valid_, numeric_Xv_valid_, y_valid_ = _get(
            cate_Xi_train, valid_idx), _get(cate_Xv_train, valid_idx), _get(
                numeric_Xv_train, valid_idx), _get(y_train, valid_idx)

        dcn = DCN(**dcn_params)

        dcn.fit(cate_Xi_train_, cate_Xv_train_, numeric_Xv_train_, y_train_,
                cate_Xi_valid_, cate_Xv_valid_, numeric_Xv_valid_, y_valid_)
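The `folds` argument consumed above is built by the caller; a minimal sketch of one way to produce it, assuming scikit-learn is available (X_train and y_train are hypothetical arrays matching dfTrain):

from sklearn.model_selection import StratifiedKFold

# list() materializes the (train_idx, valid_idx) pairs so they can be
# enumerated the way run_base_model_dcn does
folds = list(StratifiedKFold(n_splits=3, shuffle=True,
                             random_state=2017).split(X_train, y_train))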
Example #2
def run_base_model_dcn(dfTrain, dfTest, folds, dcn_params):

    fd = FeatureDictionary(dfTrain,
                           dfTest,
                           numeric_cols=config.NUMERIC_COLS,
                           ignore_cols=config.IGNORE_COLS,
                           cate_cols=config.CATEGORICAL_COLS)

    print(fd.feat_dim)
    print(fd.feat_dict)

    data_parser = DataParser(feat_dict=fd)
    cate_Xi_train, cate_Xv_train, numeric_Xv_train, y_train = data_parser.parse(
        df=dfTrain, has_label=True)
    cate_Xi_test, cate_Xv_test, numeric_Xv_test, ids_test = data_parser.parse(
        df=dfTest)

    dcn_params["cate_feature_size"] = fd.feat_dim
    dcn_params["field_size"] = len(cate_Xi_train[0])
    dcn_params['numeric_feature_size'] = len(config.NUMERIC_COLS)

    _get = lambda x, l: [x[i] for i in l]

    for i, (train_idx, valid_idx) in enumerate(folds):
        cate_Xi_train_, cate_Xv_train_, numeric_Xv_train_, y_train_ = _get(
            cate_Xi_train, train_idx), _get(cate_Xv_train, train_idx), _get(
                numeric_Xv_train, train_idx), _get(y_train, train_idx)
        cate_Xi_valid_, cate_Xv_valid_, numeric_Xv_valid_, y_valid_ = _get(
            cate_Xi_train, valid_idx), _get(cate_Xv_train, valid_idx), _get(
                numeric_Xv_train, valid_idx), _get(y_train, valid_idx)

        dcn = DCN(**dcn_params)

        dcn.fit(cate_Xi_train_, cate_Xv_train_, numeric_Xv_train_, y_train_,
                cate_Xi_valid_, cate_Xv_valid_, numeric_Xv_valid_, y_valid_)
Example #3
def run_base_model_dcn(dfTrain, dfTest, folds, dcn_params):

    fd = FeatureDictionary(dfTrain,
                           dfTest,
                           numeric_cols=config.NUMERIC_COLS,
                           ignore_cols=config.IGNORE_COLS,
                           cate_cols=config.CATEGORICAL_COLS)

    print(fd.feat_dim)
    print(fd.feat_dict)

    data_parser = DataParser(feat_dict=fd)
    cate_Xi_train, cate_Xv_train, numeric_Xv_train, y_train = data_parser.parse(
        df=dfTrain, has_label=True)
    cate_Xi_test, cate_Xv_test, numeric_Xv_test, ids_test = data_parser.parse(
        df=dfTest)

    dcn_params["cate_feature_size"] = fd.feat_dim
    dcn_params["field_size"] = len(cate_Xi_train[0])
    dcn_params['numeric_feature_size'] = len(config.NUMERIC_COLS)

    _get = lambda x, l: [x[i] for i in l]

    for i, (train_idx, valid_idx) in enumerate(folds):
        print("i",i)
        cate_Xi_train_, cate_Xv_train_, numeric_Xv_train_,y_train_ = _get(cate_Xi_train, train_idx), _get(cate_Xv_train, train_idx),_get(numeric_Xv_train, train_idx), _get(y_train, train_idx)
        cate_Xi_valid_, cate_Xv_valid_, numeric_Xv_valid_,y_valid_ = _get(cate_Xi_train, valid_idx), _get(cate_Xv_train, valid_idx),_get(numeric_Xv_train, valid_idx), _get(y_train, valid_idx)

        dcn = DCN(**dcn_params)

        s = dcn.fit(cate_Xi_train_, cate_Xv_train_, numeric_Xv_train_,
                    y_train_, cate_Xi_valid_, cate_Xv_valid_,
                    numeric_Xv_valid_, y_valid_, i)
        dcn.saver.save(s,
                       'D:/code/tensorflow_practice/recommendation/Basic-DCN-Demo/model/model',
                       global_step=i + 1)
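A checkpoint written this way can later be restored with the same saver; a minimal sketch, assuming the matching graph has already been built and `s` is a live session:

# tf.train.latest_checkpoint returns the path of the newest checkpoint file
ckpt = tf.train.latest_checkpoint(
    'D:/code/tensorflow_practice/recommendation/Basic-DCN-Demo/model')
dcn.saver.restore(s, ckpt)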
Example #4
def plot_dcn():
    # Load the data
    data, dense_features, sparse_features = read_criteo_data()
    dense_features = dense_features[:3]
    sparse_features = sparse_features[:2]

    # Group the features into a linear part and a DNN part (choose according to
    # the actual scenario) and tag each group (using DenseFeat, SparseFeat)
    linear_feature_columns = [
        SparseFeat(feat, vocabulary_size=data[feat].nunique(), embedding_dim=4)
        for i, feat in enumerate(sparse_features)
    ] + [DenseFeat(
        feat,
        1,
    ) for feat in dense_features]

    dnn_feature_columns = [
        SparseFeat(feat, vocabulary_size=data[feat].nunique(), embedding_dim=4)
        for i, feat in enumerate(sparse_features)
    ] + [DenseFeat(
        feat,
        1,
    ) for feat in dense_features]

    # Build the DCN model and plot its architecture
    model = DCN(linear_feature_columns, dnn_feature_columns)
    keras.utils.plot_model(model, to_file="./imgs/DCN.png", show_shapes=True)
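keras.utils.plot_model only renders if the pydot and graphviz packages are installed; a small guard, as a sketch:

# Fail early with a clear message if the plotting backend is missing
try:
    import pydot  # noqa: F401
except ImportError as e:
    raise SystemExit('Install pydot and graphviz to render DCN.png') from e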
Example #5
def main(unused_argv):
    FLAGS.dummy_cols = [
        'banner_pos', 'device_conn_type', 'C1', 'C15', 'C16', 'C18'
    ]
    dp = DataPreprocess(FLAGS.dummy_cols, FLAGS.numerical_cols,
                        FLAGS.target_colname, FLAGS.train_file,
                        FLAGS.test_file)
    train_features, train_labels = dp.parse_data(FLAGS.train_file)
    test_features, test_labels = dp.parse_data(FLAGS.test_file)
    print(train_features['dfi'][:10])
    print(train_features['dfv'][:10])
    print(train_labels[:10])
    print('----------------------------------')

    feature_nums = dp.feature_nums
    field_nums = len(dp.all_cols)

    model = DCN(feature_nums, field_nums, args=FLAGS)

    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        train(sess,
              model,
              train_features,
              train_labels,
              batch_size=FLAGS.batch_size,
              epochs=FLAGS.epochs,
              checkpoint_dir=FLAGS.checkpoint_dir)

    evaluate(test_features, test_labels, checkpoint_dir=FLAGS.checkpoint_dir)
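The `config` handed to tf.Session above is not defined in this snippet; a common TF1 construction (an assumption here) is:

# Allocate GPU memory on demand instead of grabbing it all up front
config = tf.ConfigProto()
config.gpu_options.allow_growth = True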
Example #6
def run_base_model_dcn(dfTrain, dfTest, folds, dcn_params):
    # Mapping from categorical features to indices
    fd = FeatureDictionary(dfTrain,
                           dfTest,
                           numeric_cols=config.NUMERIC_COLS,
                           ignore_cols=config.IGNORE_COLS,
                           cate_cols=config.CATEGORICAL_COLS)

    print(fd.feat_dim)
    print(fd.feat_dict)

    # Returns categorical feature indices, categorical feature values, numeric feature values, and the labels
    data_parser = DataParser(feat_dict=fd)
    cate_Xi_train, cate_Xv_train, numeric_Xv_train, y_train = data_parser.parse(
        df=dfTrain, has_label=True)
    cate_Xi_test, cate_Xv_test, numeric_Xv_test, _ = data_parser.parse(
        df=dfTest)

    # Number of categorical feature values after one-hot encoding
    dcn_params["n_cate_feature"] = fd.feat_dim
    # Number of categorical fields
    dcn_params["n_field"] = len(cate_Xi_train[0])
    print('values', str(fd.feat_dim), 'values', str(len(cate_Xi_train[0])))

    _get = lambda x, l: [x[i] for i in l]

    for i, (train_idx, valid_idx) in enumerate(folds):
        # Training split
        cate_Xi_train_, cate_Xv_train_, numeric_Xv_train_, y_train_ = _get(
            cate_Xi_train, train_idx), _get(cate_Xv_train, train_idx), _get(
                numeric_Xv_train, train_idx), _get(y_train, train_idx)
        # Validation split
        cate_Xi_valid_, cate_Xv_valid_, numeric_Xv_valid_, y_valid_ = _get(
            cate_Xi_train, valid_idx), _get(cate_Xv_train, valid_idx), _get(
                numeric_Xv_train, valid_idx), _get(y_train, valid_idx)

        dcn = DCN(**dcn_params)

        dcn.fit(cate_Xi_train_, cate_Xv_train_, numeric_Xv_train_, y_train_,
                cate_Xi_valid_, cate_Xv_valid_, numeric_Xv_valid_, y_valid_)
Example #7
    def eval(eval_parameters, device):
        print(".. Evaluation started ..")
        treated_set = eval_parameters["treated_set"]
        control_set = eval_parameters["control_set"]
        model_path = eval_parameters["model_save_path"]
        network = DCN(training_flag=False).to(device)
        network.load_state_dict(torch.load(model_path, map_location=device))
        network.eval()
        treated_data_loader = torch.utils.data.DataLoader(treated_set,
                                                          shuffle=False,
                                                          num_workers=1)
        control_data_loader = torch.utils.data.DataLoader(control_set,
                                                          shuffle=False,
                                                          num_workers=1)

        err_treated_list = []
        err_control_list = []

        for batch in treated_data_loader:
            covariates_X, ps_score, y_f, y_cf = batch
            covariates_X = covariates_X.to(device)
            ps_score = ps_score.squeeze().to(device)
            treatment_pred = network(covariates_X, ps_score)

            predicted_ITE = treatment_pred[0] - treatment_pred[1]
            true_ITE = y_f - y_cf
            if torch.cuda.is_available():
                diff = true_ITE.float().cuda() - predicted_ITE.float().cuda()
            else:
                diff = true_ITE.float() - predicted_ITE.float()

            err_treated_list.append(diff.item())

        for batch in control_data_loader:
            covariates_X, ps_score, y_f, y_cf = batch
            covariates_X = covariates_X.to(device)
            ps_score = ps_score.squeeze().to(device)
            treatment_pred = network(covariates_X, ps_score)

            predicted_ITE = treatment_pred[0] - treatment_pred[1]
            true_ITE = y_cf - y_f
            if torch.cuda.is_available():
                diff = true_ITE.float().cuda() - predicted_ITE.float().cuda()
            else:
                diff = true_ITE.float() - predicted_ITE.float()
            err_control_list.append(diff.item())

        # print(err_treated_list)
        # print(err_control_list)
        return {
            "treated_err": err_treated_list,
            "control_err": err_control_list,
        }
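The per-sample error lists returned above are typically condensed into a PEHE-style summary; a minimal sketch, assuming NumPy:

import numpy as np

def summarize_errors(err_list):
    # Root-mean-square of the per-sample ITE errors
    err = np.asarray(err_list, dtype=float)
    return float(np.sqrt(np.mean(err ** 2)))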
Example #8
def _run_base_model_dfm(Xi_train, Xv_train, y_train, Xi_test, Xv_test,
                        ids_test, cate_cnt, folds, dfm_params):
    dfm_params["cate_feature_size"] = cate_cnt
    dfm_params["cate_field_size"] = len(Xi_train[0])
    dfm_params["num_field_size"] = len(Xv_train[0])

    y_train_meta = np.zeros((Xi_train.shape[0], 1), dtype=float)
    y_test_meta = np.zeros((Xi_test.shape[0], 1), dtype=float)
    _get = lambda x, l: [x[i] for i in l]
    gini_results_cv = np.zeros(len(folds), dtype=float)
    gini_results_epoch_train = np.zeros((len(folds), dfm_params["epoch"]),
                                        dtype=float)
    gini_results_epoch_valid = np.zeros((len(folds), dfm_params["epoch"]),
                                        dtype=float)
    for i, (train_idx, valid_idx) in enumerate(folds):
        Xi_train_, Xv_train_, y_train_ = _get(Xi_train, train_idx), _get(
            Xv_train, train_idx), _get(y_train, train_idx)
        Xi_valid_, Xv_valid_, y_valid_ = _get(Xi_train, valid_idx), _get(
            Xv_train, valid_idx), _get(y_train, valid_idx)

        dfm = DCN(**dfm_params)
        dfm.fit(Xi_train_, Xv_train_, y_train_, Xi_valid_, Xv_valid_, y_valid_)

        y_train_meta[valid_idx, 0] = dfm.predict(Xi_valid_, Xv_valid_)
        y_test_meta[:, 0] += dfm.predict(Xi_test, Xv_test)

        gini_results_cv[i] = gini_norm(y_valid_, y_train_meta[valid_idx])
        gini_results_epoch_train[i] = dfm.train_result
        gini_results_epoch_valid[i] = dfm.valid_result

    y_test_meta /= float(len(folds))

    # save result
    if dfm_params["use_cross"] and dfm_params["use_deep"]:
        clf_str = "DeepAndCross"
    elif dfm_params["use_cross"]:
        clf_str = "CROSS"
    elif dfm_params["use_deep"]:
        clf_str = "DNN"
    print("%s: %.5f (%.5f)" %
          (clf_str, gini_results_cv.mean(), gini_results_cv.std()))
    filename = "%s_Mean%.5f_Std%.5f.csv" % (clf_str, gini_results_cv.mean(),
                                            gini_results_cv.std())
    _make_submission(ids_test, y_test_meta, filename)

    _plot_fig(gini_results_epoch_train, gini_results_epoch_valid, clf_str)

    return y_train_meta, y_test_meta
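gini_norm is not defined in this snippet; a widely used normalized-Gini implementation (an assumption about the helper used here) is:

import numpy as np

def gini(actual, pred):
    # Sort by prediction, descending; stable sort keeps ties in input order
    order = np.argsort(-np.asarray(pred, dtype=float), kind='stable')
    a = np.asarray(actual, dtype=float)[order]
    cum = np.cumsum(a) / a.sum()
    return cum.sum() / len(a) - (len(a) + 1) / (2.0 * len(a))

def gini_norm(actual, pred):
    # Normalize by the Gini of a perfect ordering
    return gini(actual, pred) / gini(actual, actual)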
Example #9
    train_set = datasets.MNIST(args.dir,
                               train=True,
                               download=True,
                               transform=transformer)
    test_set = datasets.MNIST(args.dir, train=False, transform=transformer)
    train_limit = list(range(
        0, len(train_set))) if not args.test_run else list(range(0, 500))
    test_limit = list(range(0, len(test_set))) if not args.test_run else list(
        range(0, 500))

    train_loader = torch.utils.data.DataLoader(Subset(train_set, train_limit),
                                               batch_size=args.batch_size,
                                               shuffle=True)

    test_loader = torch.utils.data.DataLoader(Subset(test_set, test_limit),
                                              batch_size=args.batch_size,
                                              shuffle=False)

model = DCN(args)
rec_loss_list = model.pretrain(train_loader, epoch=args.pre_epoch)
pre_trained_AE = copy.deepcopy(model.autoencoder)
# model.autoencoder = pre_trained_AE
# initial_clustering = model.clustering
# model.pre_cluster(train_loader)
nmi_list = []
ari_list = []

model.args = args
# model.clustering = initial_clustering
reducer = umap.UMAP()
for e in range(args.epoch):
    # Print training set
    if e % 1 == 0:
        out = model.autoencoder(torch.FloatTensor(np.array(X_train)).to(
Example #10
    args = parser.parse_args()

    # Load data
    transformer = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.1307, ), (0.3081, ))])

    train_set = datasets.MNIST(args.dir,
                               train=True,
                               download=True,
                               transform=transformer)
    test_set = datasets.MNIST(args.dir, train=False, transform=transformer)
    train_limit = list(range(
        0, len(train_set))) if not args.test_run else list(range(0, 500))
    test_limit = list(range(0, len(test_set))) if not args.test_run else list(
        range(0, 500))

    train_loader = torch.utils.data.DataLoader(Subset(train_set, train_limit),
                                               batch_size=args.batch_size,
                                               shuffle=True)

    test_loader = torch.utils.data.DataLoader(Subset(test_set, test_limit),
                                              batch_size=args.batch_size,
                                              shuffle=False)

    # Main body
    model = DCN(args)
    rec_loss_list, nmi_list, ari_list = solver(args, model, train_loader,
                                               test_loader)
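solver itself is not shown; the nmi_list and ari_list it returns presumably track clustering quality per epoch, which the standard scikit-learn metrics compute (true_labels and cluster_assignments are hypothetical names):

from sklearn.metrics import adjusted_rand_score, normalized_mutual_info_score

nmi = normalized_mutual_info_score(true_labels, cluster_assignments)
ari = adjusted_rand_score(true_labels, cluster_assignments)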
Example #11
    test_size = 0.2
    k = 8
    layer_num = 6
    output_dim = 1
    reg = 1e-4

    # =============== Prepare the data ===============
    dense_feature = ['I' + str(i) for i in range(1, 14)]
    sparse_feature = ['C' + str(i) for i in range(1, 27)]
    embed_dict, train_df, test_df = preprocess(args.file_path, sample_num,
                                               test_size)
    embed_num = list(embed_dict.values())
    input_dim = len(dense_feature) + len(sparse_feature) * k
    hidden_units = [input_dim, 256, 128, 64]
    train_dataset = DCNDataset(train_df, dense_feature, sparse_feature)
    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              shuffle=True)

    # =============== Build the model ===============
    DCN_model = DCN(embed_num, k, input_dim, layer_num, hidden_units,
                    output_dim)
    loss_func = nn.BCELoss()
    optimizer = optim.Adam(DCN_model.parameters(),
                           lr=args.learning_rate,
                           weight_decay=reg)

    # =============== Train and test the model ===============
    train(DCN_model, args.epochs, train_loader, loss_func, optimizer)
    test(DCN_model, test_df, dense_feature, sparse_feature)
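The train helper invoked above is not shown; a minimal sketch of a compatible loop, assuming each batch from DCNDataset is a (features, labels) pair:

def train(model, epochs, loader, loss_func, optimizer):
    model.train()
    for epoch in range(epochs):
        total = 0.0
        for features, labels in loader:
            optimizer.zero_grad()
            pred = model(features).squeeze(-1)   # (batch,) probabilities
            loss = loss_func(pred, labels.float())
            loss.backward()
            optimizer.step()
            total += loss.item()
        print('epoch %d  mean loss %.4f' % (epoch, total / len(loader)))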
Example #12
    def train(self, train_parameters, device):
        epochs = train_parameters["epochs"]
        treated_batch_size = train_parameters["treated_batch_size"]
        control_batch_size = train_parameters["control_batch_size"]
        lr = train_parameters["lr"]
        shuffle = train_parameters["shuffle"]
        model_save_path = train_parameters["model_save_path"].format(epochs, lr)
        treated_set_train = train_parameters["treated_set_train"]
        control_set_train = train_parameters["control_set_train"]

        input_nodes = train_parameters["input_nodes"]

        phases = ['train', 'val']

        print("Saved model path: {0}".format(model_save_path))

        treated_data_loader_train = torch.utils.data.DataLoader(treated_set_train,
                                                                batch_size=treated_batch_size,
                                                                shuffle=shuffle,
                                                                num_workers=1)

        control_data_loader_train = torch.utils.data.DataLoader(control_set_train,
                                                                batch_size=control_batch_size,
                                                                shuffle=shuffle,
                                                                num_workers=1)

        network = DCN(training_flag=True, input_nodes=input_nodes).to(device)
        optimizer = optim.Adam(network.parameters(), lr=lr)
        lossF = nn.MSELoss()
        min_loss = 100000.0
        dataset_loss = 0.0
        print(".. Training started ..")
        print(device)
        for epoch in range(epochs):
            network.train()
            total_loss = 0
            train_set_size = 0

            if epoch % 2 == 0:
                dataset_loss = 0
                # train treated
                network.hidden1_Y1.weight.requires_grad = True
                network.hidden1_Y1.bias.requires_grad = True
                network.hidden2_Y1.weight.requires_grad = True
                network.hidden2_Y1.bias.requires_grad = True
                network.out_Y1.weight.requires_grad = True
                network.out_Y1.bias.requires_grad = True

                network.hidden1_Y0.weight.requires_grad = False
                network.hidden1_Y0.bias.requires_grad = False
                network.hidden2_Y0.weight.requires_grad = False
                network.hidden2_Y0.bias.requires_grad = False
                network.out_Y0.weight.requires_grad = False
                network.out_Y0.bias.requires_grad = False

                for batch in treated_data_loader_train:
                    covariates_X, ps_score, y_f, y_cf = batch
                    covariates_X = covariates_X.to(device)
                    ps_score = ps_score.squeeze().to(device)

                    train_set_size += covariates_X.size(0)
                    treatment_pred = network(covariates_X, ps_score)
                    # treatment_pred[0] -> y1
                    # treatment_pred[1] -> y0
                    predicted_ITE = treatment_pred[0] - treatment_pred[1]
                    true_ITE = y_f - y_cf
                    if torch.cuda.is_available():
                        loss = lossF(predicted_ITE.float().cuda(),
                                     true_ITE.float().cuda()).to(device)
                    else:
                        loss = lossF(predicted_ITE.float(),
                                     true_ITE.float()).to(device)
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    total_loss += loss.item()
                dataset_loss = total_loss

            elif epoch % 2 == 1:
                # train controlled
                network.hidden1_Y1.weight.requires_grad = False
                network.hidden1_Y1.bias.requires_grad = False
                network.hidden2_Y1.weight.requires_grad = False
                network.hidden2_Y1.bias.requires_grad = False
                network.out_Y1.weight.requires_grad = False
                network.out_Y1.bias.requires_grad = False

                network.hidden1_Y0.weight.requires_grad = True
                network.hidden1_Y0.bias.requires_grad = True
                network.hidden2_Y0.weight.requires_grad = True
                network.hidden2_Y0.bias.requires_grad = True
                network.out_Y0.weight.requires_grad = True
                network.out_Y0.bias.requires_grad = True

                for batch in control_data_loader_train:
                    covariates_X, ps_score, y_f, y_cf = batch
                    covariates_X = covariates_X.to(device)
                    ps_score = ps_score.squeeze().to(device)

                    train_set_size += covariates_X.size(0)
                    treatment_pred = network(covariates_X, ps_score)
                    # treatment_pred[0] -> y1
                    # treatment_pred[1] -> y0
                    predicted_ITE = treatment_pred[0] - treatment_pred[1]
                    true_ITE = y_cf - y_f
                    if torch.cuda.is_available():
                        loss = lossF(predicted_ITE.float().cuda(),
                                     true_ITE.float().cuda()).to(device)
                    else:
                        loss = lossF(predicted_ITE.float(),
                                     true_ITE.float()).to(device)
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    total_loss += loss.item()
                dataset_loss = dataset_loss + total_loss

            # print("epoch: {0}, train_set_size: {1} loss: {2}".
            #       format(epoch, train_set_size, total_loss))
            if epoch % 10 == 9:
                print("epoch: {0}, Treated + Control loss: {1}".format(epoch, dataset_loss))
            # if epoch % 2 == 1:
            #     print("epoch: {0}, Treated + Control loss: {1}".format(epoch, dataset_loss))
                # if dataset_loss < min_loss:
                #     print("Current loss: {0}, over previous: {1}, Saving model".
                #           format(dataset_loss, min_loss))
                #     min_loss = dataset_loss
        torch.save(network.state_dict(), model_save_path)
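The repeated freeze/unfreeze blocks above can be collapsed into one helper; a sketch:

def set_requires_grad(layers, flag):
    # Toggle gradients for every parameter (weights and biases) of each layer
    for layer in layers:
        for p in layer.parameters():
            p.requires_grad = flag

# even epochs: set_requires_grad([network.hidden1_Y1, network.hidden2_Y1,
#                                 network.out_Y1], True) and the Y0 head False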
Example #13
                        help='decay rate',
                        type=float,
                        default=0.99)
    args = parser.parse_args(args=[])

    # load data set
    X_train_cate, X_train_cont, y_train, X_test_cate, X_test_cont, y_test, cate_list = load_dataset(
        args.input_dir)

    cate_num = X_train_cate.shape[1]
    cont_num = X_train_cont.shape[1]

    tf.reset_default_graph()
    with tf.Session() as sess:
        # define model
        model = DCN.DCN(args, cate_num, cont_num, cate_list)
        model.build()

        ckpt = tf.train.get_checkpoint_state(
            os.path.join(args.input_dir, args.model_name))
        if ckpt:
            print('Loading model parameters from %s' %
                  ckpt.model_checkpoint_path)
            model.saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            print('Creating model with initial parameters')
            sess.run(tf.global_variables_initializer())

        step = 0
        for epoch in range(args.epoch):
            start_time = time.time()
Example #14
from DCN import DCN
import data_loader
import sys

nrows = None
if len(sys.argv) > 1:
    nrows = int(sys.argv[1])

if __name__ == '__main__':
    path = '../data/data.csv'

    feature_size, data = data_loader.data_load(path, nrows=nrows)
    features = ['userId', 'movieId', 'tag']

    num = data.shape[0] * 4 // 5

    model = DCN(features, feature_size, embedding_size=8, verbose=False)

    X = data[features].values
    y = data.label.values.reshape(-1, 1)
    '''
    model.fit(
        X[:num],y[:num], epoch=10,
        X_valid=X[num:],y_valid=y[num:],
        early_stopping=True, refit=True
    )
    '''
    import time

    start = time.time()
    model.fit(X[:num], y[:num], epoch=1)
    print('training one epoch cost %.2fs' % (time.time() - start))
Example #15
        train_limit = list(range(
            0, len(train_set))) if not args.test_run else list(range(0, 500))
        test_limit = list(range(
            0, len(test_set))) if not args.test_run else list(range(0, 500))

        train_loader = torch.utils.data.DataLoader(Subset(
            train_set, train_limit),
                                                   batch_size=args.batch_size,
                                                   shuffle=True)

        test_loader = torch.utils.data.DataLoader(Subset(test_set, test_limit),
                                                  batch_size=args.batch_size,
                                                  shuffle=False)

    # Main body
    model = DCN(args)
    rec_loss_list, nmi_list, ari_list = solver(args, model, train_loader,
                                               test_loader)

    # X_train = X_train.to(self.device)
    # print(y_train[0])
    out = model.autoencoder(torch.FloatTensor(np.array(X_train)), latent=True)
    reducer = umap.UMAP()
    # print(help(umap))
    X2 = reducer.fit_transform(out.detach().numpy())
    c = [color[int(y_train.iloc[i])] for i in range(len(y_train))]
    plt.scatter(X2[:, 0], X2[:, 1], color=c)
    plt.show()

    X4 = reducer.fit_transform(X_train)
    plt.scatter(X4[:, 0], X4[:, 1], color=c)