Example no. 1
0
def worker(config, idx, itt, num_gpus, log_path):
    print(f"Starting worker for config {idx} -> iteration {itt}")

    torch.set_num_threads(1)
    training = loader(55000, args.batch_size, 0)
    validation = loader(7500, 1, 55000)

    es = EarlyStopping(monitor='val_loss',
                       min_delta=1e-3,
                       patience=20,
                       mode='min',
                       verbose=True)
    logging = TrainingDataCallback(f"{log_path}/cae_{idx}_{itt}.json",
                                   log_stats=["val_loss", "epoch"])
    trainer = pl.Trainer(gpus=num_gpus,
                         stochastic_weight_avg=True,
                         callbacks=[es, logging],
                         progress_bar_refresh_rate=0,
                         weights_summary=None,
                         deterministic=True)
    cae = ContractiveAutoEncoder(training,
                                 validation,
                                 config=config,
                                 reduce=True,
                                 seed=itt)

    trainer.fit(cae)
    print(f"\nWorker done for config {idx} -> iteration {itt}\n")
Example no. 2
0
def train(args):
    configs = [
        {
            "l1_units": 512,
            "l2_units": 256,
            "l3_units": 128,
            "lambda": 1e-3,
            "actv": nn.PReLU,
            "lr": 1e-2,
            "optimizer": Adagrad
        },
        {
            "l1_units": 560,
            "l2_units": 304,
            "l3_units": 208,
            "lambda": 1e-5,
            "actv": nn.SELU,
            "lr": 1e-2,
            "optimizer": Adagrad
        },
        {
            "l1_units": 560,
            "l2_units": 328,
            "l3_units": 208,
            "lambda": 1e-5,
            "actv": nn.SELU,
            "lr": 1e-2,
            "optimizer": Adagrad
        },
        {
            "l1_units": 512,
            "l2_units": 256,
            "l3_units": 160,
            "lambda": 1e-5,
            "actv": nn.SELU,
            "lr": 1e-2,
            "optimizer": Adagrad
        },
        {
            "l1_units": 576,
            "l2_units": 328,
            "l3_units": 176,
            "lambda": 1e-5,
            "actv": nn.PReLU,
            "lr": 1e-2,
            "optimizer": Adagrad
        },
    ]

    training = loader(55000, args.batch_size, 0)
    validation = loader(7500, 1, 55000)

    if args.model_id is None:
        for n, config in enumerate(configs):
            iteration_loop(config, n, args.itt, training, validation,
                           args.num_gpus, args.log_path)
    else:
        iteration_loop(configs[args.model_id], args.model_id, args.itt,
                       training, validation, args.num_gpus, args.log_path)
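
Note: the "lambda" entry in these configs is presumably the weight of the contractive penalty applied by ContractiveAutoEncoder. As a rough illustration only (a hypothetical class with a sigmoid encoder, not the project's ContractiveAutoEncoder and not the PReLU/SELU activations listed above), a contractive reconstruction loss in plain PyTorch could look like this:

import torch
import torch.nn as nn
import torch.nn.functional as F

class TinyContractiveAE(nn.Module):
    # Illustrative sketch only.
    def __init__(self, n_in, n_hidden, lam=1e-5):
        super().__init__()
        self.enc = nn.Linear(n_in, n_hidden)
        self.dec = nn.Linear(n_hidden, n_in)
        self.lam = lam  # plays the role of the "lambda" config entry

    def forward(self, x):
        h = torch.sigmoid(self.enc(x))
        return self.dec(h), h

    def loss(self, x):
        x_hat, h = self(x)
        recon = F.mse_loss(x_hat, x)
        # For a sigmoid encoder, ||J_f(x)||_F^2 = sum_j (h_j(1-h_j))^2 * sum_i W_ji^2
        w_sq = (self.enc.weight ** 2).sum(dim=1)                   # (n_hidden,)
        contractive = ((h * (1 - h)) ** 2 * w_sq).sum(dim=1).mean()
        return recon + self.lam * contractive
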
Example no. 3
0
def train(args):
    if not os.path.exists(f"{project_path}/{args.model_path}"):
        os.makedirs(f"{project_path}/{args.model_path}")

    # pl.seed_everything(2)
    training = dl.loader(55000, args.batch_size, 0)
    validation = dl.loader(8250, 1, 55000)
    test = dl.loader(5000, 1, 63250)

    config = {
        "l1_units": 576,
        "l2_units": 328,
        "l3_units": 176,
        "lambda": 1e-5,
        "actv": nn.PReLU,
        "lr": 1e-2,
        "optimizer": Adagrad
    }
    cae = ContractiveAutoEncoder(training,
                                 validation,
                                 config=config,
                                 test_dataloader=test,
                                 reduce=True)
    es = EarlyStopping(monitor='val_loss',
                       min_delta=1e-3,
                       patience=10,
                       mode='min',
                       verbose=True)
    checkpointing = ModelCheckpoint(
        monitor='val_loss',
        dirpath=f"{project_path}/{args.model_path}/",
        filename=args.output_filename,
        verbose=True,
        save_top_k=1)

    trainer = pl.Trainer(gpus=1,
                         auto_select_gpus=True,
                         callbacks=[es, checkpointing],
                         stochastic_weight_avg=True,
                         deterministic=True,
                         benchmark=True)

    trainer.fit(cae)
Example no. 4
0
def plot_pathes(paths, storedir):
	global store_dir
	global pathes
	pathes = paths
	store_dir = storedir
	if not os.path.exists(storedir):
		os.mkdir(storedir)
	loader = data_loader.loader()
	benches = loader.get_benchnames_pathes(pathes)
	for bench in benches:
		plot_bench(bench)
Example no. 5
0
def train(config, batch_size, num_epochs=20, num_gpus=0):
    training = dl.loader(55000, batch_size, 0)
    validation = dl.loader(8250, 1, 55000)
    cae = ContractiveAutoEncoder(training_dataloader=training,
                                 val_dataloader=validation,
                                 config=config)
    trainer = pl.Trainer(
        max_epochs=num_epochs,
        gpus=num_gpus,
        auto_select_gpus=True if num_gpus else False,
        logger=TensorBoardLogger(save_dir=tune.get_trial_dir(),
                                 name="",
                                 version='.'),
        stochastic_weight_avg=True,
        benchmark=True,
        callbacks=[
            TuneReportCheckpointCallback({"loss": "val_loss"},
                                         filename="checkpoint",
                                         on="validation_end")
        ])

    trainer.fit(cae)
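
Note: this train function is written to be launched by Ray Tune (it reports "loss" back through TuneReportCheckpointCallback). A hypothetical driver, assuming a Ray Tune 1.x setup matching the pl.Trainer(gpus=...) API used above, might look like the sketch below; the actv and optimizer entries the model presumably also expects are omitted for brevity.

from ray import tune

search_space = {
    "l1_units": tune.choice([512, 560, 576]),
    "l2_units": tune.choice([256, 304, 328]),
    "l3_units": tune.choice([128, 160, 208]),
    "lambda": tune.loguniform(1e-6, 1e-3),
    "lr": tune.loguniform(1e-4, 1e-1),
}

analysis = tune.run(
    tune.with_parameters(train, batch_size=256, num_epochs=20, num_gpus=1),
    config=search_space,
    metric="loss",
    mode="min",
    num_samples=20,
    resources_per_trial={"cpu": 2, "gpu": 1},
)
print("best config:", analysis.best_config)
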
Example no. 6
0
def main(batch_size=64,
         max_epochs=100,
         validation_split=0.2,
         early_stop=EarlyStopping()):
    model_hdf5_path = "./hdf5s/"

    sampler = data_loader.loader()

    modeler = embedding_model.embedding_model()

    if args.model_name == "embedding_model":
        # training
        x, names, y = sampler.sample_embedding(
            type="pretrain-NYtaxi",
            short_term_lstm_seq_len=args.short_term_lstm_seq_len)

        #print(np.array(x).shape)
        #print(np.array(names).shape)
        #print(np.array(y).shape)

        model = modeler.embedding(embedding_shape=128,
                                  nbhd_size=1,
                                  volume_type=2,
                                  cnn_flat_size=128,
                                  lstm_seq_len=args.short_term_lstm_seq_len)

        model.fit(x=x,
                  y=y,
                  batch_size=batch_size,
                  validation_split=validation_split,
                  epochs=max_epochs,
                  callbacks=[early_stop])

        model.save('embedding_model')

        layer_model = Model(inputs=model.input,
                            outputs=model.get_layer('embedding').output)

        feature = np.array(layer_model.predict(x=x, ))

        np.save('./Data/Pretrain_data_NYtaxi/NYtaxi_pretrain_embedding',
                feature)

        return

    else:
        print("Cannot recognize parameter...")
        return
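
Note: the early_stop=EarlyStopping() default stops as soon as val_loss fails to improve for a single epoch (patience=0 in Keras). If that is not intended, a more forgiving callback could be passed in; a small sketch using the Keras API:

from keras.callbacks import EarlyStopping

early_stop = EarlyStopping(monitor='val_loss',
                           patience=5,
                           restore_best_weights=True)
main(batch_size=64, max_epochs=100, validation_split=0.2, early_stop=early_stop)
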
Example no. 7
0
def main():
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(device)
    train_loader, test_loader = data_loader.loader()

    model = net.Net().to(device)
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
    epochs = 1
    for epoch in range(1, epochs + 1):
        train.train(model, device, train_loader, optimizer, epoch)

    PATH = './mnist_result.pth'
    torch.save(model.state_dict(), PATH)

    model = net.Net().to(device)
    model.load_state_dict(torch.load(PATH))
    test.test(model, device, test_loader)
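
Note: the train.train and test.test helpers are not shown in this snippet; a conventional MNIST-style training function (a sketch under the assumption that Net ends in log_softmax, not the project's actual code) would be along these lines:

import torch.nn.functional as F

def train(model, device, train_loader, optimizer, epoch, log_interval=100):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        loss = F.nll_loss(model(data), target)  # assumes log_softmax outputs
        loss.backward()
        optimizer.step()
        if batch_idx % log_interval == 0:
            print(f"epoch {epoch} batch {batch_idx}/{len(train_loader)} "
                  f"loss {loss.item():.4f}")
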
Example no. 8
0
def plot_pathes(pathes, storedir):
	# For easy data loading, you could use the data_loader utilities
	loader = data_loader.loader()
	# You really should reduce the benchmarks loaded by defining only_benchs
	# which will cause the loader to load only xyz data
	loader.load_pathes(pathes, only_benchs=['xyz'])
	data = loader.get_data()
	# Also have a look at get_filtered_data for data filtered by arguments/monitors

	# Somehow reformat the data so it can be plotted
	# You can use plot_utils to plot with matplotlib
	# The following call will not succeed because the data is not correctly
	# formatted for plot_bar_chart. See plot_utils.py for information about
	# the format
	# Alternative: plot_line_chart(data)
	plot_utils.plot_bar_chart(data)
	# After this call the plot can be found in matplotlib's main plot
	plt.show()
Example no. 9
0
def main():
    """The main function
    :return:
    """
    spark_session, spark_context = initialize_spark()

    # download data
    if conf["VERBOSE"]:
        print("start download...\n")
    raw_dataframe = loader()

    # preprocessing
    if conf["VERBOSE"]:
        print("preprocessing...\n")
    preprocessed_pd_dataframe = preprocessing(raw_dataframe)

    # return to spark dataframe and make some adjustment
    preprocessed_spark_dataframe = spark_session.createDataFrame(
        preprocessed_pd_dataframe)
    w = Window().orderBy('Time')
    preprocessed_spark_dataframe = preprocessed_spark_dataframe.withColumn(
        "ID",
        row_number().over(w))

    if conf["VERBOSE"]:
        print("classifier task start...\n")
    # time start
    start = time.time()

    # initialize the classifier class and classify
    classifier = Classifier(preprocessed_spark_dataframe, spark_session)
    predictions = classifier.classify()

    # time end
    end = time.time()

    # store time on file
    write_time_on_file(calculate_elapsed_time(start, end))

    # get the evaluation metrics
    binary_evaluator, metrics = evaluate_predictions(predictions)

    if conf["VERBOSE"]:
        print_prediction_metrics(metrics)
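
Note: initialize_spark() is defined elsewhere; a minimal sketch of such a helper (hypothetical app name and master setting) is:

from pyspark.sql import SparkSession

def initialize_spark(app_name="classifier-pipeline"):
    # Returns both the session and its underlying context, as used above.
    spark_session = (SparkSession.builder
                     .appName(app_name)
                     .master("local[*]")
                     .getOrCreate())
    return spark_session, spark_session.sparkContext
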
Example no. 10
0
def main():
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    cnn = models.vgg19(pretrained=True).features.to(device).eval()
    cnn_normalization_mean = torch.tensor([0.485, 0.456, 0.406]).to(device)
    cnn_normalization_std = torch.tensor([0.229, 0.224, 0.225]).to(device)

    content_layers_selected = ['conv_4']
    style_layers_selected = ['conv_1', 'conv_2', 'conv_3', 'conv_4', 'conv_5']

    style_weight = 1e8

    style_path = "../style"
    content_path = "../content"

    styleDir = [f for f in listdir(style_path) if isfile(join(style_path, f))]
    contentDir = [
        f for f in listdir(content_path) if isfile(join(content_path, f))
    ]

    image_no = 1

    for content in contentDir:
        for style in styleDir:
            style_img, content_img = loader(style, content, device)
            input_img = content_img.clone().detach().requires_grad_(True)

            output = run_style_transfer(cnn,
                                        cnn_normalization_mean,
                                        cnn_normalization_std,
                                        content_img,
                                        style_img,
                                        input_img,
                                        num_steps=300,
                                        style_weight=style_weight,
                                        content_layers=content_layers_selected,
                                        style_layers=style_layers_selected,
                                        device=device)

            path = '../results/img_' + str(image_no) + '.jpg'
            output.save(path, 'JPEG')
            image_no += 1
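
Note: the loader() used here is expected to return style and content tensors of identical size, already on the target device. A plausible sketch (hypothetical working resolution of 512, paths as above, not the project's actual loader):

from os.path import join

import torch
from PIL import Image
import torchvision.transforms as transforms

imsize = 512  # assumed; style and content must end up the same size

preprocess = transforms.Compose([
    transforms.Resize((imsize, imsize)),
    transforms.ToTensor(),
])

def loader(style_name, content_name, device,
           style_path="../style", content_path="../content"):
    style_img = preprocess(Image.open(join(style_path, style_name)).convert("RGB")).unsqueeze(0)
    content_img = preprocess(Image.open(join(content_path, content_name)).convert("RGB")).unsqueeze(0)
    return style_img.to(device, torch.float), content_img.to(device, torch.float)
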
Example no. 11
0
def classify(short_term_lstm_seq_len, number_class, type, res, savepath):
    sampler = data_loader.loader()
    x, names, y = sampler.sample_embedding(
        type=type, short_term_lstm_seq_len=short_term_lstm_seq_len)
    x = np.array(x)
    y = np.array(y)
    res = np.squeeze(res)
    print(res.shape)
    print(x.shape)
    print(y.shape)
    print(set(res.tolist()))
    for i in np.arange(number_class):
        print(i)
        index = np.array(np.squeeze(np.argwhere(res == i)))
        iclass_data = np.array([x[:, index, :, :, :], y[index, :]])
        print(np.array(iclass_data[0]).shape)
        print(np.array(iclass_data[1]).shape)
        np.save(savepath + 'class_data/' + str(int(i)) + 'class', iclass_data)
        del iclass_data
        gc.collect()
Example no. 12
0
def plot2d():
    ld = dl.loader(trainPercent=75)
    tri = ld.getTrainInp()
    tro = ld.getTrainOut()
    tsi = ld.getTestInp()
    tso = ld.getTestOut()
    mlp = MLP(ld, (12, 30))
    e_tr, e_ts = mlp.learn(2500, epsilon=0.002)
    e_tr = [e_tr[i][0][0] for i in range(len(e_tr))]
    e_tr_x = [i for i in range(1, len(e_tr) + 1)]
    e_ts_x = [i for i in range(1, len(e_ts) + 1)]
    f = plt.figure()
    fa1 = f.add_subplot(3, 1, 1)
    fa1.plot(tri, tro, "r+")
    out = mlp.calc(tri)
    fa1.plot(tri, out, "bo")
    out = mlp.calc(tsi)
    fa1.plot(tsi, tso, "yo")
    fa1.plot(tsi, out, "go")
    fa2 = f.add_subplot(3, 1, 2)
    fa2.plot(e_tr_x, e_tr, "r-")
    fa3 = f.add_subplot(3, 1, 3)
    fa3.plot(e_ts_x, e_ts, "b-")
    plt.show()
    def load_data(self, K, subject_id, seed):
        '''
        load train and val data
        return in shape x = (ncohorts, nsubjects, nsamples, 3000, 1)
        '''
        logger.log("K:", K, "subject_id:", subject_id)
        x_train, y_train = [], []
        x_val, y_val = [], []
        x_test, y_test = [], []
        print('Preparing pre-train data..')

        n_metaval_tasks = 0
        n_metatrain_tasks = 0
        task_index = -1

        self.data_list = []

        ds_name = self.ds_name
        logger.log('$$$', ds_name)
        ch = self.channel
        self.data_list.append(ds_name + "_" + ch)
        logger.log('$$', ch)
        subjects = data_loader.get_subject_lists(
            ds_name, configure.datasets[ds_name]['path'], ch)
        print('subjects:', len(subjects), subjects)

        for subj in subjects:
            if subj == subject_id:
                xx, yy = data_loader.loader(
                    configure.datasets[ds_name]['path'], ch, subj)

                for x, y in zip(xx, yy):
                    x_t, y_t, x_v, y_v, x_te, y_te = [], [], [], [], [], []

                    # use only if nsamples/class more than 'min_samples'
                    bp = configure.datasets[ds_name]['bandpass']
                    if bp[0] is not None and bp[1] is not None:
                        logger.log('bandpass:', bp)
                        x = preprocessor.bandpass_filter(x,
                                                         low=bp[0],
                                                         high=bp[1])

                    if len(configure.modals) == 1:
                        print('x', x.shape)

                        if len(x.shape) == 3:
                            x = x[:, :, 0]  # EEG ONLY (From 3 modals file)
                            x = np.expand_dims(x, axis=-1)
                        else:
                            x = x[:, :, :, 0]  #UCD

                        logger.log('select 1 modal:', x.shape)

                        if '2D' in configure.cnn_type:
                            # just for testing on 2D-CNN with 1 modal
                            x = np.expand_dims(x, axis=-1)

                            logger.log(
                                '‼️‼️ Make sure you use 2D-CNN with 1 modal‼️‼️'
                            )
                    else:
                        if len(x.shape) == 3:
                            x = np.expand_dims(x, axis=-2)

                    samples_per_class = utils.get_sample_per_5class(y)
                    logger.log('all:', len(y), 'samples:', samples_per_class)

                    if any([s < K * 3 for s in samples_per_class]):
                        logger.log(
                            "‼️ This subject has number of samples < K * 3) ‼️"
                        )

                        # use new method to select K samples
                        for c in range(0, 5):
                            num_train = 0
                            num_test = 0
                            num_val = 0

                            x_c = x[y == c]
                            y_c = y[y == c]

                            if seed != -1:
                                x_c, y_c = utils.shuffle_data(
                                    x_c,
                                    y_c,
                                    logger=logger,
                                    fix_val_sample=True,
                                    seed_no=seed)
                            else:
                                # use first K samples as training samples -> no shuffle
                                logger.log('use first K samples to train')

                            if samples_per_class[c] >= K * 3:
                                # select as normal
                                logger.log('class', c, 'samples are enough:',
                                           samples_per_class[c])

                                # 1. Select K samples/class to train
                                x_t.extend(x_c[:K])
                                y_t.extend(y_c[:K])

                                # 2. Select K samples/class to val
                                x_v.extend(x_c[K:K * 2])
                                y_v.extend(y_c[K:K * 2])

                                # 3. The rest are for testing
                                x_te.extend(x_c[K * 2:])
                                y_te.extend(y_c[K * 2:])

                            else:
                                ####################################
                                ## SELECT TO TRAIN -> VAL -> TEST ##
                                ####################################

                                # 1. Select K samples/class to train
                                num_train = min(K, samples_per_class[c])
                                logger.log('select', num_train,
                                           'samples to train.')
                                x_t.extend(x_c[:num_train])
                                y_t.extend(y_c[:num_train])
                                samples_per_class[c] -= num_train
                                print('After train set, samples_per_class', c,
                                      ' remains:', samples_per_class[c])

                                if samples_per_class[c] > 0:
                                    # 2. Select K samples/class to validate
                                    num_val = min(samples_per_class[c], K)
                                    logger.log('select', num_val,
                                               'samples to validate.')
                                    x_v.extend(x_c[num_train:num_train +
                                                   num_val])
                                    y_v.extend(y_c[num_train:num_train +
                                                   num_val])
                                    samples_per_class[c] -= num_val
                                    print('After val set, samples_per_class',
                                          c, ' remains:', samples_per_class[c])

                                    if samples_per_class[c] > 0:
                                        x_te.extend(x_c[num_train + num_val:])
                                        y_te.extend(y_c[num_train + num_val:])
                                        logger.log(
                                            'select remaining samples to test:',
                                            len(x_c[num_train + num_val:]))

                                print(num_train, num_val,
                                      len(x_c[num_train + num_val:]))
                                assert num_train + num_val + len(
                                    x_c[num_train + num_val:]) == len(x_c)

                        assert len(x_te) == len(y_te)
                        assert len(x_v) == len(y_v)
                        assert len(x_t) == len(y_t)
                        assert len(x_te) + len(x_v) + len(x_t) == len(x)

                        y_t, y_v, y_te = np.array(y_t), np.array(
                            y_v), np.array(y_te)
                        logger.log(
                            'train:', len(y_t), 'samples:',
                            [len(y_t[y_t == clid]) for clid in range(0, 5)])
                        logger.log(
                            'val:', len(y_v), 'samples:',
                            [len(y_v[y_v == clid]) for clid in range(0, 5)])
                        logger.log(
                            'test:', len(y_te), 'samples:',
                            [len(y_te[y_te == clid]) for clid in range(0, 5)])

                    else:
                        # pick K samples/class to fine-tune, the rest are for validation
                        for c in np.unique(y):
                            x_c = x[y == c]
                            y_c = y[y == c]

                            x_c, y_c = utils.shuffle_data(x_c,
                                                          y_c,
                                                          logger=logger,
                                                          fix_val_sample=True,
                                                          seed_no=seed)
                            x_t.extend(x_c[:K])
                            y_t.extend(y_c[:K])
                            x_v.extend(x_c[K:K * 2])
                            y_v.extend(y_c[K:K * 2])
                            x_te.extend(x_c[K * 2:])
                            y_te.extend(y_c[K * 2:])

                        assert len(x_t) == len(y_t) == K * 5
                        assert len(x_v) == len(y_v) == K * 5
                        assert len(x_t) + len(x_v) + len(x_te) == len(x)
                        assert len(y_t) + len(y_v) + len(y_te) == len(y)

                        logger.log('train:', len(y_t), 'samples:',
                                   utils.get_sample_per_class(np.array(y_t)))
                        logger.log('val:', len(y_v), 'samples:',
                                   utils.get_sample_per_class(np.array(y_v)))
                        logger.log('test:', len(y_te), 'samples:',
                                   utils.get_sample_per_class(np.array(y_te)))

                    x_train.append(x_t)
                    y_train.append(y_t)
                    x_val.append(x_v)
                    y_val.append(y_v)
                    x_test.append(x_te)
                    y_test.append(y_te)

                    break  # use first night only

        x_train = np.array(x_train)
        y_train = np.array(y_train)
        x_val = np.array(x_val)
        y_val = np.array(y_val)
        x_test = np.array(x_test)
        y_test = np.array(y_test)
        logger.log('x_train', x_train.shape, 'y_train', y_train.shape)
        logger.log('x_val', x_val.shape, 'y_val', y_val.shape)
        logger.log('x_test', x_test.shape, 'y_test', y_test.shape)

        assert len(x_train) == len(y_train) == len(x_val) == len(y_val) == len(
            x_test) == len(y_test)

        return x_train, y_train, x_val, y_val, x_test, y_test
def save_data():
	data_loader.loader(DATA_PATH, 1100, face = False)
	data_loader.loader(DATA_PATH, 1300, face = True)
	print('Saved.')
    def load_data(self, data_type='train'):
        conf = configure.pretrain
        logger.log('preparing..', data_type)
        results_x, results_y = [], []
        K = conf['K']
        task_index = -1
        for dataset in conf['datasets'][data_type]:
            for channel in conf['datasets'][data_type][dataset]:

                task_index += 1
                results_x.append([])
                results_y.append([])
                logger.log(dataset, ':', channel)

                subj_list = conf['datasets'][data_type][dataset][channel]
                if type(subj_list) == str and subj_list == 'all':
                    subjects = data_loader.get_subject_lists(
                        dataset, configure.datasets[dataset]['path'], channel)
                else:
                    subjects = conf['datasets'][data_type][dataset][channel]
                logger.log('subjects:', len(subjects), subjects)

                nperc = np.zeros(shape=(5, ))
                for subj in subjects:
                    xx, yy = data_loader.loader(
                        configure.datasets[dataset]['path'], channel, subj)

                    for x, y in zip(xx, yy):
                        class_samples = utils.get_sample_per_class(y)
                        if len(class_samples) == 5 and all(
                            [cl > K * 2 for cl in class_samples]):
                            # use only if nsamples/class more than 'K' * 2
                            bp = configure.datasets[dataset]['bandpass']
                            if bp[0] is not None and bp[1] is not None:
                                logger.log('bandpass:', bp)
                                x = preprocessor.bandpass_filter(x,
                                                                 low=bp[0],
                                                                 high=bp[1])

                            if len(configure.modals) == 1:
                                if x.shape[-1] == 3:
                                    x = x[:, :,
                                          0]  # EEG ONLY (From 3 modals file)

                                x = np.expand_dims(x, axis=-1)

                                if '2D' in configure.cnn_type:
                                    # just for testing on 2D-CNN with 1 modal
                                    x = np.expand_dims(x, axis=-1)

                                    logger.log(
                                        '‼️‼️ Make sure you use 2D-CNN with 1 modal‼️‼️'
                                    )
                                    logger.log('x', x.shape)
                            else:
                                if len(x.shape) == 2:
                                    x = np.expand_dims(x, axis=-2)

                            if data_type == 'train':
                                logger.log(
                                    '$ train subj: {} {} {} -> not oversample'.
                                    format(subj, x.shape, y.shape,
                                           class_samples))
                                x, y = utils.get_balance_class_oversample(
                                    x, y, logger)

                            else:
                                logger.log('$ val subj: {} {}'.format(
                                    subj, class_samples))

                            nperc += class_samples
                            results_x[task_index].append(x)
                            results_y[task_index].append(y)

                        else:
                            logger.log('$ removed subj: {} {} (< K*2)'.format(
                                subj, class_samples))

                logger.log('task:', task_index, nperc, np.sum(nperc))

        results_x, results_y = np.array(results_x), np.array(results_y)
        logger.log('n_meta' + data_type + '_tasks =', len(results_x))
        for tid, d in enumerate(results_x):
            logger.log('task:', tid, data_type + ':', len(d), 'records, x[0]:',
                       d[0].shape)

        return results_x, results_y
Example no. 16
0
    return A


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Merge_sort')

    parser.add_argument('-d',
                        '--data',
                        type=str,
                        nargs='?',
                        default='unknown_data',
                        help='Please choose the dataset: -d almostsorted_10k')

    args = parser.parse_args()

    almostsorted_10k, random_10k, almostsorted_50k, random_50k = loader()
    datasets = {
        'almostsorted_10k': almostsorted_10k,
        'random_10k': random_10k,
        'almostsorted_50k': almostsorted_50k,
        'random_50k': random_50k,
    }
    data_name = args.data
    if data_name == 'unknown_data':
        data_name = input(
            'Enter data type from almostsorted_10k, random_10k, almostsorted_50k, random_50k : '
        )
    data = datasets[data_name]

    Time = np.zeros(2)
    print('The input array is:')
    print(data_name)
    for n in range(2):
        start = time.time()
        sorted_data = Selection_sort(data)
        end = time.time()
        Time[n] = end - start
    print('The sorted array is:')
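
Note: only the tail of the sorting function (return A) survives in this snippet, so Selection_sort is assumed to be defined above it in the original file. For reference, a selection sort matching the call Selection_sort(data) could be sketched as:

def Selection_sort(A):
    # Selection sort on a copy of the input; returns the sorted list.
    A = list(A)
    for i in range(len(A)):
        m = i
        for j in range(i + 1, len(A)):
            if A[j] < A[m]:
                m = j
        A[i], A[m] = A[m], A[i]
    return A
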
Example no. 17
0
    def load_data(self, data_type='train'):
        conf = configure.maml
        logger.log('preparing..', data_type)
        results_x, results_y = [], []
        K = conf['K']
        task_index = -1
        for dataset in conf['datasets'][data_type]:
            for channel in conf['datasets'][data_type][dataset]:
                task_index += 1
                results_x.append([])
                results_y.append([])
                logger.log(dataset, ':', channel)

                subj_list = conf['datasets'][data_type][dataset][channel]
                if type(subj_list) == str and subj_list == 'all':
                    subjects = data_loader.get_subject_lists(
                        dataset, configure.datasets[dataset]['path'], channel)
                else:
                    subjects = conf['datasets'][data_type][dataset][channel]
                logger.log('subjects:', len(subjects), subjects)

                for subj in subjects:
                    xx, yy = data_loader.loader(
                        configure.datasets[dataset]['path'], channel, subj)

                    for x, y in zip(xx, yy):
                        class_samples = utils.get_sample_per_class(y)

                        # To filter out the same set of subjects as submission version (K=10)
                        logger.log(
                            '‼️ K =', K,
                            'but filter out if subjects contain < 30 samples')

                        if len(class_samples) == 5 and all(
                            [cl >= 30 and cl >= K * 2
                             for cl in class_samples]):
                            # use only if nsamples/class more than 'K' * 2
                            bp = configure.datasets[dataset]['bandpass']
                            if bp[0] is not None and bp[1] is not None:
                                logger.log('bandpass:', bp)
                                x = preprocessor.bandpass_filter(x,
                                                                 low=bp[0],
                                                                 high=bp[1])

                            if len(configure.modals) == 1:
                                if x.shape[-1] == 3:
                                    x = x[:, :,
                                          0]  # EEG ONLY (From 3 modals file)

                                x = np.expand_dims(x, axis=-1)
                            else:
                                x = np.expand_dims(x, axis=-2)

                            results_x[task_index].append(x)
                            results_y[task_index].append(y)

                            logger.log('$ added subj: {} {}'.format(
                                subj, class_samples))
                        else:
                            logger.log('$ removed subj: {} {} (< K*2)'.format(
                                subj, class_samples))

        results_x, results_y = np.array(results_x), np.array(results_y)
        logger.log('n_meta' + data_type + '_tasks =', len(results_x))
        for tid, d in enumerate(results_x):
            logger.log('task:', tid, data_type + ':', len(d), 'records, x[0]:',
                       d[0].shape)

        return results_x, results_y
Example no. 18
0
    # tfs.RandomRotation(range=(0, 360)),
    tfs.Downsize(),
    tfs.Normalise(),
    # tfs.ChannelShift(),
    transforms.ToTensor()
])

p = 0.1

batch_size = 64
validation_split = .2
shuffle_dataset = True

data_path = '../Data'
labels_path = '{}/train_labels/train_labels.csv'.format(data_path)
train_loader, validation_loader = loader('{}/train'.format(data_path), labels_path, batch_size, validation_split, p=p, transform=trans)

# inputs, labels = next(iter(train_loader))
# print(labels)
# print(inputs)

# print(model(inputs.float()))
# print(labels)

n_features = 512

try:
    features = np.load('./features/resnet_3a_features_{}.npy'.format(p))
    labels = np.load('./features/resnet_3a_labels_{}.npy'.format(p))
    print('Loaded training data')
except IOError:
Example no. 19
0
                               'loss_and_acc_lstm_%s.txt' % (args.dataset)),
                  mode='a') as f:
            f.write('%0.8f\n' % (mse_loss))

        # save the model weights
        if iteration % args.save_iters == 0:
            save_dir = os.path.join(args.save_dir, 'iter_%d' % (iteration))
            if not os.path.exists(save_dir):
                os.makedirs(save_dir)
            torch.save(lstm.state_dict(), os.path.join(save_dir, 'lstm.pth'))


if __name__ == '__main__':

    # get the train and test generators from the data loader
    train_generator, test_generator = loader(args)

    # if start_iter is not zero, then load the model from the given iteration
    if args.start_iter != 0:
        content_encoder.load_state_dict(torch.load(
            os.path.join(args.save_dir, 'iter_%d' % (args.start_iter),
                         'content_encoder.pth')),
                                        strict=False)
        pose_encoder.load_state_dict(torch.load(
            os.path.join(args.save_dir, 'iter_%d' % (args.start_iter),
                         'pose_encoder.pth')),
                                     strict=False)
        decoder.load_state_dict(torch.load(
            os.path.join(args.save_dir, 'iter_%d' % (args.start_iter),
                         'decoder.pth')),
                                strict=False)
        scene_discriminator.load_state_dict(torch.load(os.path.join(
Example no. 20
0
    def train(self):
        """
        Description : train network
        """
        tr_dataloader, te_dataloader = loader(self.config, self.ctx)
        tr_acc_per_epoch = list()
        te_acc_per_epoch = list()
        train_acc = mx.metric.Accuracy()
        test_acc = mx.metric.Accuracy()
        global_va_acc = 0.0
        for epoch in trange(self.epochs):
            tr_acc = list()
            te_acc = list()
            tr_iter = iter(tr_dataloader)
            te_iter = iter(te_dataloader)
            for batch in tqdm(tr_iter):
                x, y = batch
                x, y, last_targets = batch_for_few_shot(
                    self.N, self.K, self.batch_size, x, y)
                with autograd.record():
                    x_split = gluon.utils.split_and_load(x, self.ctx)
                    y_split = gluon.utils.split_and_load(y, self.ctx)
                    last_targets_split = gluon.utils.split_and_load(
                        last_targets, self.ctx)
                    last_model = [
                        self.net(X, Y)[:, -1, :]
                        for X, Y in zip(x_split, y_split)
                    ]
                    loss_val = [
                        self.loss_fn(X, Y)
                        for X, Y in zip(last_model, last_targets_split)
                    ]
                    for l in loss_val:
                        l.backward()
                    for pred, target in zip(last_model, last_targets_split):
                        train_acc.update(preds=nd.argmax(pred, 1),
                                         labels=target)
                        tr_acc.append(train_acc.get()[1])

                self.trainer.step(self.batch_size, ignore_stale_grad=True)

            for batch in tqdm(te_iter):
                x, y = batch
                x, y, last_targets = batch_for_few_shot(self.N, self.K,\
                                                        int(self.batch_size / len(self.ctx)), x, y)
                x = x.copyto(self.ctx[0])
                y = y.copyto(self.ctx[0])
                last_targets = last_targets.copyto(self.ctx[0])
                model_output = self.net(x, y)
                last_model = model_output[:, -1, :]
                test_acc.update(preds=nd.argmax(last_model, 1),
                                labels=last_targets)
                te_acc.append(test_acc.get()[1])
            current_va_acc = np.mean(te_acc)
            if global_va_acc < current_va_acc:
                self.save_model(epoch, round(np.mean(tr_acc), 2),
                                round(np.mean(te_acc), 2))
                global_va_acc = current_va_acc
            print("epoch {e}  train_acc:{ta} test_acc:{tea} ".format(e=epoch,\
                                                                     ta=np.mean(tr_acc),\
                                                                     tea=np.mean(te_acc)))
            self.writer.add_scalar(tag="train_accuracy",
                                   value=np.mean(tr_acc),
                                   global_step=epoch)
            self.writer.add_scalar(tag="test_accuracy",
                                   value=np.mean(te_acc),
                                   global_step=epoch)
            tr_acc_per_epoch.append(np.mean(tr_acc))
            te_acc_per_epoch.append(np.mean(te_acc))
Example no. 21
0
NetC = NetC(ngpu=ngpu)
# NetC.apply(weights_init)
print('\n########## CRITIC ##########\n')
print(NetC)
print()

if cuda:
    NetS = NetS.cuda()
    NetC = NetC.cuda()
    # criterion = criterion.cuda()

# setup optimizer
optimizerG = optim.Adam(NetS.parameters(), lr=lr, betas=(beta1, 0.999))
optimizerD = optim.Adam(NetC.parameters(), lr=lr, betas=(beta1, 0.999))
# load training data
dataloader = loader(LITS('preprocessed', (size, size), train=True), batchSize)
# load testing data
dataloader_val = loader(LITS('preprocessed', (size, size), train=False),
                        batchSize)

print('===> Starting training\n')
max_iou = 0
NetS.train()
for epoch in range(1, niter + 1):
    for i, data in tqdm(enumerate(dataloader, 1)):
        ##################################
        ### train Discriminator/Critic ###
        ##################################
        NetC.zero_grad()

        image, target, gt = Variable(data[0]), Variable(data[1]), Variable(
Example no. 22
0
def main():
    args = parser.parse_args()
    data_name = ['Scene15', 'Caltech101', 'Reuters_dim10', 'NoisyMNIST-30000']
    NetSeed = 64
    # random.seed(NetSeed)
    np.random.seed(NetSeed)
    torch.backends.cudnn.deterministic = True
    torch.manual_seed(NetSeed)  # set the random seed for the CPU
    torch.cuda.manual_seed(NetSeed)  # set the random seed for the current GPU

    train_pair_loader, all_loader, divide_seed = loader(
        args.batch_size, args.neg_prop, args.aligned_prop, args.noisy_training,
        data_name[args.data])
    if args.data == 0:
        model = MvCLNfcScene().to(args.gpu)
    elif args.data == 1:
        model = MvCLNfcCaltech().to(args.gpu)
    elif args.data == 2:
        model = MvCLNfcReuters().to(args.gpu)
    elif args.data == 3:
        model = MvCLNfcMNIST().to(args.gpu)

    criterion = NoiseRobustLoss().to(args.gpu)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.learn_rate)
    if not os.path.exists("./log/"):
        os.mkdir("./log/")
    path = os.path.join(
        "./log/" + str(data_name[args.data]) + "_" + 'time=' +
        time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))
    os.mkdir(path)

    log_format = '%(message)s'
    logging.basicConfig(stream=sys.stdout,
                        level=logging.INFO,
                        format=log_format,
                        datefmt='%m/%d %I:%M:%S %p')
    fh = logging.FileHandler(path + '.txt')
    fh.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(fh)
    logging.info(
        "******** Training begin, use RobustLoss: {}*m, use gpu {}, batch_size = {}, unaligned_prop = {}, NetSeed = {}, DivSeed = {} ********"
        .format(args.robust * args.switching_time, args.gpu, args.batch_size,
                (1 - args.aligned_prop), NetSeed, divide_seed))

    CAR_list = []
    acc_list, nmi_list, ari_list = [], [], []
    pos_dist_mean_list, neg_dist_mean_list = [], []
    false_neg_dist_mean_list, true_neg_dist_mean_list = [], []
    train_time = 0
    # train
    for i in range(0, args.epochs + 1):
        if i == 0:
            with torch.no_grad():
                pos_dist_mean, neg_dist_mean, false_neg_dist_mean, true_neg_dist_mean, epoch_time = train(
                    train_pair_loader, model, criterion, optimizer, i, args)
        else:
            pos_dist_mean, neg_dist_mean, false_neg_dist_mean, true_neg_dist_mean, epoch_time = train(
                train_pair_loader, model, criterion, optimizer, i, args)
        train_time += epoch_time
        pos_dist_mean_list.append(pos_dist_mean.item())
        neg_dist_mean_list.append(neg_dist_mean.item())
        true_neg_dist_mean_list.append(true_neg_dist_mean.item())
        false_neg_dist_mean_list.append(false_neg_dist_mean.item())

        # test
        v0, v1, pred_label, alignment_rate = tiny_infer(
            model, args.gpu, all_loader)
        CAR_list.append(alignment_rate)
        data = []
        data.append(v0)
        data.append(v1)
        y_pred, ret = Clustering(data, pred_label)
        if i % 10 == 0:
            logging.info("******** testing ********")
            logging.info("CAR={}, kmeans: acc={}, nmi={}, ari={}".format(
                round(alignment_rate, 4), ret['kmeans']['accuracy'],
                ret['kmeans']['NMI'], ret['kmeans']['ARI']))
        acc_list.append(ret['kmeans']['accuracy'])
        nmi_list.append(ret['kmeans']['NMI'])
        ari_list.append(ret['kmeans']['ARI'])

    # plot(acc_list, nmi_list, ari_list, CAR_list, args, data_name[args.data])
    logging.info('******** End, training time = {} s ********'.format(
        round(train_time, 2)))
Example no. 23
0
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '1'

from keras.models import Model
import keras
import numpy as np
import data_loader
from keras.models import load_model

model = load_model('embedding_model')

layer_model = Model(inputs=model.input,
                    outputs=model.get_layer('embedding').output)

sampler = data_loader.loader()

for i, j in zip([
        './Data/Finetune_data_SZtaxi/SZtaxi_finetune',
        './Data/Finetune_data_NYbike/NYbike_finetune',
        './Data/Finetune_data_SZtaxi/SZtaxi_test',
        './Data/Finetune_data_NYbike/NYbike_test'
], ['finetune-SZtaxi', 'finetune-NYbike', 'test-SZtaxi', 'test-NYbike']):
    x, names, y = sampler.sample_embedding(type=j, short_term_lstm_seq_len=7)

    feature = np.array(layer_model.predict(x=x, ))

    print(feature.shape)

    np.save(i + '_embedding', feature)
    print(j)
    print(np.array(x).shape)
Example no. 24
0
def plot_bench(bench):
	tgt = os.path.join(store_dir, bench)
	if not os.path.exists(tgt):
		os.mkdir(tgt)
	loader = data_loader.loader()
	loader.load_pathes(pathes, only_benchs=[bench])
	# get the argument variations across all runs. Use these to filter the
	# data for the plots
	args = loader.get_arg_variation(bench)
	if len(args) > 0:
		max_args_differ = 0
		for i in args:
			max_args_differ = max(max_args_differ, max([len(k) for k in i]))
	else:
		max_args_differ = 0
	max_depth = 2
	max_args_differ = min(max_depth, max_args_differ)
	data = {}
	monitor = {}
	# iterate through all detected arg combinations and gather the data and reformat it
	for argsetting in args:
		pre_args = argsetting[0]
		args = argsetting[1]
		post_args = argsetting[2]
		pre_filters = arg_to_filter(pre_args)
		filters = arg_to_filter(args)
		post_filters = arg_to_filter(post_args)
		dat = loader.get_filtered_data(bench=bench, pre_filters=pre_filters, filters=filters, post_filters=post_filters, no_regex=True)
		if dat is None:
			print("Error: Didn't find data for the given args")
			continue
		depth = 0
		path = []
		# create a hierarchy path for this argument combination; we have 3
		# hierarchy levels, the last one is used for the system, so 2 remain
		for i in argsetting[0] + argsetting[1] + argsetting[2]:
			if depth >= max_depth:
				if isinstance(i, list):
					path[max_depth] += ' '.join(i)
				else:
					path[max_depth] += i
				break
			depth += 1
			if isinstance(i, list):
				path.append(' '.join(i))
			else:
				path.append(i)
		while len(path) < max_args_differ:
			path.append('N/A')
		if len(dat) != 1:
			print("Error: we filtered " + str(len(dat.values())) + " instances instead of 1")
			print("Filters:")
			print(pre_args)
			print(args)
			print(post_args)
			continue
		# The first level in the dat dictionary holds the instancename,
		# the previous check shows that this has only one element...
		dat = list(dat.values())[0]
		for k,v in dat.items():
			r = {}
			mondat = {}
			for run in v['instance']['runs']:
				t = generic_data_colapser(run['results'])
				for tk, tv in t.items():
					if tk not in r:
						r[tk] = tv
					else:
						if isinstance(r[tk], list):
							r[tk].append(tv)
						else:
							r[tk] = [r[tk], tv]
				for monname, mon in run['monitors'].items():
					row = {}
					for m in mon['data']:
						t = generic_data_colapser(m['values'])
						for tk, tv in t.items():
							if tk not in row:
								row[tk] = [[m['time'], tv]]
							else:
								row[tk].append([m['time'], tv])
					if monname not in mondat:
						mondat[monname] = {}
					for rk, rv in row.items():
						if rk not in mondat[monname]:
							mondat[monname][rk] = []
						mondat[monname][rk].append(rv)
			for rk, rv in r.items():
				dptr = data
				cmpl_path = [rk] + path + [k]
				for i in cmpl_path[:-1]:
					if i not in dptr:
						dptr[i] = {}
					dptr = dptr[i]
				dptr[cmpl_path[-1]] = rv
			for mk, mv in mondat.items():
				for mk2, mv2 in mv.items():
					dptr = monitor
					cmpl_path = [mk, mk2] + path + [k]
					for i in cmpl_path[:-1]:
						if i not in dptr:
							dptr[i] = {}
						dptr = dptr[i]
					dptr[cmpl_path[-1]] = mv2
		dat = None
	result_tgt = os.path.join(tgt, 'results')
	if not os.path.exists(result_tgt):
		os.mkdir(result_tgt)
	monitor_tgt = os.path.join(tgt, 'monitors')
	if not os.path.exists(monitor_tgt):
		os.mkdir(monitor_tgt)
	for k,v in data.items():
		plot_utils.plot_bar_chart(v)
		plt.title(bench + ' ' + k)
		plt.grid(axis='y')
		_savefig(os.path.join(result_tgt, k))
	def plot_mon(path, data, try_level_colapse=False):
		plot_it = False
		level_colapse = False
		if try_level_colapse and isinstance(data, dict) and isinstance(list(data.values())[0], dict) and isinstance(list(list(data.values())[0].values())[0], list):
			max_l1 = len(data)
			max_l2 = 0
			max_combos_to_plot = 15
			for k,v in data.items():
				max_l2 = max(max_l2, len(v))
			if max_l1 * max_l2 < max_combos_to_plot:
				plot_it = True
				level_colapse = True
		if not plot_it:
			for k,v in data.items():
				if isinstance(v, list):
					plot_it = True
					break
				plot_mon(path + '_' + k, v, try_level_colapse)
		if not plot_it:
			return
		if level_colapse:
			tmp = {}
			for k,v in data.items():
				for k2,v2 in v.items():
					tmp[k+' '+k2] = v2
			data = tmp
		else:
			if try_level_colapse:
				return
		max_times = []
		for k,v in data.items():
			for ri in range(0, len(v)):
				row = v[ri]
				starttime = row[0][0]
				for i in range(0, len(row)):
					row[i][0] -= starttime
				if ri >= len(max_times):
					max_times.append(0)
				max_times[ri] = max(max_times[ri], row[-1][0] + 1)
		ct = 0
		for i in range(0, len(max_times)):
			tmp = ct
			ct += max_times[i]
			max_times[i] = tmp
		for k in data.keys():
			tmp = []
			for ri in range(0, min(monitor_plot_max_runs, len(data[k]))):
				row = data[k][ri]
				if len(tmp) > 0:
					tmp.append([tmp[-1][0]+1, row[0][1]])
				for i in row:
					tmp.append((i[0]+max_times[ri], i[1]))
			data[k] = tmp
			last_tmp = tmp
		plot_utils.plot_line_chart(data)
		plt.grid(axis='y')
		_savefig(path)

	for k,v in monitor.items():
		t = os.path.join(monitor_tgt, k)
		if not os.path.exists(t):
			os.mkdir(t)
		t = os.path.join(t, 'mon')
		plot_mon(t, copy.deepcopy(v), try_level_colapse=True)
		plot_mon(t, v, try_level_colapse=False)
Example no. 25
0
def do_evaluate(sess, args):
    with tf.device('/cpu:0'):
        # Images and labels placeholders
        images_ph = tf.placeholder(tf.float32,
                                   shape=(None, ) + tuple(args.processed_size),
                                   name='input')
        labels_ph = tf.placeholder(tf.int32, shape=(None), name='label')

        # a placeholder for determining if we train or validate the network. This placeholder will be used to set dropout rates and batchnorm parameters.
        is_training_ph = tf.placeholder(tf.bool, name='is_training')

        # build a deep learning model using the provided configuration
        dnn_model = model(images_ph, labels_ph, utils.loss, None, 0.0,
                          args.architecture, args.num_classes, is_training_ph,
                          args.transfer_mode)

        # creating an input pipeline to read data from disk
        # a placeholder for setting the input pipeline batch size. This is employed to ensure that we feed each validation example only once to the network.
        batch_size_tf = tf.placeholder_with_default(args.batch_size, shape=())

        # a data loader pipeline to read test data
        val_loader = loader(args.val_info,
                            args.delimiter,
                            args.raw_size,
                            args.processed_size,
                            False,
                            batch_size_tf,
                            args.num_prefetch,
                            args.num_threads,
                            args.path_prefix,
                            inference_only=args.inference_only)

        # if we want to do inference only (i.e. no label is provided) we only load images and their paths
        if not args.inference_only:
            val_images, val_labels, val_info = val_loader.load()
        else:
            val_images, val_info = val_loader.load()

        # get evaluation operations from the dnn model
        eval_ops = dnn_model.evaluate_ops(args.inference_only)

        # Build an initialization operation to run below.
        init = tf.group(tf.global_variables_initializer(),
                        tf.local_variables_initializer())
        sess.run(init)

        # Load pretrained parameters from disk
        dnn_model.load(sess, args.log_dir)

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        # evaluation
        if not args.inference_only:
            total_loss = utils.AverageMeter()  # Measures cross entropy loss
            top1 = utils.AverageMeter()  # Measures top-1 accuracy
            topn = utils.AverageMeter()  # Measures top-n accuracy

            # Open an output file to write predictions
            out_file = open(args.save_predictions, 'w')
            predictions_format_str = ('%d, %s, %s, %s, %s\n')

            for step in range(args.num_val_batches):
                # Load a batch of data
                val_img, val_lbl, val_inf = sess.run(
                    [val_images, val_labels, val_info],
                    feed_dict={
                        batch_size_tf: args.num_val_samples % args.batch_size
                    } if step == args.num_val_batches - 1 else None)

                # Evaluate the network on the loaded batch
                val_loss, top1_predictions, topn_predictions, topnguesses, topnconf = sess.run(
                    eval_ops,
                    feed_dict={
                        images_ph: val_img,
                        labels_ph: val_lbl,
                        is_training_ph: False
                    },
                    options=args.run_options,
                    run_metadata=args.run_metadata)

                current_batch_size = val_lbl.shape[0]
                total_loss.update(val_loss, current_batch_size)
                top1.update(top1_predictions, current_batch_size)
                topn.update(topn_predictions, current_batch_size)
                print(
                    'Batch Number: %d of %d, Top-1 Hit: %d, Top-%d Hit: %d, Loss %.2f, Top-1 Accuracy: %.2f, Top-%d Accuracy: %.2f'
                    %
                    (step, args.num_val_batches, top1.sum, args.top_n,
                     topn.sum, total_loss.avg, top1.avg, args.top_n, topn.avg))

                # log results into an output file
                for i in range(0, val_inf.shape[0]):
                    out_file.write(
                        predictions_format_str %
                        (step * args.batch_size + i + 1, val_inf[i],
                         val_loader.label_dict[val_lbl[i]], ', '.join(
                             '%d' % item
                             for item in topnguesses[i]), ', '.join(
                                 '%.4f' % item for item in topnconf[i])))
                    out_file.flush()

            out_file.close()
        #inference
        else:

            # Open an output file to write predictions
            out_file = open(args.save_predictions, 'w')
            predictions_format_str = ('%d, %s, %s, %s\n')

            for step in range(args.num_val_batches):
                # Load a batch of data
                val_img, val_inf = sess.run(
                    [val_images, val_info],
                    feed_dict={
                        batch_size_tf: args.num_val_samples % args.batch_size
                    } if step == args.num_val_batches - 1 else None)

                # Run the network on the loaded batch
                topnguesses, topnconf = sess.run(
                    eval_ops,
                    feed_dict={
                        images_ph: val_img,
                        is_training_ph: False
                    },
                    options=args.run_options,
                    run_metadata=args.run_metadata)
                print('Batch Number: %d of %d is done' %
                      (step, args.num_val_batches))

                # Log to an output file
                for i in range(0, val_inf.shape[0]):
                    out_file.write(
                        predictions_format_str %
                        (step * args.batch_size + i + 1, val_inf[i], ', '.join(
                            '%d' % item for item in topnguesses[i]), ', '.join(
                                '%.4f' % item for item in topnconf[i])))
                    out_file.flush()

            out_file.close()

        coord.request_stop()
        coord.join(threads)
        sess.close()
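
Note: utils.AverageMeter is not shown; given how top1.sum and top1.avg are printed above, a plausible sketch is a meter whose sum accumulates the raw per-batch values (e.g. hit counts) and whose avg divides by the number of samples seen. The real implementation may differ.

class AverageMeter:
    # Sketch of utils.AverageMeter as used above.
    def __init__(self):
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, value, n=1):
        self.sum += value
        self.count += n
        self.avg = self.sum / self.count if self.count else 0.0
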
Example no. 26
0
def do_train(sess, args):
    # set CPU as the default device for the graph. Some of the operations will be moved to GPU later.
    with tf.device('/cpu:0'):

        # Images and labels placeholders
        images_ph = tf.placeholder(tf.float32,
                                   shape=(None, ) + tuple(args.processed_size),
                                   name='input')
        labels_ph = tf.placeholder(tf.int32, shape=(None), name='label')

        # a placeholder for determining if we train or validate the network. This placeholder will be used to set dropout rates and batchnorm parameters.
        is_training_ph = tf.placeholder(tf.bool, name='is_training')

        #epoch number
        epoch_number = tf.get_variable(
            'epoch_number', [],
            dtype=tf.int32,
            initializer=tf.constant_initializer(0),
            trainable=False,
            collections=[tf.GraphKeys.GLOBAL_VARIABLES, SAVE_VARIABLES])
        global_step = tf.get_variable(
            'global_step', [],
            dtype=tf.int32,
            initializer=tf.constant_initializer(0),
            trainable=False,
            collections=[tf.GraphKeys.GLOBAL_VARIABLES, SAVE_VARIABLES])

        # Weight Decay policy
        wd = utils.get_policy(args.WD_policy, args.WD_details)

        # Learning rate decay policy (if needed)
        lr = utils.get_policy(args.LR_policy, args.LR_details)

        # Create an optimizer that performs gradient descent.
        optimizer = utils.get_optimizer(args.optimizer, lr)

        # build the computational graph using the provided configuration.
        dnn_model = model(images_ph,
                          labels_ph,
                          utils.loss,
                          optimizer,
                          wd,
                          args.architecture,
                          args.num_classes,
                          is_training_ph,
                          args.transfer_mode,
                          num_gpus=args.num_gpus)

        # Create a pipeline to read data from disk
        # a placeholder for setting the input pipeline batch size. This is employed to ensure that we feed each validation example only once to the network.
        # Because we only use 1 GPU for validation, the validation batch size should not be more than 512.
        batch_size_tf = tf.placeholder_with_default(min(512, args.batch_size),
                                                    shape=())

        # A data loader pipeline to read training images and their labels
        train_loader = loader(args.train_info, args.delimiter, args.raw_size,
                              args.processed_size, True,
                              args.chunked_batch_size, args.num_prefetch,
                              args.num_threads, args.path_prefix, args.shuffle)
        # The loader returns images, their labels, and their paths
        images, labels, info = train_loader.load()

        # If validation data are provided, we create an input pipeline to load the validation data
        if args.run_validation:
            val_loader = loader(args.val_info, args.delimiter, args.raw_size,
                                args.processed_size, False, batch_size_tf,
                                args.num_prefetch, args.num_threads,
                                args.path_prefix)
            val_images, val_labels, val_info = val_loader.load()

        # Get training operations to run from the deep learning model
        train_ops = dnn_model.train_ops()

        # Build an initialization operation to run below.
        init = tf.group(tf.global_variables_initializer(),
                        tf.local_variables_initializer())
        sess.run(init)

        if args.retrain_from is not None:
            dnn_model.load(sess, args.retrain_from)

        # Set the start epoch number
        start_epoch = sess.run(epoch_number + 1)

        # Start the queue runners.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        # Setup a summary writer
        summary_writer = tf.summary.FileWriter(args.log_dir, sess.graph)

        # The main training loop
        for epoch in range(start_epoch, start_epoch + args.num_epochs):

            # update epoch_number
            sess.run(epoch_number.assign(epoch))

            print("Epoch %d of %d started" %
                  (epoch, start_epoch + args.num_epochs - 1))
            # Training batches
            for step in range(args.num_batches):
                sess.run(global_step.assign(step + epoch * args.num_batches))
                # train the network on a batch of data and time the step
                start_time = time.time()

                # load a batch from input pipeline
                img, lbl = sess.run([images, labels],
                                    options=args.run_options,
                                    run_metadata=args.run_metadata)

                # train on the loaded batch of data
                _, loss_value, top1_accuracy, topn_accuracy = sess.run(
                    train_ops,
                    feed_dict={
                        images_ph: img,
                        labels_ph: lbl,
                        is_training_ph: True
                    },
                    options=args.run_options,
                    run_metadata=args.run_metadata)
                duration = time.time() - start_time

                # Check for errors
                assert not np.isnan(
                    loss_value), 'Model diverged with loss = NaN'

                # Logging every ten batches and writing tensorboard summaries every hundred batches
                if step % 10 == 0:

                    num_examples_per_step = args.chunked_batch_size * args.num_gpus
                    examples_per_sec = num_examples_per_step / duration
                    sec_per_batch = duration / args.num_gpus

                    # Log
                    format_str = (
                        '%s: epoch %d of %d, step %d of %d, loss = %.2f, Top-1 = %.2f Top-'
                        + str(args.top_n) +
                        ' = %.2f (%.1f examples/sec; %.3f sec/batch)')
                    print(
                        format_str %
                        (datetime.now(), epoch, start_epoch + args.num_epochs -
                         1, step, args.num_batches, loss_value, top1_accuracy,
                         topn_accuracy, examples_per_sec, sec_per_batch))
                    sys.stdout.flush()

                if step % 100 == 0:
                    summary_str = sess.run(tf.summary.merge_all(),
                                           feed_dict={
                                               images_ph: img,
                                               labels_ph: lbl,
                                               is_training_ph: True
                                           })
                    summary_writer.add_summary(summary_str,
                                               args.num_batches * epoch + step)
                    if args.log_debug_info:
                        summary_writer.add_run_metadata(
                            args.run_metadata, 'epoch%d step%d' % (epoch, step))

            # Save the model checkpoint periodically after each training epoch
            checkpoint_path = os.path.join(args.log_dir, args.snapshot_prefix)
            dnn_model.save(sess, checkpoint_path, global_step=epoch)

            print("Epoch %d of %d ended. a checkpoint saved at %s" %
                  (epoch, start_epoch + args.num_epochs - 1, args.log_dir))
            sys.stdout.flush()
            # if validation data are provided, evaluate accuracy on the validation set after the end of each epoch
            if args.run_validation:

                print("Evaluating on validation set")
                total_loss = utils.AverageMeter()  # Measures cross-entropy loss
                top1 = utils.AverageMeter()  # Measures top-1 accuracy
                topn = utils.AverageMeter()  # Measures top-n accuracy

                # The validation loop
                for step in range(args.num_val_batches):
                    # Load a batch of data
                    val_img, val_lbl = sess.run(
                        [val_images, val_labels],
                        feed_dict={
                            batch_size_tf:
                            args.num_val_samples % min(512, args.batch_size)
                        } if step == args.num_val_batches - 1 else None,
                        options=args.run_options,
                        run_metadata=args.run_metadata)

                    # validate the network on the loaded batch
                    val_loss, top1_predictions, topn_predictions = sess.run(
                        [train_ops[1], train_ops[2], train_ops[3]],
                        feed_dict={
                            images_ph: val_img,
                            labels_ph: val_lbl,
                            is_training_ph: False
                        },
                        options=args.run_options,
                        run_metadata=args.run_metadata)

                    current_batch_size = val_lbl.shape[0]
                    total_loss.update(val_loss, current_batch_size)
                    top1.update(top1_predictions, current_batch_size)
                    topn.update(topn_predictions, current_batch_size)

                    if step % 10 == 0 or step == args.num_val_batches - 1:
                        print(
                            "Validation step %d of %d, Loss %.2f, Top-1 Accuracy %.2f, Top-%d Accuracy %.2f "
                            % (step, args.num_val_batches, total_loss.avg,
                               top1.avg, args.top_n, topn.avg))
                        sys.stdout.flush()

        coord.request_stop()
        coord.join(threads)
        sess.close()
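do_train above builds batch_size_tf with tf.placeholder_with_default, so the validation pipeline normally runs with the default batch size but can be overridden on the final step. A minimal standalone sketch of that mechanism, assuming TensorFlow 1.x to match the tf.placeholder/Session style of the snippet; the numbers are illustrative.

import tensorflow as tf  # assumes a TensorFlow 1.x API, matching the snippet above

# batch_size_tf evaluates to its default unless a value is fed explicitly,
# which is how the last, smaller validation batch is handled in do_train.
batch_size_tf = tf.placeholder_with_default(512, shape=())
consumer = batch_size_tf * 2  # stand-in for an input pipeline that reads the batch size

with tf.Session() as sess:
    print(sess.run(consumer))                                  # 1024, default batch size
    print(sess.run(consumer, feed_dict={batch_size_tf: 272}))  # 544, overridden last batch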
Esempio n. 27
0
import cv2
import numpy as np
from data_loader import loader
from model import Models
from sklearn.model_selection import train_test_split
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import Adam
w = 240
h = 240
c = 3

mod = Models(w, h, c)
auto_encoder = mod.Arch2()
load_img = loader()
auto_encoder.summary()
x_data, y_data = load_img.load('stone', 'paper', 'scissor')
x_data = np.array(x_data, dtype='float') / 255.0
y_data = np.array(y_data, dtype='float') / 255.0
opt = Adam(lr=0.001, decay=0.001 / 50)  # note: defined but unused; compile() below uses 'adadelta'
train_x, test_x, train_y, test_y = train_test_split(x_data,
                                                    y_data,
                                                    test_size=0.1,
                                                    random_state=30)
auto_encoder.compile(optimizer='adadelta', loss='binary_crossentropy')
auto_encoder.fit(train_x,
                 train_y,
                 batch_size=32,
                 shuffle=True,
                 epochs=15,
                 validation_data=(test_x, test_y),
                 verbose=1)
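After fit(), the reconstructions can be inspected by undoing the /255.0 scaling. A short follow-up sketch that assumes the auto_encoder, test_x, np and cv2 names from the snippet above and that the model outputs images of the same shape as its inputs; the output file names are illustrative.

# Reconstruct a few held-out images and write them to disk.
recon = auto_encoder.predict(test_x[:4])
recon = np.clip(recon * 255.0, 0, 255).astype('uint8')  # undo the /255.0 scaling
for i, img in enumerate(recon):
    cv2.imwrite('recon_%d.png' % i, img)  # illustrative output file names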
Esempio n. 28
0
import data_loader as dl

if __name__ == '__main__':
    dl.loader()

Esempio n. 29
0
    def train_stage_1(self):
        data_set = loader(None, {"seed": 10, "mode": "training"})
        data_set_test = loader(None, {
            "seed": 10,
            "mode": "test"
        }, data_set.index)
        data_set_eval = loader(None, {
            "seed": 10,
            "mode": "eval"
        }, data_set.index)
        data_loader = DataLoader(data_set,
                                 self.batch,
                                 True,
                                 collate_fn=call_back.detection_collate_RPN,
                                 num_workers=0)
        data_loader_test = DataLoader(
            data_set_test,
            self.batch,
            False,
            collate_fn=call_back.detection_collate_RPN,
            num_workers=0,
        )

        # optim = Adadelta(self.RPN.parameters(), lr=lr1, weight_decay=1e-5)
        optim = Adadelta(self.RPN.parameters(), lr=self.lr1, weight_decay=1e-5)

        tool = rpn_tool_d()
        start_time = time.time()
        # print(optim.state_dict())
        for epoch in range(3000):
            runing_losss = 0.0
            cls_loss = 0
            coor_loss = 0

            for data in data_loader:
                y = data[1]
                x = data[0].cuda()

                optim.zero_grad()
                with torch.no_grad():
                    x1, x2, x3, x4 = self.features(x)
                predict_confidence, box_predict = self.RPN(x1, x2, x3, x4)
                cross_entropy, loss_box = tool.get_proposal(
                    predict_confidence, box_predict, y)
                loss_total = cross_entropy + loss_box
                loss_total.backward()
                optim.step()
                runing_losss += loss_total.item()
                cls_loss += cross_entropy.item()
                coor_loss += loss_box.item()
            end_time = time.time()
            # self.vis.line(np.asarray([cls_loss, coor_loss]).reshape(1, 2),
            #               np.asarray([epoch] * 2).reshape(1, 2), win="loss-epoch", update='append',
            #               opts=dict(title='loss', legend=['cls_loss', 'cor_loss']))
            print(
                "epoch:{a}: loss:{b:.4f} spend_time:{c:.4f} cls:{d:.4f} cor{e:.4f} date:{ff}"
                .format(a=epoch,
                        b=runing_losss,
                        c=int(end_time - start_time),
                        d=cls_loss,
                        e=coor_loss,
                        ff=time.asctime()))
            start_time = end_time

            # if self.add_eval:
            #     p = self.RPN_eval(self,data_loader_eval, epoch, eval=True, seed=self.seed)
            self.RPN_eval(data_loader_test, {"epoch": epoch})

            # note: both calls below save the RPN state dict, so the file named
            # 'base_a1.p' also holds RPN weights rather than the backbone's.
            save(self.RPN.module.state_dict(),
                 os.path.join(os.getcwd(),
                              str(epoch) + 'rpn_a1.p'))
            save(self.RPN.module.state_dict(),
                 os.path.join(os.getcwd(),
                              str(epoch) + 'base_a1.p'))

            if epoch % 10 == 0 and epoch > 0:
                adjust_learning_rate(optim, 0.9, epoch, 50, 0.3)
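train_stage_1 calls adjust_learning_rate(optim, 0.9, epoch, 50, 0.3) every ten epochs, but that helper is not shown in the snippet. A purely hypothetical sketch of one way such a helper could decay the learning rate of the optimizer's param groups; the argument meaning (decay factor, epoch, step size, base learning rate) is an assumption, not the original implementation.

import torch
from torch.optim import Adadelta

def adjust_learning_rate(optimizer, decay, epoch, step_size, base_lr):
    # Hypothetical step decay: lr = base_lr * decay ** (epoch // step_size).
    # The original helper is not shown above; this is an illustrative guess.
    lr = base_lr * (decay ** (epoch // step_size))
    for group in optimizer.param_groups:
        group['lr'] = lr

params = [torch.nn.Parameter(torch.zeros(1))]
optim = Adadelta(params, lr=0.3, weight_decay=1e-5)
adjust_learning_rate(optim, 0.9, epoch=100, step_size=50, base_lr=0.3)
print(optim.param_groups[0]['lr'])  # ~0.243, i.e. 0.3 * 0.9 ** 2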
Esempio n. 30
0
    def train_stage_2(self):

        batch = 240
        lr1 = 0.15

        data_set = loader(os.path.join(os.getcwd(), 'data_2'), {"mode": "training"})
        data_set_test = loader(os.path.join(os.getcwd(), 'data_2'),{"mode": "test"}, data_set.index)
        data_set_eval = loader(os.path.join(os.getcwd(), 'data_2'),{"mode": "eval"}, data_set.index)

        data_loader = DataLoader(data_set, batch, True, collate_fn=call_back.detection_collate_RPN)
        data_loader_test = DataLoader(data_set_test, batch, False, collate_fn=call_back.detection_collate_RPN)
        data_loader_eval = DataLoader(data_set_eval, batch, False, collate_fn=call_back.detection_collate_RPN)

        # optim = Adadelta(self.ROI.parameters(), lr=lr1, weight_decay=1e-5)
        start_time = time.time()
        optim_a = Adadelta([{'params': self.pre.parameters()},
                            {'params': self.ROI.parameters()}], lr=0.15, weight_decay=1e-5)
        cfg.test = False
        count = 0
        for epoch in range(200):
            runing_losss = 0.0
            cls_loss = 0
            coor_loss = 0
            cls_loss2 = 0
            coor_loss2 = 0
            count += 1
            # base_time = RPN_time = ROI_time = nms_time = pre_gt = loss_time = linear_time = 0
            for data in data_loader:
                y = data[1]
                x = data[0].cuda()
                peak = data[2]
                num = data[3]
                optim_a.zero_grad()

                with torch.no_grad():
                    if self.flag >= 2:
                        result = self.base_process(x, y, peak)
                        feat1 = result['feat_8']
                        feat2 = result['feat_16']
                        feat3 = result['feat_32']
                        feat4 = result['feat_64']
                        label = result['label']
                        loss_box = result['loss_box']
                        cross_entropy = result['cross_entropy']

                cls_score = self.pre(feat1, feat2, feat3, feat4)
                cls_score = self.ROI(cls_score)

                cross_entropy2 = self.tool2.cal_loss2(cls_score, label)

                loss_total = cross_entropy2
                loss_total.backward()
                optim_a.step()
                runing_losss += loss_total.item()
                cls_loss2 += cross_entropy2.item()
                cls_loss += cross_entropy.item()
                coor_loss += loss_box.item()
            end_time = time.time()
            torch.cuda.empty_cache()
            print(
                "epoch:{a} time:{ff}: loss:{b:.4f} cls:{d:.4f} cor{e:.4f} cls2:{f:.4f} cor2:{g:.4f} date:{fff}".format(
                    a=epoch,
                    b=runing_losss,
                    d=cls_loss,
                    e=coor_loss,
                    f=cls_loss2,
                    g=coor_loss2, ff=int(end_time - start_time),
                    fff=time.asctime()))
            # if epoch % 10 == 0:
            #     adjust_learning_rate(optim, 0.9, epoch, 50, lr1)
            p = None

            # if epoch % 2 == 0:
            #     print("test result")
            # save(self.RPN.module.state_dict(),
            #      os.path.join(os.getcwd(), str(epoch) + 'rpn_a2.p'))
            # save(self.RPN.module.state_dict(),
            #      os.path.join(os.getcwd(), str(epoch) + 'base_a2.p'))
            start_time = end_time
        all_data = []
        all_label = []
        for data in data_loader:
            y = data[1]
            x = data[0].cuda()
            num = data[3]
            peak = data[2]
            with torch.no_grad():
                if self.flag >= 2:
                    result = self.base_process_2(x, y, peak)
                    data_ = result['x']
                    label = result['label']
                    loss_box = result['loss_box']
                    cross_entropy = result['cross_entropy']
                    all_data.extend(data_.cpu())
                    all_label.extend(label.cpu())
        for data in data_loader_eval:
            y = data[1]
            x = data[0].cuda()
            num = data[3]
            peak = data[2]
            with torch.no_grad():
                if self.flag >= 2:
                    result = self.base_process_2(x, y, peak)
                    data_ = result['x']
                    label = result['label']
                    loss_box = result['loss_box']
                    cross_entropy = result['cross_entropy']
                    all_data.extend(data_.cpu())
                    all_label.extend(label.cpu())
        for data in data_loader_test:
            y = data[1]
            x = data[0].cuda()
            num = data[3]
            peak = data[2]
            with torch.no_grad():
                if self.flag >= 2:
                    result = self.base_process_2(x, y, peak)
                    data_ = result['x']
                    label = result['label']
                    loss_box = result['loss_box']
                    cross_entropy = result['cross_entropy']
                    all_data.extend(data_.cpu())
                    all_label.extend(label.cpu())

        all_data = torch.stack(all_data, 0).numpy()
        all_label = torch.LongTensor(all_label).numpy()
        from imblearn.over_sampling import SMOTE
        fun = SMOTE()
        all_data, all_label = fun.fit_resample(all_data, all_label)
        total = len(all_label)
        training_label = all_label[:int(0.7 * total)]
        training_data = all_data[:int(0.7 * total)]

        test_label = all_label[-int(0.2 * total):]
        test_data = all_data[-int(0.2 * total):]
        count = 0
        self.ROI = roi().cuda()
        self.ROI = DataParallel(self.ROI, device_ids=[0])
        self.ROI.apply(weights_init)

        optim_b = Adadelta(self.ROI.parameters(), lr=0.15, weight_decay=1e-5)
        for epoch in range(1200):
            runing_losss = 0.0
            cls_loss = 0
            coor_loss = 0
            cls_loss2 = 0
            coor_loss2 = 0
            count += 1
            optim_b.zero_grad()
            optim_a.zero_grad()

            # base_time = RPN_time = ROI_time = nms_time = pre_gt = loss_time = linear_time = 0
            for j in range(int(len(training_label) / 240)):
                data_ = torch.Tensor(training_data[j * 240:j * 240 + 240]).view(240, 1024, 15).cuda()
                label_ = torch.LongTensor(training_label[j * 240:j * 240 + 240]).cuda()
                optim_b.zero_grad()

                cls_score = self.ROI(data_)
                cross_entropy2 = self.tool2.cal_loss2(cls_score, label_)

                loss_total = cross_entropy2
                loss_total.backward()
                optim_b.step()
                runing_losss += loss_total.item()
                cls_loss2 += cross_entropy2.item()
                # note: cross_entropy and loss_box are stale here; they keep the
                # values from the last feature-collection batch above and are not
                # recomputed in this loop.
                cls_loss += cross_entropy.item()
                coor_loss += loss_box.item()
            end_time = time.time()
            torch.cuda.empty_cache()
            print(
                "epoch:{a} time:{ff}: loss:{b:.4f} cls:{d:.4f} cor{e:.4f} cls2:{f:.4f} cor2:{g:.4f} date:{fff}".format(
                    a=epoch,
                    b=runing_losss,
                    d=cls_loss,
                    e=coor_loss,
                    f=cls_loss2,
                    g=coor_loss2, ff=int(end_time - start_time),
                    fff=time.asctime()))
            if epoch % 10 == 0 and epoch > 0:
                adjust_learning_rate(optim_b, 0.9, epoch, 50, 0.3)

            p = None
            self.eval_(test_data, test_label)
            # self.ROI_eval(data_loader_eval, {"epoch": epoch})

            start_time = end_time
        print('finish')
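train_stage_2 rebalances the pooled features with SMOTE and then slices the first 70% and the last 20% of the resampled arrays for training and test. Since fit_resample appends the synthetic minority samples at the end, a shuffled, stratified split avoids concentrating the synthetic samples on one side; a small self-contained sketch on toy data (array shapes and seeds are illustrative).

import numpy as np
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split

# Toy imbalanced data; shapes are illustrative only.
rng = np.random.default_rng(0)
X = rng.normal(size=(200, 8))
y = np.array([0] * 180 + [1] * 20)

# SMOTE appends synthetic minority samples at the end of the returned arrays,
# so a shuffled, stratified split keeps both classes in train and test.
X_res, y_res = SMOTE(random_state=0).fit_resample(X, y)
X_train, X_test, y_train, y_test = train_test_split(
    X_res, y_res, test_size=0.2, stratify=y_res, random_state=0)
print(X_train.shape, X_test.shape)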
Esempio n. 31
0
from data_loader import loader
from model import Models
from keras.optimizers import Adam
from sklearn.model_selection import train_test_split
import numpy as np
import sys

load_data = loader()
x_data, y_data, w = load_data.load()
print(x_data[:3])
w = w + w - 1  # number of features
# print(w)
h = 1
model = Models(h, w)
auto_encoder = model.encode_decoder()

print(auto_encoder.summary())

print(x_data.shape)
# train_x,test_x,train_y,test_y=train_test_split(x_data,y_data,test_size=0.1,random_state=30)

train_x = x_data[:50]
train_y = train_x.copy()
test_x = x_data[50:]
test_y = test_x.copy()

model.fit(train_x, train_y, test_x, test_y)

model.save()
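As an alternative to the fixed-index split above, the commented-out train_test_split line could be used; for an encoder-decoder trained to reproduce its input, the targets are simply copies of the inputs. A short sketch reusing the names, the train_test_split import, and the Models.fit signature from this snippet (test_size and random_state taken from that comment).

# Alternative split, following the commented-out train_test_split line above.
train_x, test_x = train_test_split(x_data, test_size=0.1, random_state=30)
train_y, test_y = train_x.copy(), test_x.copy()
model.fit(train_x, train_y, test_x, test_y)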