Example #1
def main():
    (x_train, y_train), (x_validation, y_validation) = load_data()

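    # juxt(identity, computational_graph(...)) maps the Input tensor to the pair
    # (input tensor, output tensor), which is unpacked into Model(inputs, outputs)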
    model = Model(*juxt(identity, computational_graph(y_train.shape[1]))(Input(shape=x_train.shape[1:])))
    model.compile(loss='categorical_crossentropy', optimizer=SGD(momentum=0.9), metrics=['accuracy'])

    model.summary()
    # plot_model(model, to_file='./results/model.png')

    train_data      = ImageDataGenerator(featurewise_center=True, featurewise_std_normalization=True, width_shift_range=0.125, height_shift_range=0.125, horizontal_flip=True)
    validation_data = ImageDataGenerator(featurewise_center=True, featurewise_std_normalization=True)

    for data in (train_data, validation_data):
        data.fit(x_train)  # Considering practical use, fitting the featurewise statistics on x_validation is not really feasible...

    batch_size = 100
    epochs     = 200

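    # step-wise learning rate schedule looked up by epoch index:
    # 0.01 for epoch 0, 0.1 for epochs 1-99, 0.01 for epochs 100-149, 0.001 afterwards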
    results = model.fit_generator(train_data.flow(x_train, y_train, batch_size=batch_size),
                                  steps_per_epoch=x_train.shape[0] // batch_size,
                                  epochs=epochs,
                                  callbacks=[LearningRateScheduler(partial(getitem, tuple(take(epochs, concat(repeat(0.01, 1), repeat(0.1, 99), repeat(0.01, 50), repeat(0.001))))))],
                                  validation_data=validation_data.flow(x_validation, y_validation, batch_size=batch_size),
                                  validation_steps=x_validation.shape[0] // batch_size)

    with open('./results/history.pickle', 'wb') as f:
        pickle.dump(results.history, f)

    save_model(model, './results/model.h5')

    del model
Example #2
def main():
    (waves, labels), (x_validate, y_validate) = load_data()

    x_mean = 4.3854903e-05  # np.concatenate(waves).mean()
    x_std = 0.042366702  # np.concatenate(waves).std()

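    # standardize the waves and the validation set with the precomputed global statistics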
    waves = tuple(map(lambda wave: (wave - x_mean) / x_std, waves))
    x_validate = (x_validate - x_mean) / x_std

    model = Model(
        *juxt(identity, computational_graph(max(y_validate) + 1))(Input(
            shape=x_validate.shape[1:])))
    model.compile(loss='sparse_categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    model.summary()

    batch_size = 50
    epoch_size = 500

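    # train from the custom data_generator; ReduceLROnPlateau halves the learning rate
    # after 50 epochs without improvement in the validation loss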
    results = model.fit_generator(
        data_generator(waves, labels, batch_size),
        steps_per_epoch=8000 // batch_size,
        epochs=epoch_size,
        validation_data=(x_validate, y_validate),
        callbacks=[ReduceLROnPlateau(factor=0.5, patience=50, verbose=1)])

    with open('./results/history.pickle', 'wb') as f:
        pickle.dump(results.history, f)

    save_model(model, './results/model.h5')

    del model
Example #3
def print_predicts():
    _, (x, y) = load_data()
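    # normalize with the same mean/std that was used during training (see Example #2)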
    x = (x - 4.3854903e-05) / 0.042366702

    model  = load_model('./results/model.h5', custom_objects={'ZeroPadding': ZeroPadding})
    y_pred = np.argmax(model.predict(x), axis=1)

    for t, p in zip(y, y_pred):
        print('{0}\t{1}'.format(labels[t], labels[p]))

    del model
Example #4
def run(args):
    domains_summary()
    parameters_summary()

    train_params = {
        'batch_size': args.train_batch_size,
        'shuffle': args.train_data_set_shuffle
    }

    if args.model == 'AutoEncoder':
        ae_model = SimpleAutoEncoder(ast.literal_eval(args.autoencoder_shape))
        ae_model.summary()

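        # separate SGD optimizers for the decoder (c_), the domain classifier (d_) and the encoder (f_)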
        c_optimizer = torch.optim.SGD(ae_model.decoder.parameters(),
                                      lr=args.learning_rate)
        d_optimizer = torch.optim.SGD(ae_model.domain_classifier.parameters(),
                                      lr=args.learning_rate)
        f_optimizer = torch.optim.SGD(ae_model.encoder.parameters(),
                                      lr=args.learning_rate)

        c_scheduler = ReduceLROnPlateau(c_optimizer,
                                        mode='min',
                                        factor=args.reduce_lr_factor,
                                        patience=args.reduce_lr_patience)
        d_scheduler = ReduceLROnPlateau(d_optimizer,
                                        mode='min',
                                        factor=args.reduce_lr_factor,
                                        patience=args.reduce_lr_patience)
        #criterion = args.loss
        criterion = ReversalLoss()

        # src_domain_data_set, tgt_domain_data_set = load_data('kitchen', 'books', verbose=True)
        # words_to_reconstruct = get_unique_per_set_words(src_domain_data_set, tgt_domain_data_set)

        data_generator = as_one_dataloader(
            args.src_domain,
            args.tgt_domain,
            train_params,
            denoising_factor=args.denoising_factor
        )  #,  words_to_reconstruct=words_to_reconstruct)
        trainer = AutoEncoderTrainer(ae_model,
                                     criterion,
                                     c_optimizer,
                                     d_optimizer,
                                     f_optimizer,
                                     c_scheduler,
                                     d_scheduler,
                                     args.max_epochs,
                                     epochs_no_improve=args.epochs_no_improve)
        trainer.fit(data_generator)
        torch.save(ae_model.state_dict(), args.ae_model_file)
        print('Model was saved in {} file.'.format(args.ae_model_file))
    elif args.model == 'ATTFeedforward':
        src_domain_data_set, tgt_domain_data_set = load_data(args.src_domain,
                                                             args.tgt_domain,
                                                             verbose=True)
        train_generator, valid_generator, target_generator = train_valid_target_split(
            src_domain_data_set, tgt_domain_data_set, train_params)

        ae_model = None
        if args.auto_encoder_embedding is not None:
            ae_model = SimpleAutoEncoder(
                ast.literal_eval(args.autoencoder_shape))
            ae_model.load_state_dict(torch.load(args.auto_encoder_embedding))
            ae_model.set_train_mode(False)
            # don't freeze the AutoEncoder!
            # ae_model.froze()
            ae_model.eval()

        attff_model = ATTFeedforward(args.attff_input_size,
                                     args.attff_hidden_size, ae_model)
        attff_model.summary()

        criterion = MultiViewLoss()
        criterion_t = CrossEntropyLoss()

        optimizer = torch.optim.Adam(attff_model.parameters(),
                                     lr=args.learning_rate)
        scheduler = ReduceLROnPlateau(optimizer,
                                      factor=args.reduce_lr_factor,
                                      patience=args.reduce_lr_patience)

        trainer = DomainAdaptationTrainer(
            attff_model,
            criterion,
            criterion_t,
            optimizer,
            scheduler,
            args.max_epochs,
            ae_model=ae_model,
            epochs_no_improve=args.epochs_no_improve)

        if args.load_attnn_model:
            attff_model.load_state_dict(
                torch.load(
                    args.attnn_model_file.format(args.attff_input_size,
                                                 args.attff_hidden_size)))
        else:
            trainer.fit(train_generator,
                        valid_generator,
                        target_generator=target_generator,
                        max_epochs=args.max_epochs)

            model_file = args.attnn_model_file.format(args.attff_input_size,
                                                      args.attff_hidden_size)
            torch.save(attff_model.state_dict(), model_file)
            print('Model was saved in {} file.'.format(model_file))

        trainer.pseudo_label(train_generator,
                             valid_generator,
                             tgt_domain_data_set,
                             iterations=args.pseudo_label_iterations,
                             train_params=train_params,
                             max_epochs=args.max_epochs)
Example #5
from data_set import AmazonDomainDataSet, train_valid_target_split, load_data
from utils.data import build_dictionary


def get_unique_per_set_words(data_set1: AmazonDomainDataSet,
                             data_set2: AmazonDomainDataSet):

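    # only words contained in the joint dictionary built over both data sets are considered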
    dictionary = build_dictionary([data_set1, data_set2], 5000)

    set1_words = set()
    set2_words = set()
    for i, row in data_set1.data.iterrows():
        for word in row.acl_processed:
            if word[0] in dictionary:
                set1_words.add(word[0])

    for i, row in data_set2.data.iterrows():
        for word in row.acl_processed:
            if word[0] in dictionary:
                set2_words.add(word[0])

    # words that appear in exactly one of the two data sets (symmetric difference)
    return set1_words.symmetric_difference(set2_words)


src_domain_data_set, tgt_domain_data_set = load_data('books',
                                                     'kitchen',
                                                     verbose=True)
get_unique_per_set_words(src_domain_data_set, tgt_domain_data_set)
Example #6
import data_set
import id3
import numpy as np

data, labels = data_set.load_data("data\\agaricus-lepiota.data")

# tree display prompt
display_trees = input("Display trees? (y/n): ")
id3_type = input("Regular algorithm or with roulette? (regular/roulette): ")

# data_set.display_data(data, 10, [i for i in range(23)])
f = 0.1
f = float(input("Enter the training set size (float): "))

training_data_length = int(len(data) * f)  # fraction f of the data is used as training data

k = 1
# number of repeats
k = int(input("Enter the number of tests (int): "))

while k > 0:

    # randomize data order
    np.random.shuffle(data)

    training_data = data[:training_data_length, 1:]
    training_labels = data[:training_data_length, 0]

    id3_tree = id3.build_tree(training_labels,
                              training_data,
                              variation=id3_type)  #roulette or regular
Example #7
def main():
    import os
    with tf.device("/cpu:0"):
        (x_train, y_train), (x_validation, y_validation) = load_data()

    batch_size = 32
    epochs = 200
    input_shape = Input(shape=x_train.shape[1:])
    model_file = './results/model.h5'
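    # resume from a previously saved model if one exists; otherwise build, train and save a new one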
    if os.path.exists(model_file):
        model = load_model(model_file)
        # with tf.device("/cpu:0"):
        #     validation_data = ImageDataGenerator(featurewise_center=True, featurewise_std_normalization=True)
    else:
        model = Model(*juxt(identity, computational_graph(y_train.shape[1]))(
            input_shape))
        model.compile(loss='categorical_crossentropy',
                      optimizer=SGD(momentum=0.9),
                      metrics=['accuracy'])

        with tf.device("/cpu:0"):
            train_data = ImageDataGenerator(featurewise_center=True,
                                            featurewise_std_normalization=True,
                                            width_shift_range=0.125,
                                            height_shift_range=0.125,
                                            horizontal_flip=True)
            validation_data = ImageDataGenerator(
                featurewise_center=True, featurewise_std_normalization=True)

        for data in (train_data, validation_data):
            data.fit(x_train)  # Considering practical use, fitting the featurewise statistics on x_validation is not really feasible...

        results = model.fit_generator(
            train_data.flow(x_train, y_train, batch_size=batch_size),
            steps_per_epoch=x_train.shape[0] // batch_size,
            epochs=epochs,
            callbacks=[
                LearningRateScheduler(
                    partial(
                        getitem,
                        tuple(
                            take(
                                epochs,
                                concat(repeat(0.01, 1), repeat(0.1, 99),
                                       repeat(0.01, 50), repeat(0.001))))))
            ],
            validation_data=validation_data.flow(x_validation,
                                                 y_validation,
                                                 batch_size=batch_size),
            validation_steps=x_validation.shape[0] // batch_size)

        with open('./results/history.pickle', 'wb') as f:
            pickle.dump(results.history, f)
        save_model(model, model_file)

    try:
        with tf.device("/cpu:0"):
            # model.summary()
            # print("=== AFTER POPPING THE LAST ===")
            model.layers.pop()
            # model.summary()
            # generate_confusion_matrix(model, x_validation, y_validation, batch_size)
            # plot_model(model, to_file='./results/model.png')
    except Exception as ex:
        print("Model post-processing failed with error:", repr(ex), "\nMoving on...")

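    # hand the trained model (its last layer was popped above) to the siamese() helper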
    siamese(input_shape, model)
Example #8
            model.label: np.expand_dims(y, axis=1),
            model.dropout_keep_fm: [1.0] * len(dfm_params['dropout_fm']),
            model.dropout_keep_deep: [1.0] * len(dfm_params['dropout_deep']),
            model.train_phase: False
        }
        logits = sess.run([model.out], feed_dict=feed_dict)
        # the last batch may be smaller than the previous ones
        if i == 0:
            y_pred = np.reshape(logits[0], [-1, 1])
        else:
            y_pred = np.concatenate((y_pred, np.reshape(logits[0], [-1, 1])))
    return y_pred


if __name__ == '__main__':
    train_df, test_df, x_train, y_train, x_test, ids_test, cat_features_indices = load_data()
    print(train_df)
    # k-fold cross-validation
    folds = list(StratifiedKFold(n_splits=config.NUM_SPLITS, shuffle=True,\
                             random_state=config.RANDOM_SEED).split(x_train, y_train))

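    # FeatureDictionary maps the raw DataFrame columns to feature indices, treating the
    # numeric columns separately and skipping the ignored ones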
    fd = FeatureDictionary(dfTrain=train_df, dfTest=test_df,\
                           numeric_cols=config.NUMERIC_COLS,\
                           ignore_cols=config.IGNORE_COLS)

    #
    data_parser = DataParser(feat_dict=fd)
    # Xi_train holds the feature indices of the training data; Xv_train holds the
    # feature values at those indices (for continuous features, the value itself)
    Xi_train, Xv_train, y_train = data_parser.parse(df=train_df,
                                                    has_label=True)
    Xi_test, Xv_test, ids_test = data_parser.parse(df=test_df)