Example No. 1
    def test_vgg():
        X_train, X_valid, Y_train, Y_valid = split_data(X, y)
        img_rows, img_cols, channels = X_train[0, :, :, :].shape

        model = vgg16_model(img_rows, img_cols, channels, num_classes)

        model.fit(
            X_train,
            Y_train,
            batch_size=batch_size,
            epochs=nb_epoch,
            shuffle=True,
            verbose=1,
            validation_data=(X_valid, Y_valid),
        )

        predictions_valid = model.predict(X_valid,
                                          batch_size=batch_size,
                                          verbose=1)

        score = log_loss(Y_valid, predictions_valid)
        print(score)
Example No. 2
def main():
    image_size = 50
    number_of_classes = 12

    # cached_files = os.listdir('cache/')

    # if no cached features and labels exist locally, create them and then cache them
    # if 'train_features.csv' not in cached_files or 'train_labels.csv' not in cached_files:

    features, labels, categories = load_train_data(train_data_path='./data/train/',
                                                   image_size=image_size)

    # TODO create a fast caching system
    # np.savetxt('cache/train_features.csv', train_features, delimiter=',', fmt='%.4f')
    # np.savetxt('cache/train_labels.csv', train_labels, delimiter=',', fmt='%i')

    # # if cached features and labels are detected load them into variables
    # else:
    #     train_features = np.genfromtxt('cache/train_features.csv', delimiter=',')
    #     print('training features loaded from cache')
    #     train_labels = np.genfromtxt('cache/train_labels.csv', delimiter=',')
    #     print('training labels loaded from cache')
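    # A possible faster cache for the TODO above (hypothetical sketch, not part of the original code):
    # binary .npy files skip the CSV text parsing done by np.savetxt/np.genfromtxt.
    # if os.path.exists('cache/train_features.npy'):
    #     features = np.load('cache/train_features.npy')
    #     labels = np.load('cache/train_labels.npy')
    # else:
    #     np.save('cache/train_features.npy', features)
    #     np.save('cache/train_labels.npy', labels)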

    binary_training_labels = keras.utils.to_categorical(labels, num_classes=number_of_classes)

    train_features, train_labels, crosval_features, crosval_labels, test_features, test_labels = \
        split_data(features, binary_training_labels, train_fraction=0.9, crosval_fraction=0.0, test_fraction=0.1)

    reg_value = 0.02

    # building nn topology
    model = Sequential()
    model.add(Dense(units=2500,
                    activation='relu',
                    input_dim=image_size ** 2,
                    kernel_regularizer=regularizers.l2(reg_value)))

    model.add(Dense(units=300,
                    activation='relu',
                    kernel_regularizer=regularizers.l2(reg_value)))

    model.add(Dense(units=300,
                    activation='relu',
                    kernel_regularizer=regularizers.l2(reg_value)))

    model.add(Dense(units=number_of_classes,
                    activation='sigmoid',
                    kernel_regularizer=regularizers.l2(reg_value)))

    model.compile(loss='categorical_crossentropy',
                  optimizer='sgd',
                  metrics=['accuracy'])

    # training_epochs = 200
    # model.fit(train_features, train_labels, epochs=training_epochs, batch_size=100)

    epoch = 0

    # hold historical training and test accuracy
    train_accuracy = {}
    test_accuracy = {}

    try:
        while epoch < 2000:
            model.fit(train_features, train_labels, epochs=1, batch_size=128)
            test_accuracy[epoch] = model.evaluate(test_features, test_labels, batch_size=128)[1]
            train_accuracy[epoch] = model.evaluate(train_features, train_labels, batch_size=128)[1]

            # TODO add sequential model saving
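            # A hypothetical sketch for the TODO above (not in the original code):
            # model.save('cache/model_epoch_%03i.h5' % epoch)  # Keras writes architecture + weights to one HDF5 file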

            print('\nepoch = %i\n' % epoch)

            epoch += 1

    except KeyboardInterrupt:
        pass

    # plotting training and test accuracy histories
    plt.plot(list(train_accuracy.keys()), list(train_accuracy.values()), label='train')
    plt.plot(list(test_accuracy.keys()), list(test_accuracy.values()), label='test')
    axes = plt.gca()
    # axes.set_ylim([0.8, 0.90])
    plt.xlabel('epoch')
    plt.ylabel('accuracy')
    plt.legend()
    plt.show()

    test_accuracy = model.evaluate(test_features, test_labels, batch_size=1000)[1]
    print('trained model accuracy on test set = %f' % test_accuracy)
Example No. 3
	plt.plot(list(range(num_train + len(valid), num_train + len(valid) + len(test))), test, color='y', label='test data')
	plt.plot(list(range(num_train, num_train + len(valid))), valid, color='g', label='validation data')
	plt.plot(list(range(num_train + len(valid), num_train + len(valid) + len(predictions))), predictions, color='r', label='predicted')
	plt.legend()
	#if filename is not None:
	#	plt.savefig(filename)
	#else:
	plt.show()


if __name__ == '__main__':
	seq_size = 20
	##Performing all the data operations	
	predictor = SeriesPredictor(input_dim = 1, seq_size = seq_size, hidden_dim = 100)
	data = data_loader.load_series('datanew.csv')
	train_data, valid_data, test_data = data_loader.split_data(data)

	'''
	print("How Train data looks like")
	for i in range(10):
		print(train_data[i])
	'''
	

	## Build the data such that the output at every time step is the value at the next time step
	train_x, train_y = [], []
	for i in range(len(train_data) - seq_size - 1):
		## np.expand_dims is needed because the network expects inputs shaped (batch_size, seq_length, input_dim):
		## the batch dimension comes from appending many windows to train_x, and the input_dim dimension comes from expand_dims
		train_x.append(np.expand_dims(train_data[i : i + seq_size], axis = 1).tolist())
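		## Assumed continuation, mirroring the windowing used in the other examples (not shown in this snippet):
		train_y.append(train_data[i + 1 : i + seq_size + 1])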
Example No. 4
    plt.plot(list(range(num_train, num_train + len(actual))),
             actual,
             color='g',
             label='test data')
    plt.legend()  # legend
    if filename is not None:
        plt.savefig(filename)
    else:
        plt.show()


if __name__ == '__main__':
    seq_size = 5
    predictor = SeriesPredictor(input_dim=1, seq_size=seq_size, hidden_dim=100)
    data = data_loader.load_series('international-airline-passengers.csv')
    train_data, actual_vals = data_loader.split_data(data)

    train_x, train_y = [], []
    # for i in range(len(train_data) - seq_size - 1):  # num - window_size + 1
    for i in range(len(train_data) - seq_size):
        train_x.append(
            np.expand_dims(
                train_data[i:i + seq_size],
                axis=1).tolist())  # shape=(batch, seq_size, input_dim)
        train_y.append(train_data[i + 1:i + seq_size + 1])

    test_x, test_y = [], []
    # for i in range(len(actual_vals) - seq_size - 1):
    for i in range(len(actual_vals) - seq_size):
        # temp = np.expand_dims(actual_vals[i:i + seq_size], axis=1)  # shape=(5, 1)
        test_x.append(
Example No. 5
    plt.plot(list(range(num_train, num_train + len(predictions))),
             predictions,
             color='r',
             label='predicted')
    plt.plot(list(range(num_train, num_train + len(actual))),
             actual,
             color='g',
             label='test data')
    plt.legend()
    plt.show()


if __name__ == '__main__':
    mode = 2  # 0: train, 1: resume training, 2: view results

    seq_size = 60
    predictor = SeriesPredictor(input_dim=5, seq_size=seq_size, hidden_dim=50)
    data = data_loader.load_series('data_test.txt')
    train_data, actual_vals, sample = data_loader.split_data(data, seq_size)
    # print(train_data)
    # print(np.shape(train_data))
    train_x, train_y = [], []
    for i in range(len(train_data) - seq_size - 1):
        train_x.append(train_data[i:i + seq_size])
        train_y.append(train_data[i + seq_size + 1])
    # print(np.shape(train_x),np.shape(train_y))
    # print(train_y)
    # print(np.shape(train_y))
    test_x, test_y = [], []
    for i in range(len(actual_vals) - seq_size - 1):
        test_x.append(actual_vals[i:i + seq_size])
        test_y.append(actual_vals[i + seq_size + 1])

        # start training
Example No. 6
    num_train = len(train_x)
    plt.plot(list(range(num_train)), train_x, color='b', label='training data')
    plt.plot(list(range(num_train, num_train + len(predictions))), predictions, color='r', label='predicted')
    plt.plot(list(range(num_train, num_train + len(actual))), actual, color='g', label='test data')
    plt.legend()
    if filename is not None:
        plt.savefig(filename)
    else:
        plt.show()


if __name__ == '__main__':
    seq_size = 5
    predictor = SeriesPredictor(input_dim=1, seq_size=seq_size, hidden_dim=5)
    data = data_loader.load_series('international-airline-passengers.csv')
    train_data, actual_vals = data_loader.split_data(data)

    train_x, train_y = [], []
    for i in range(len(train_data) - seq_size - 1):
        train_x.append(np.expand_dims(train_data[i:i+seq_size], axis=1).tolist())
        train_y.append(train_data[i+1:i+seq_size+1])

    test_x, test_y = [], []
    for i in range(len(actual_vals) - seq_size - 1):
        test_x.append(np.expand_dims(actual_vals[i:i+seq_size], axis=1).tolist())
        test_y.append(actual_vals[i+1:i+seq_size+1])

    predictor.train(train_x, train_y, test_x, test_y)

    with tf.Session() as sess:
        predicted_vals = predictor.test(sess, test_x)[:,0]
Example No. 7
def main():
    # Set GPU memory usage
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    session = tf.Session(config=config)
    K.set_session(session)
    tf.logging.set_verbosity(tf.logging.ERROR)

    args = get_args()
    IMG_SIZE = args.image_size
    input_path = args.input
    batch_size = args.batch_size
    nb_epochs = args.nb_epochs
    validation_split = args.validation_split
    dataset_name = args.dataset_name
    n_age_bins = args.class_num
    embdding = args.embdding
    lr = args.lr

    logging.debug("[INFO] Loading data...")

    data_loader = DataManager(dataset_name)
    ground_truth_data = data_loader.get_data()
    train_keys, val_keys = split_data(ground_truth_data,
                                      validation_split=validation_split,
                                      do_shuffle=True)

    print("Samples: Training - {}, Validation - {}".format(
        len(train_keys), len(val_keys)))
    input_shape = (IMG_SIZE, IMG_SIZE, 3)

    image_generator = ImageGenerator(ground_truth_data,
                                     batch_size,
                                     input_shape[:2],
                                     train_keys,
                                     val_keys,
                                     path_prefix=input_path,
                                     vertical_flip_probability=0,
                                     eraser_probability=0,
                                     bins=n_age_bins)

    model = facenet_resnet(nb_class=n_age_bins,
                           embdding=embdding,
                           is_train=True,
                           weights="./models/facenet_keras_weights.h5")
    model.compile(optimizer=optimizers.SGD(lr=lr,
                                           momentum=0.9,
                                           decay=5e-4,
                                           nesterov=False),
                  loss={
                      'pred_g': focal_loss(alpha=.4, gamma=2),
                      'pred_a': mae,
                      "pred_e": "categorical_crossentropy"
                  },
                  loss_weights={
                      'pred_g': 0.2,
                      'pred_a': 1,
                      'pred_e': 0.4
                  },
                  metrics={
                      'pred_g': 'accuracy',
                      'pred_a': mae,
                      'pred_e': 'accuracy'
                  })

    logging.debug("[INFO] Saving model...")

    mk_dir("checkpoints")

    callbacks = [
        CSVLogger(os.path.join('checkpoints', 'train.csv'), append=False),
        ModelCheckpoint(os.path.join(
            'checkpoints',
            'weights.{epoch:02d}-{val_pred_g_acc:.3f}-{val_pred_a_mae:.3f}-{val_pred_e_acc:.3f}.h5'
        ),
                        monitor="val_loss",
                        verbose=1,
                        save_best_only=True,
                        mode="min"),
        # Use Stochastic Gradient Descent with Restart
        # https://github.com/emrul/Learning-Rate
        # Based on paper SGDR: STOCHASTIC GRADIENT DESCENT WITH WARM RESTARTS
        # SGDRScheduler(min_lr=lr * ((0.1)**3), max_lr=lr,
        #               steps_per_epoch=np.ceil(len(train_keys) / batch_size),
        #               lr_decay=0.9, cycle_length=5, mult_factor=1.5)

        # Use Cyclical Learning Rate
        # CyclicLR(mode='triangular', step_size=np.ceil(
        #     len(train_keys)/batch_size), base_lr=lr*((0.1)**3), max_lr=lr)
        LearningRateScheduler(PolyDecay(lr, 0.9, nb_epochs).scheduler)
    ]
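    # PolyDecay is a project-specific helper; a minimal sketch of the polynomial
    # decay schedule it is assumed to implement (an assumption, not the original class):
    # class PolyDecay:
    #     def __init__(self, initial_lr, power, n_epochs):
    #         self.initial_lr, self.power, self.n_epochs = initial_lr, power, n_epochs
    #     def scheduler(self, epoch):
    #         # lr falls from initial_lr toward 0 following (1 - epoch/n_epochs)**power
    #         return self.initial_lr * (1.0 - epoch / float(self.n_epochs)) ** self.power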

    logging.debug("[INFO] Running training...")

    history = model.fit_generator(
        image_generator.flow(mode='train'),
        steps_per_epoch=np.ceil(len(train_keys) / batch_size),
        epochs=nb_epochs,
        callbacks=callbacks,
        validation_data=image_generator.flow('val'),
        validation_steps=np.ceil(len(val_keys) / batch_size))

    logging.debug("[INFO] Saving weights...")

    K.clear_session()
Example No. 8
    channel_converted_value, channel_last_timestamp = create_lists_ccv_clt(
        file_increment_num, 27)

    channel_last_datetime = unix_timestamp_to_datetime(channel_last_timestamp)
    """
    f = open('clt_ccv.csv', 'w')
    for x,y in zip(channel_last_timestamp,channel_converted_value):
        f.write(str(x) + "," + str(y) +'\n')
    f.close()
    """
    seq_size = 5
    predictor = SeriesPredictor(input_dim=1, seq_size=seq_size, hidden_dim=100)

    data = data_loader.load_series('2_ccv_clt.csv')
    train_data, actual_vals = data_loader.split_data(data)

    reconditioned_data = data_loader.recondition_data(data)
    train_data_reconditioned, actual_vals_reconditioned = data_loader.split_data(
        reconditioned_data)

    train_data, actual_vals = data_loader.split_data(data)
    train_x, train_y = [], []
    for i in range(len(train_data) - seq_size - 1):
        train_x.append(
            np.expand_dims(train_data[i:i + seq_size], axis=1).tolist())
        train_y.append(train_data[i + 1:i + seq_size + 1])

    test_x, test_y = [], []
    for i in range(len(actual_vals) - seq_size - 1):
        test_x.append(