def run_CV(warming):
    # You have to "pull in" the globally scoped variables
    global model, cam, frame, start, i, fps

    s, im = cam.read()
    im = data.augment_data(im)
    im = im.reshape((1, IMAGE_SIZE[0], IMAGE_SIZE[1], 3))
    y = model.predict(im)
    frame += 1
    y = data.norm_label(y, -1, 1, 1044, 1750)

    fin = time()
    if fin - start >= 1:
        i += 1
        fps = round(frame / i)
        start = fin

    if not warming:
        # ser.write(('steer,' + str(y) + "\n").encode())
        print("FPS: {} Steering: {}".format(fps, int(y)))
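# A minimal, hypothetical driver for run_CV() above. It assumes the globals the
# function pulls in (model, cam, frame, start, i, fps, plus IMAGE_SIZE and the
# data module) are set up roughly like this; the model path, camera index and
# warm-up count are illustrative, not taken from the original code.
import cv2
from time import time
from tensorflow.keras.models import load_model

IMAGE_SIZE = (120, 160)                    # assumed network input resolution
model = load_model("steering_model.h5")    # hypothetical model file
cam = cv2.VideoCapture(0)                  # default camera assumed
frame, start, i, fps = 0, time(), 0, 0

for _ in range(30):        # warm-up passes: run inference but skip output
    run_CV(warming=True)
while True:                # main loop: print FPS and predicted steering value
    run_CV(warming=False)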
def build_training_data_loader(self) -> keras.InputData:
    """
    In this example we added some fields of note under the `data` field in the
    YAML experiment configuration: the `acceleration` field. Under this field,
    you can configure multithreading by setting `use_multiprocessing` to
    `False`, or set it to `True` for multiprocessing. You can also configure
    the number of workers (processes or threads, depending on
    `use_multiprocessing`).

    Another thing of note is the set of data augmentation fields under
    hyperparameters. These fields get passed through to Keras'
    `ImageDataGenerator` for real-time data augmentation.
    """
    if not self.data_downloaded:
        self.download_directory = download_cifar10_tf_sequence(
            download_directory=self.download_directory,
            url=self.context.get_data_config()["url"],
        )
        self.data_downloaded = True

    hparams = self.context.get_hparams()
    width_shift_range = hparams.get("width_shift_range", 0.0)
    height_shift_range = hparams.get("height_shift_range", 0.0)
    horizontal_flip = hparams.get("horizontal_flip", False)
    batch_size = self.context.get_per_slot_batch_size()

    (train_data, train_labels), (_, _) = get_data(self.download_directory)

    # Set up the training data loader.
    data_augmentation = {
        "width_shift_range": width_shift_range,
        "height_shift_range": height_shift_range,
        "horizontal_flip": horizontal_flip,
    }

    # Returns a tf.keras.Sequence.
    train = augment_data(train_data, train_labels, batch_size, data_augmentation)
    return train
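# A hedged sketch of the augment_data() helper used above, assuming (as the
# docstring states) that the hyperparameter fields are passed straight through
# to Keras' ImageDataGenerator, whose flow() iterator behaves as a tf.keras
# Sequence. This is an illustration, not the project's actual implementation.
from tensorflow.keras.preprocessing.image import ImageDataGenerator

def augment_data_sketch(images, labels, batch_size, data_augmentation):
    # data_augmentation holds width_shift_range, height_shift_range and
    # horizontal_flip, exactly the keys built in build_training_data_loader()
    generator = ImageDataGenerator(**data_augmentation)
    # flow() yields (x_batch, y_batch) tuples of the requested batch size
    return generator.flow(images, labels, batch_size=batch_size)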
def main():
    # reset tf graph
    tf.reset_default_graph()

    # get model configuration
    model_configs = model_config.cnn_2x_scale

    # load data
    train, valid, test = data.load_data(
        n_train_samples_per_class=model_configs['n_train_samples_per_class'],
        classes=np.asarray(model_configs['classes']))
    train._images = data.augment_data(train.images, augment_type="SCALE_UP")
    valid._images = data.augment_data(valid.images, augment_type="SCALE_UP")
    test._images = data.augment_data(test.images, augment_type="SCALE_UP")

    # get number of samples per dataset
    n_train_samples = train.images.shape[0]
    n_valid_samples = valid.images.shape[0]
    n_test_samples = test.images.shape[0]

    # define input and output
    input_width = model_configs['input_width']
    n_input = model_configs['n_input']
    n_classes = np.asarray(model_configs['classes']).shape[0]

    # define training hyper-parameters
    n_epochs = model_configs['n_epochs']
    minibatch_size = model_configs['minibatch_size']
    learning_rate = model_configs['learning_rate']
    regularization_term = model_configs['regularization_term']
    keep_probability = model_configs['keep_probability']

    # define conv layer architecture
    filter_size = model_configs['filter_size']
    num_filters = model_configs['num_filters']
    conv_stride = model_configs['conv_stride']
    max_pool_stride = model_configs['max_pool_stride']
    pool_size = model_configs['pool_size']
    padding = model_configs['padding']

    # define FC NN architecture
    fc1_size = model_configs['fc1_size']
    fc2_size = model_configs['fc2_size']

    # define visualization parameters
    vis_layers = np.arange(0, 8)  # selected filter visualization layers

    # define placeholders
    X = tf.placeholder(tf.float32, shape=(None, n_input), name="X")
    y = tf.placeholder(tf.int32, shape=(None, n_classes), name="y")
    keep_prob = tf.placeholder(tf.float32)

    # input reshaping
    X_image = tf.reshape(X, [-1, input_width, input_width, 1])

    # convolutional layer 1
    with tf.variable_scope("conv_1"):
        W_conv1 = weight_variable([filter_size, filter_size, 1, num_filters])
        b_conv1 = bias_variable([num_filters])
        h_conv1 = tf.nn.relu(
            conv2d(X_image, W_conv1, stride=conv_stride, padding=padding) + b_conv1)

    # convolutional output dimension
    conv_out_dim = np.int(
        np.floor((input_width - filter_size) / conv_stride + 1))

    # max pooling output dimension
    max_pool_out_dim = np.int(
        np.floor((conv_out_dim - pool_size) / max_pool_stride + 1))

    # max pooling layer 1
    with tf.variable_scope("pool_1"):
        h_pool1 = max_pool(h_conv1, stride=max_pool_stride,
                           pool_size=pool_size, padding=padding)
        h_pool1_flat = tf.reshape(
            h_pool1, [-1, max_pool_out_dim * max_pool_out_dim * num_filters])

    # fully connected layer 1
    with tf.variable_scope("fc_1"):
        W_fc1 = weight_variable(
            [max_pool_out_dim * max_pool_out_dim * num_filters, fc1_size])
        b_fc1 = bias_variable([fc1_size])
        h_fc1 = tf.nn.relu(tf.matmul(h_pool1_flat, W_fc1) + b_fc1)
        h_fc1_dropout = tf.nn.dropout(h_fc1, keep_prob=keep_prob)

    # fully connected layer 2
    with tf.variable_scope("fc_2"):
        W_fc2 = weight_variable([fc1_size, fc2_size])
        b_fc2 = bias_variable([fc2_size])
        h_fc2 = tf.nn.relu(tf.matmul(h_fc1_dropout, W_fc2) + b_fc2)
        h_fc2_dropout = tf.nn.dropout(h_fc2, keep_prob=keep_prob)

    # output layer
    with tf.variable_scope("output_1"):
        W_fc3 = weight_variable([fc2_size, n_classes])
        b_fc3 = bias_variable([n_classes])
        y_conv = tf.matmul(h_fc2_dropout, W_fc3) + b_fc3

    # compute losses
    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=y_conv))
    W1 = tf.get_default_graph().get_tensor_by_name("fc_1/weight:0")
    W2 = tf.get_default_graph().get_tensor_by_name("fc_2/weight:0")
    W3 = tf.get_default_graph().get_tensor_by_name("output_1/weight:0")
    reg_loss = tf.reduce_sum(tf.pow(tf.abs(W1), 2)) + \
        tf.reduce_sum(tf.pow(tf.abs(W2), 2)) + \
        tf.reduce_sum(tf.pow(tf.abs(W3), 2))
    cost = cross_entropy + (reg_loss * regularization_term)

    # compute predictions and error
    prediction = tf.argmax(y_conv, axis=1)
    correct = tf.equal(tf.argmax(y_conv, axis=1), tf.argmax(y, axis=1))
    error = 1 - tf.reduce_mean(tf.cast(correct, tf.float32))

    # training op
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)

    # initialize variables and session
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

    with tf.Session() as sess:
        init.run()

        # initialize cost and error variables
        train_iteration_errors = []
        train_errors = []
        valid_errors = []
        test_errors = []

        # calculate number of iterations per epoch
        train_iterations = int(n_train_samples / minibatch_size)

        for epoch in range(n_epochs):
            if epoch % 10 == 0:
                print("--- epoch: {}".format(epoch))

            # reset error each epoch
            epoch_train_error = 0.
            epoch_valid_error = 0.
            epoch_test_error = 0.

            for i in range(train_iterations):
                # get next batch of training data and labels
                train_data_mb, train_label_mb = train.next_batch(minibatch_size)

                # compute error
                train_mb_error = error.eval(feed_dict={
                    X: train_data_mb,
                    y: train_label_mb,
                    keep_prob: 1.0
                })
                epoch_train_error += train_mb_error
                train_iteration_errors.append(train_mb_error)

                # training operation
                sess.run(optimizer, feed_dict={
                    X: train_data_mb,
                    y: train_label_mb,
                    keep_prob: keep_probability
                })

            # compute average train epoch error
            train_errors.append(epoch_train_error / train_iterations)

            # compute valid epoch error through mini-batches
            valid_iterations = int(n_valid_samples / minibatch_size)
            for i in range(valid_iterations):
                valid_data_mb, valid_label_mb = valid.next_batch(minibatch_size)
                valid_mb_error = error.eval(feed_dict={
                    X: valid_data_mb,
                    y: valid_label_mb,
                    keep_prob: 1.0
                })
                epoch_valid_error += valid_mb_error
            avg_epoch_valid_error = epoch_valid_error / valid_iterations
            valid_errors.append(avg_epoch_valid_error)

            # compute test epoch error through mini-batches
            test_iterations = int(n_test_samples / minibatch_size)
            for i in range(test_iterations):
                test_data_mb, test_label_mb = test.next_batch(minibatch_size)
                test_mb_error = error.eval(feed_dict={
                    X: test_data_mb,
                    y: test_label_mb,
                    keep_prob: 1.0
                })
                epoch_test_error += test_mb_error
            avg_epoch_test_error = epoch_test_error / test_iterations
            test_errors.append(avg_epoch_test_error)

        # save final model
        save_path = saver.save(sess, "./models/{}_final.ckpt".format(MODEL_NAME))

        # print final errors
        print_utils.print_final_error(train_errors[-1], valid_errors[-1],
                                      test_errors[-1])

        # print test error based on best valid epoch
        print_utils.print_best_valid_epoch(train_errors, valid_errors,
                                           test_errors)
        print_utils.write_errors_to_file(train_errors, valid_errors,
                                         test_errors, model_configs, MODEL_NAME)

        # plot error vs. epoch
        plot_utils.plot_epoch_errors(train_errors, valid_errors, prefix=MODEL_NAME)
        plot_utils.plot_train_iteration_errors(train_iteration_errors,
                                               prefix=MODEL_NAME)
        plot_utils.plot_cnn_kernels(vis_layers, W_conv1, prefix=MODEL_NAME)
def run(epochs=500, training_percentage=0.4, validation_percentage=0.1,
        extract=True, cont=True, size=256, top_k=5):
    '''Runs the routine required to get the data, put it in the needed format
    and start training the model. Saves weights whenever the model produces a
    better validation loss and keeps track of the best loss.'''
    if extract:
        print("Extracting data..")
        X, y = data.extract_data(size=size)

        print("Preprocessing data..")
        X, y, nb_samples, num_categories = data.preprocess_data(
            X, y, save=True, subtract_mean=True)
    else:
        print("Loading data..")
        h5f = h5py.File('data.hdf5', 'r')
        nb_samples = h5f['nb_samples'].value
        num_categories = h5f['n_categories'].value
        h5f.close()

    print("Number of categories: {}".format(num_categories))
    print("Number of samples: {}".format(nb_samples))

    data_ids = np.arange(start=0, stop=nb_samples)
    val_ids = data.produce_validation_indices(
        data_ids, nb_samples * validation_percentage)
    train_ids = data.produce_train_indices(
        dataset_indx=data_ids,
        number_of_samples=nb_samples * training_percentage,
        val_indx=val_ids)

    # X_train, y_train, X_test, y_test = data.split_data(X, y, split_ratio=split)
    X_train, y_train, X_val, y_val = data.load_dataset_bit_from_hdf5(
        train_ids, val_ids, only_train=False)
    X_val = X_val / 255

    print("Building and Compiling model..")
    model = m.get_model(n_outputs=num_categories, input_size=size)
    if cont:
        # model.load_weights_until_layer("pre_trained_weights/latest_model_weights.hdf5", 26)
        model.load_weights("pre_trained_weights/latest_model_weights.hdf5")
    model.compile(optimizer='adam', loss='categorical_crossentropy',
                  metrics=["accuracy"])

    print("Training..")
    best_performance = np.inf
    for i in range(epochs):
        train_ids = data.produce_train_indices(dataset_indx=data_ids,
                                               number_of_samples=15000,
                                               val_indx=val_ids)
        X_train, y_train = data.load_dataset_bit_from_hdf5(
            train_ids, val_ids, only_train=True)
        X_train = X_train / 255
        X_train = data.augment_data(X_train)

        # fit the model on the batches generated by datagen.flow()
        metadata = model.fit(X_train, y_train,
                             validation_data=[X_val, y_val],
                             batch_size=64, nb_epoch=1, verbose=1,
                             shuffle=True, class_weight=None,
                             sample_weight=None)
        current_loss = metadata.history['loss'][-1]
        current_val_loss = metadata.history['val_loss'][-1]
        preds = model.predict_proba(X_val, batch_size=64)
        print("Loss: {}".format(current_loss))
        print("Val_loss: {}".format(current_val_loss))

        top_3_error = get_top_n_error(preds, y_val, top_k)
        print("Top 3 error: {}".format(top_3_error))
        if current_val_loss < best_performance:
            model.save_weights("pre_trained_weights/model_weights.hdf5",
                               overwrite=True)
            best_performance = current_val_loss
            print("Saving weights..")
        model.save_weights("pre_trained_weights/latest_model_weights.hdf5",
                           overwrite=True)
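# get_top_n_error() is called above but not defined in this snippet. A minimal
# sketch, assuming preds holds per-class probabilities and y_true is one-hot
# encoded (names here are illustrative):
import numpy as np

def get_top_n_error(preds, y_true, n):
    # indices of the n highest-probability classes for each sample
    top_n = np.argsort(preds, axis=1)[:, -n:]
    true_class = np.argmax(y_true, axis=1)
    # a sample is a hit if its true class appears among its top-n predictions
    hits = np.any(top_n == true_class[:, None], axis=1)
    return 1.0 - np.mean(hits)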
def main(argv):
    parser = argparse.ArgumentParser()
    parser.add_argument('--net', default='fcn_gcn',
                        help='NN to use: can be unet or fcn_gcn')
    parser.add_argument('--phase', default='train',
                        help='Phase: can be train, val or test')
    parser.add_argument('--stage', type=int, default=1,
                        help='Training stage')
    parser.add_argument('--load', action='store_true', default=False,
                        help='Turn on to load the pretrained model')
    parser.add_argument('--prepare_data_stats', action='store_true', default=False,
                        help='Turn on to prepare data statistics. '
                             'This must be done the first time you train.')
    parser.add_argument('--set', type=int, default=1,
                        help='Set for one of the zones/angles: can be an integer from 1 to 16')
    parser.add_argument('--train_image_dir', default='../data/train/images/',
                        help='Directory containing training images')
    parser.add_argument('--train_mask_dir', default='../data/train/masks/',
                        help='Directory containing masks for training images')
    parser.add_argument('--train_data_dir', default='../data/train/misc/',
                        help='Directory to store temporary training data')
    parser.add_argument('--test_image_dir', default='../data/test/images/',
                        help='Directory containing test images')
    parser.add_argument('--test_results_dir', default='../data/test/results/',
                        help='Directory containing results for the test set')
    parser.add_argument('--save_dir', default='./models/',
                        help='Directory to contain the trained model')
    parser.add_argument('--save_period', type=int, default=100,
                        help='Period to save the trained model')
    parser.add_argument('--display_period', type=int, default=20,
                        help='Period over which to display loss')
    parser.add_argument('--num_epochs', type=int, default=100,
                        help='Number of training epochs')
    parser.add_argument('--batch_size', type=int, default=1,
                        help='Batch size')
    parser.add_argument('--learning_rate', type=float, default=0.001,
                        help='Learning rate')
    parser.add_argument('--batch_norm', action='store_true', default=True,
                        help='Turn on to use batch normalization')
    parser.add_argument('--sce_weight', type=float, default=1.,
                        help='Adds softmax cross-entropy (SCE) loss when weight is non-zero')
    parser.add_argument('--edge_factor', type=int, default=0,
                        help='Gives additional weight to edges when using SCE')
    parser.add_argument('--augment_data', action='store_true', default=False,
                        help='Turn on to generate augmented data for the first time')
    parser.add_argument('--augment_factor', type=int, default=1,
                        help='Factor by which to augment original data')

    args = parser.parse_args()

    if args.prepare_data_stats:
        prepare_data_stats(args)

    if args.augment_data:
        augment_data(args)

    cfg = load_config(args.train_data_dir, args.set)
    model = Model(args, cfg)

    if args.phase == 'train':
        train_data = prepare_train_data(args, cfg)
        model.train(train_data)
    elif args.phase == 'val':
        assert args.batch_size == 1
        train_data = prepare_train_data(args, cfg)
        model.validate(train_data)
    elif args.phase == 'test':
        assert args.batch_size == 1
        test_data = prepare_test_data(args, cfg)
        model.test(test_data)
    else:
        return
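# Illustrative command lines for the argparse interface above (the script name
# main.py and all flag values are assumptions, not taken from the project):
#
#   python main.py --prepare_data_stats --set 1          # first run: build data statistics
#   python main.py --augment_data --augment_factor 2     # optionally generate augmented data
#   python main.py --phase train --net fcn_gcn --num_epochs 100
#   python main.py --phase val                           # val/test assert batch_size == 1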
# Open the train and test csv files using the pandas library
train_df = pd.read_csv(args.train_file)
test_df = pd.read_csv(args.test_file)

# Split the training dataset into two parts: the data we will train the model
# with and a validation set.
train_df, validation_df = data.split_data(train_df)

# Check the number of rows and columns in the subsets after the split
print("Train data shape after split: {} \n".format(train_df.shape))
print("Validation data shape after split: {} \n".format(validation_df.shape))

# Augment the training data
train_df = data.augment_data(train_df, test_df,
                             use_xnli=args.load_xnli,
                             use_mnli=args.load_mnli,
                             use_bt=args.back_translate,
                             bt_filepath=args.bt_file)

# Define the tokenizer to preprocess the input data
tokenizer = data.define_tokenizer(args.model_name)

# Batch encode the input training data
train_input = data.encode(train_df, tokenizer, max_len=args.max_sequence_length)
input_word_ids = train_input['input_word_ids']
input_mask = train_input['input_mask']
labels = train_input['labels']

print("Training input shape: input_word_ids=>{}, input_mask=>{}, labels=>{}".format(
    input_word_ids.shape, input_mask.shape, labels.shape))
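# A hedged sketch of what data.define_tokenizer() and data.encode() might do,
# assuming a Hugging Face tokenizer and an NLI-style dataframe with "premise",
# "hypothesis" and "label" columns. The column names and return keys below are
# assumptions mirroring the usage above, not the project's actual helpers.
import numpy as np
from transformers import AutoTokenizer

def define_tokenizer_sketch(model_name):
    # load the pretrained tokenizer matching the model checkpoint
    return AutoTokenizer.from_pretrained(model_name)

def encode_sketch(df, tokenizer, max_len):
    # batch-encode sentence pairs to fixed-length ids and attention masks
    enc = tokenizer(list(df["premise"]), list(df["hypothesis"]),
                    padding="max_length", truncation=True, max_length=max_len)
    return {
        "input_word_ids": np.array(enc["input_ids"]),
        "input_mask": np.array(enc["attention_mask"]),
        "labels": df["label"].values,
    }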