Example 1
def pretrain():
    model = self_supervized_model(400)
    model.compile(loss='sparse_categorical_crossentropy',
                  optimizer=keras.optimizers.Adam(learning_rate=8e-5),
                  metrics=['accuracy'])

    config = {
        'batch_size': 128,
        'maxlen': 80,
        'units': 400,
        'model': model,
    }

    run = wandb.init(project="dotter",
                     group="pretrain",
                     tags=[],
                     config=config)

    wandb_callback = wandb.keras.WandbCallback(log_batch_frequency=50,
                                               save_model=False,
                                               log_weights=False)

    with run:
        for fname in utils.iterate_files(["../wikipedia/AA"]):
            name = fname.split('/')[-1]
            raw_y = load_plaintext(fname, 80)
            utils.shuffle_in_unison(raw_y)
            x, y = get_masked(raw_y, 0.3)
            model.fit(x, y, batch_size=128, validation_split=0.1, callbacks=[wandb_callback])
            model.save(f'{pretrain_path}/{name}.h5', save_format='tf')

    model.save(model_name, save_format='tf')
    return model
Example 2
    def __init__(self, train_percent=0.8, test_percent=None):
        # Note: fetch_mldata was removed from newer scikit-learn releases;
        # fetch_openml('mnist_784') is the modern replacement.
        mnist = fetch_mldata('MNIST original')
        data = normalize(mnist.data.astype(np.float64))
        target = np.around(mnist.target).astype(np.int8)
        data, target = shuffle_in_unison(data, target)
        total_samples_number = data.shape[0]
        self.train_samples_number = int(total_samples_number * train_percent)
        self.test_samples_number = total_samples_number - self.train_samples_number
        if test_percent is not None:
            self.test_samples_number = min(
                self.test_samples_number,
                int(total_samples_number * test_percent))
        print "Train data contains %s samples, test data contains %s samples" %\
              (self.train_samples_number, self.test_samples_number)

        self.train_data = data[:self.train_samples_number]
        self.train_target = target[:self.train_samples_number]

        self.train_target_vectorized = np.empty((self.train_data.shape[0], 10))
        for i in range(self.train_data.shape[0]):
            self.train_target_vectorized[i] = vectorize_mnist_target(
                self.train_target[i])

        test_end = self.train_samples_number + self.test_samples_number
        self.test_data = data[self.train_samples_number:test_end]
        self.test_target = target[self.train_samples_number:test_end]
Example 3
    def SGD(self, train_data, train_target, epochs, mini_batch_size, eta):
        """Trains the network using stochastic gradient descendant.

        Args:
            train_data: set of training samples without answers, numpy ndarray of shape (M, N) where M is the number of
                samples and N is the number of features -- a usual sklearn input format.
            train_target: set of answers for train_data, numpy ndarray of shape (M, P) where M is the number of samples
                and P is the number of neurons in the last layer.
            epochs: number of epochs, i.e. iterations over the full training dataset.
            mini_batch_size: number of samples in one batch, i.e. the weights are updated after every
                mini_batch_size samples.
            eta: learning rate.
        """
        assert(train_data.shape[0] == train_target.shape[0])
        assert(train_data.shape[1] == self.N)
        assert(train_target.shape[1] == self.P)
        self.init_weights_and_biases()
        print "learning on {0} samples with {1} features, minibatch size is {2}"\
            .format(train_data.shape[0], self.N, mini_batch_size)
        for ep_num in xrange(epochs):
            print "Starting epoch {0}".format(ep_num)
            train_data, train_target = shuffle_in_unison(train_data, train_target)
            for mini_batch_index in range(0, train_data.shape[0], mini_batch_size):
                # print("Starting minibatch {0}-{1}".format(mini_batch_index, mini_batch_index + mini_batch_size))
                mini_batch_train_data = train_data[mini_batch_index:mini_batch_index + mini_batch_size]
                mini_batch_train_target = train_target[mini_batch_index:mini_batch_index + mini_batch_size]
                dLdW, dLdB = self.update_mini_batch(mini_batch_train_data, mini_batch_train_target, eta)

            print "Epoch {0} complete".format(ep_num)
Example 4
def perform_training(initializing,
                     netname,
                     numlayers=6,
                     epochs=3,
                     training_sets=2,
                     batch_size=32,
                     learning_rate=.001):
    """

    Parameters
    ----------
    initializing : boolean
        True if we want to make a new net, False if the net already
        exists and we want to continue training it.
    netname : string
        The name of the network in the file system.
    numlayers: int, optional
        Number of layers to use in the network. The default is 6.
    epochs: int, optional
        Number of epochs to do per training set. The default is 3.
    training_sets: int, optional
        Number of training sets to sample from all possible data
        points. The default is 2.
    batch_size: int, optional
        Size of the mini-batches passed to model.fit. The default is 32.
    learning_rate: float, optional
        Learning rate of the Adam optimizer. The default is .001.

    Returns
    -------
    The trained model

    """

    # Set up training and test data.  Inputs are positions,
    # outputs are (x,y,direction) tuples encoded to integers
    # and then to one-hot vectors, representing
    # either a push or a win.
    # The output vectors are length size*size*4, since a move
    # in any of 4 directions could occur at any of size*size squares.
    x_test, y_test = utils.load_levels(constants.TEST_LEVELS)
    num_classes = 4 * constants.SIZE * constants.SIZE

    # This line implicitly assumes that all levels have the same size.
    # Therefore, small levels are padded with unmovables.
    img_x, img_y, img_z = x_test[0].shape

    input_shape = (img_x, img_y, img_z)

    x_test = x_test.astype('float32')
    print(x_test.shape[0], 'test samples')

    dconst = 0.3  # Dropout between hidden layers

    model = None  # Defined before the if/else so it is visible afterwards
    if initializing:
        # Create a convolutional network with numlayers layers of 3 by 3
        # convolutions and a dense layer at the end.
        # Use batch normalization and regularization.
        model = Sequential()
        model.add(BatchNormalization())
        model.add(
            Conv2D(
                64,
                (3, 3),
                activation='relu',
                input_shape=input_shape,
                #padding = 'same'))
                kernel_regularizer=regularizers.l2(.5),
                padding='same'))
        model.add(Dropout(dconst))

        for i in range(numlayers - 1):
            model.add(BatchNormalization())
            model.add(
                Conv2D(
                    64,
                    (3, 3),
                    activation='relu',
                    #padding = 'same'))
                    kernel_regularizer=regularizers.l2(.5),
                    padding='same'))
            model.add(Dropout(dconst))
        model.add(Flatten())
        model.add(Dense(num_classes, activation='softmax'))
    else:
        # Load the model and its weights
        json_file = open("networks/policy_" + netname + ".json", "r")
        loaded_model_json = json_file.read()
        json_file.close()
        model = model_from_json(loaded_model_json)
        model.load_weights("networks/policy_" + netname + ".h5")
        print("Loaded model from disk")

    model.compile(loss=tensorflow.keras.losses.categorical_crossentropy,
                  optimizer=tensorflow.keras.optimizers.Adam(
                      learning_rate=learning_rate),
                  metrics=['accuracy'])

    # Keep track of the model's accuracy
    class AccuracyHistory(tensorflow.keras.callbacks.Callback):
        def on_train_begin(self, logs=None):
            self.acc = []

        def on_epoch_end(self, epoch, logs=None):
            # With metrics=['accuracy'], the logged key is 'accuracy', not 'acc'
            self.acc.append((logs or {}).get('accuracy'))

    history = AccuracyHistory()

    # Use different training datasets by getting different random
    # samples from the shifts of the input data
    for i in range(training_sets):
        print("training set", i)
        levels_to_train = constants.TRAIN_LEVELS
        x_train, y_train = utils.load_levels(levels_to_train, shifts=True)
        utils.shuffle_in_unison(x_train, y_train)
        x_train = x_train.astype('float32')

        # Train the network
        track = model.fit(x_train,
                          y_train,
                          batch_size=batch_size,
                          epochs=epochs,
                          verbose=1,
                          validation_data=(x_test, y_test),
                          callbacks=[history])

    score = model.evaluate(x_test, y_test, verbose=0)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])
    plt.plot(range(1, epochs + 1), track.history['val_accuracy'])
    plt.plot(range(1, epochs + 1), track.history['accuracy'])
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.show()

    # Save the trained network
    model_json = model.to_json()
    directory = os.getcwd() + '/networks'
    if not os.path.exists(directory):
        os.mkdir(directory)
    with open("networks/policy_" + netname + ".json", "w") as json_file:
        json_file.write(model_json)

    model.save_weights("networks/policy_" + netname + ".h5")
    print("Saved model to disk")

    return model
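The comment at the top of perform_training describes the output encoding: a (x, y, direction) move is mapped to one of SIZE*SIZE*4 class indices and then one-hot encoded. A minimal sketch of such an encoding follows; the exact index layout used by utils.load_levels is an assumption, as is the SIZE value:

import numpy as np

SIZE = 10  # assumed board size; the real value lives in constants.SIZE

def encode_move(x, y, direction):
    """Hypothetical encoding: map (x, y, direction) to a class index in [0, SIZE*SIZE*4)."""
    return (x * SIZE + y) * 4 + direction

def one_hot_move(x, y, direction):
    v = np.zeros(SIZE * SIZE * 4)
    v[encode_move(x, y, direction)] = 1.0
    return v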
Example 5
# Convert data from dataframes to np.arrays
test_data = test_df.values
valid_data = valid_df.values
train_data = train_df.values
test_labels = test_labels.values
valid_labels = valid_labels.values
train_labels = train_labels.values

# Convert labels to one hot vectors
test_labels = to_categorical(test_labels - 1, 7)
valid_labels = to_categorical(valid_labels - 1, 7)
train_labels = to_categorical(train_labels - 1, 7)

# Shuffle the data and labels
shuffle_in_unison(test_data, test_labels)
shuffle_in_unison(valid_data, valid_labels)
shuffle_in_unison(train_data, train_labels)

print('Data sets created')

# Build the model

model = Sequential()
model.add(Dense(120, activation='relu', input_dim=54))
model.add(BatchNormalization())
# model.add(Dropout(0.5))
# model.add(Dense(64, activation='relu'))
# model.add(BatchNormalization())
# model.add(Dropout(0.5))
model.add(Dense(7, activation='softmax'))
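The snippet stops after defining the model. Since the labels were one-hot encoded with to_categorical, a training step would presumably use categorical cross-entropy; a sketch of what that might look like (the optimizer, batch size and epoch count are assumptions, not part of the original code):

model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
model.fit(train_data, train_labels,
          batch_size=256,
          epochs=20,
          validation_data=(valid_data, valid_labels))
model.evaluate(test_data, test_labels)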
Example 6
    def shuffle(self):
        utils.shuffle_in_unison(self.text, self.normalized, self.dagesh,
                                self.niqqud, self.sin)
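All of the examples above rely on a shuffle_in_unison helper whose implementation is not shown. A common pattern is to apply the same random permutation to every array, shuffling in place and also returning the arrays so both call styles seen above work. A minimal sketch, assuming equal-length NumPy arrays as the call sites suggest:

import numpy as np

def shuffle_in_unison(*arrays):
    """Shuffle several equal-length arrays with the same permutation, in place."""
    if not arrays:
        return arrays
    perm = np.random.permutation(len(arrays[0]))
    for a in arrays:
        a[:] = a[perm]
    return arrays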