def pretrain():
    model = self_supervized_model(400)
    model.compile(loss='sparse_categorical_crossentropy',
                  optimizer=keras.optimizers.Adam(learning_rate=8e-5),
                  metrics=['accuracy'])
    config = {
        'batch_size': 128,
        'maxlen': 80,
        'units': 400,
        'model': model,
    }
    run = wandb.init(project="dotter", group="pretrain", tags=[], config=config)
    wandb_callback = wandb.keras.WandbCallback(log_batch_frequency=50,
                                               save_model=False,
                                               log_weights=False)
    with run:
        for fname in utils.iterate_files(["../wikipedia/AA"]):
            name = fname.split('/')[-1]
            raw_y = load_plaintext(fname, 80)
            utils.shuffle_in_unison(raw_y)
            x, y = get_masked(raw_y, 0.3)
            model.fit(x, y, batch_size=128, validation_split=0.1,
                      callbacks=[wandb_callback])
            # Checkpoint after each file. Note: an explicit save_format='tf'
            # overrides the .h5 suffix, so this writes a TF SavedModel
            # directory despite the extension.
            model.save(f'{pretrain_path}/{name}.h5', save_format='tf')
    model.save(model_name, save_format='tf')
    return model
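# `load_plaintext` and `get_masked` are referenced above but not defined in
# this snippet. A minimal sketch of what `get_masked` could look like for
# masked-token pretraining, assuming integer-encoded sequences and a reserved
# MASK_TOKEN id -- both assumptions for illustration, not the project's
# actual code:
import numpy as np

MASK_TOKEN = 1  # hypothetical reserved id for the mask symbol


def get_masked(raw_y, mask_rate):
    """Return (x, y) where x is raw_y with ~mask_rate of tokens masked.

    y keeps the original tokens, so the model learns to reconstruct the
    masked positions (this pairs with sparse_categorical_crossentropy).
    """
    x = np.copy(raw_y)
    mask = np.random.rand(*x.shape) < mask_rate
    x[mask] = MASK_TOKEN
    return x, raw_y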
def __init__(self, train_percent=0.8, test_percent=None):
    mnist = fetch_mldata('MNIST original')
    data = normalize(mnist.data.astype(np.float64))
    target = np.around(mnist.target).astype(np.int8)
    data, target = shuffle_in_unison(data, target)

    total_samples_number = data.shape[0]
    self.train_samples_number = int(total_samples_number * train_percent)
    self.test_samples_number = total_samples_number - self.train_samples_number
    if test_percent is not None:
        self.test_samples_number = min(
            self.test_samples_number,
            int(total_samples_number * test_percent))
    print "Train data contains %s samples, test data contains %s samples" %\
        (self.train_samples_number, self.test_samples_number)

    self.train_data = data[:self.train_samples_number]
    self.train_target = target[:self.train_samples_number]
    # One-hot encode the training targets for the 10 MNIST classes.
    self.train_target_vectorized = np.empty((self.train_data.shape[0], 10))
    for i in xrange(self.train_data.shape[0]):
        self.train_target_vectorized[i] = vectorize_mnist_target(
            self.train_target[i])

    self.test_data = data[self.train_samples_number:
                          self.train_samples_number + self.test_samples_number]
    self.test_target = target[self.train_samples_number:
                              self.train_samples_number + self.test_samples_number]
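# `vectorize_mnist_target` is used above to fill an (M, 10) array, so it
# presumably one-hot encodes a digit label. A minimal sketch under that
# assumption:
import numpy as np


def vectorize_mnist_target(digit):
    """Return a length-10 one-hot vector with a 1 at index `digit`."""
    v = np.zeros(10)
    v[digit] = 1.0
    return v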
def SGD(self, train_data, train_target, epochs, mini_batch_size, eta):
    """Trains the network using stochastic gradient descent.

    Args:
        train_data: set of training samples without answers, numpy ndarray
            of shape (M, N) where M is the number of samples and N is the
            number of features -- the usual sklearn input format.
        train_target: set of answers for train_data, numpy ndarray of shape
            (M, P) where M is the number of samples and P is the number of
            neurons in the last layer.
        epochs: number of epochs, i.e. iterations over the full training
            dataset.
        mini_batch_size: number of samples in one batch, i.e. weights are
            updated after every mini_batch_size samples.
        eta: learning rate.
    """
    assert train_data.shape[0] == train_target.shape[0]
    assert train_data.shape[1] == self.N
    assert train_target.shape[1] == self.P
    self.init_weights_and_biases()
    print "learning on {0} samples with {1} features, minibatch size is {2}"\
        .format(train_data.shape[0], self.N, mini_batch_size)
    for ep_num in xrange(epochs):
        print "Starting epoch {0}".format(ep_num)
        # Reshuffle before each epoch so mini-batches differ between epochs.
        train_data, train_target = shuffle_in_unison(train_data, train_target)
        for mini_batch_index in xrange(0, train_data.shape[0], mini_batch_size):
            # print "Starting minibatch {0}-{1}".format(
            #     mini_batch_index, mini_batch_index + mini_batch_size)
            mini_batch_train_data = train_data[
                mini_batch_index:mini_batch_index + mini_batch_size]
            mini_batch_train_target = train_target[
                mini_batch_index:mini_batch_index + mini_batch_size]
            dLdW, dLdB = self.update_mini_batch(
                mini_batch_train_data, mini_batch_train_target, eta)
        print "Epoch {0} complete".format(ep_num)
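# `update_mini_batch` is not shown in this snippet. In the classic
# formulation of this kind of network (per-layer weights and biases), it
# backpropagates the loss over the mini-batch and applies one gradient step.
# A hedged sketch, assuming a `backprop` method and `self.weights` /
# `self.biases` lists -- the real class may be organized differently:
def update_mini_batch(self, batch_data, batch_target, eta):
    # Average the gradients over the mini-batch and step against them.
    dLdW, dLdB = self.backprop(batch_data, batch_target)
    m = batch_data.shape[0]
    for layer in xrange(len(self.weights)):
        self.weights[layer] -= (eta / m) * dLdW[layer]
        self.biases[layer] -= (eta / m) * dLdB[layer]
    return dLdW, dLdB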
def perform_training(initializing,
                     netname,
                     numlayers=6,
                     epochs=3,
                     training_sets=2,
                     batch_size=32,
                     learning_rate=.001):
    """
    Parameters
    ----------
    initializing : boolean
        True if we want to make a new net and False if the net already
        exists and we want to continue training it.
    netname : string
        The name of the network in the file system.
    numlayers : int, optional
        Number of layers to use in the network. The default is 6.
    epochs : int, optional
        Number of epochs to do per training set. The default is 3.
    training_sets : int, optional
        Number of training sets to sample from all possible data points.
        The default is 2.
    batch_size : int, optional
        Mini-batch size for training. The default is 32.
    learning_rate : float, optional
        Learning rate of the Adam optimizer. The default is .001.

    Returns
    -------
    The trained model
    """
    # Set up training and test data. Inputs are positions, outputs are
    # (x, y, direction) tuples encoded to integers and then to one-hot
    # vectors, representing either a push or a win. The output vectors are
    # length size*size*4, since a move in any of 4 directions could occur
    # at any of size*size squares.
    x_test, y_test = utils.load_levels(constants.TEST_LEVELS)
    num_classes = 4 * constants.SIZE * constants.SIZE

    # This line implicitly assumes that all levels have the same size.
    # Therefore, small levels are padded with unmovables.
    img_x, img_y, img_z = x_test[0].shape
    input_shape = (img_x, img_y, img_z)
    x_test = x_test.astype('float32')
    print(x_test.shape[0], 'test samples')

    dconst = 0.3  # Dropout between hidden layers
    model = None  # To give the variable function-wide scope

    if initializing:
        # Create a convolutional network with numlayers layers of 3 by 3
        # convolutions and a dense layer at the end.
        # Use batch normalization and regularization.
        model = Sequential()
        model.add(BatchNormalization())
        model.add(
            Conv2D(
                64, (3, 3),
                activation='relu',
                input_shape=input_shape,
                kernel_regularizer=regularizers.l2(.5),
                padding='same'))
        model.add(Dropout(dconst))
        for i in range(numlayers - 1):
            model.add(BatchNormalization())
            model.add(
                Conv2D(
                    64, (3, 3),
                    activation='relu',
                    kernel_regularizer=regularizers.l2(.5),
                    padding='same'))
            model.add(Dropout(dconst))
        model.add(Flatten())
        model.add(Dense(num_classes, activation='softmax'))
    else:
        # Load the model and its weights
        json_file = open("networks/policy_" + netname + ".json", "r")
        loaded_model_json = json_file.read()
        json_file.close()
        model = model_from_json(loaded_model_json)
        model.load_weights("networks/policy_" + netname + ".h5")
        print("Loaded model from disk")

    model.compile(loss=tensorflow.keras.losses.categorical_crossentropy,
                  optimizer=tensorflow.keras.optimizers.Adam(
                      learning_rate=learning_rate),
                  metrics=['accuracy'])

    # Keep track of the model's accuracy
    class AccuracyHistory(tensorflow.keras.callbacks.Callback):
        def on_train_begin(self, logs={}):
            self.acc = []

        def on_epoch_end(self, epoch, logs={}):
            # TF 2.x logs the metric under 'accuracy', not 'acc'.
            self.acc.append(logs.get('accuracy'))

    history = AccuracyHistory()

    # Use different training datasets by getting different random
    # samples from the shifts of the input data
    for i in range(training_sets):
        print("training set", i)
        levels_to_train = constants.TRAIN_LEVELS
        x_train, y_train = utils.load_levels(levels_to_train, shifts=True)
        utils.shuffle_in_unison(x_train, y_train)
        x_train = x_train.astype('float32')

        # Train the network
        track = model.fit(x_train,
                          y_train,
                          batch_size=batch_size,
                          epochs=epochs,
                          verbose=1,
                          validation_data=(x_test, y_test),
                          callbacks=[history])

    score = model.evaluate(x_test, y_test, verbose=0)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])
    plt.plot(range(1, epochs + 1), track.history['val_accuracy'])
    plt.plot(range(1, epochs + 1), track.history['accuracy'])
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.show()

    # Save the trained network
    model_json = model.to_json()
    directory = os.getcwd() + '/networks'
    if not os.path.exists(directory):
        os.mkdir(directory)
    with open("networks/policy_" + netname + ".json", "w") as json_file:
        json_file.write(model_json)
    model.save_weights("networks/policy_" + netname + ".h5")
    print("Saved model to disk")
    return model
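# A possible invocation, assuming the networks/ directory layout used above
# (the netname "test" is made up for illustration):
if __name__ == '__main__':
    model = perform_training(initializing=True, netname='test',
                             numlayers=6, epochs=3)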
# Convert data from dataframes to np.arrays
test_data = test_df.values
valid_data = valid_df.values
train_data = train_df.values
test_labels = test_labels.values
valid_labels = valid_labels.values
train_labels = train_labels.values

# Convert labels to one-hot vectors (labels are 1-7, so shift to 0-6)
test_labels = to_categorical(test_labels - 1, 7)
valid_labels = to_categorical(valid_labels - 1, 7)
train_labels = to_categorical(train_labels - 1, 7)

# Shuffle the data and labels
shuffle_in_unison(test_data, test_labels)
shuffle_in_unison(valid_data, valid_labels)
shuffle_in_unison(train_data, train_labels)

print('Data sets created')

# Build the model
model = Sequential()
model.add(Dense(120, activation='relu', input_dim=54))
model.add(BatchNormalization())
# model.add(Dropout(0.5))
# model.add(Dense(64, activation='relu'))
# model.add(BatchNormalization())
# model.add(Dropout(0.5))
model.add(Dense(7, activation='softmax'))
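# The snippet above stops after defining the layers. A minimal sketch of the
# missing compile/train step, assuming the arrays prepared above; the
# optimizer, epoch count, and batch size are illustrative guesses:
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
model.fit(train_data, train_labels,
          batch_size=128,
          epochs=10,
          validation_data=(valid_data, valid_labels))
model.evaluate(test_data, test_labels)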
def shuffle(self):
    utils.shuffle_in_unison(self.text, self.normalized, self.dagesh,
                            self.niqqud, self.sin)
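# Every snippet above depends on `shuffle_in_unison`, which is never shown.
# Note the two calling conventions in use: some call sites reassign the
# return value, others rely on in-place shuffling. A common implementation
# reuses one RNG state across all arrays so corresponding rows stay aligned.
# This is a sketch of that idiom, not necessarily the version these
# projects use:
import numpy as np


def shuffle_in_unison(*arrays):
    """Shuffle all arrays in place along axis 0 with the same permutation."""
    state = np.random.get_state()
    for a in arrays:
        np.random.set_state(state)  # identical permutation for each array
        np.random.shuffle(a)
    # Also return the arrays so `x, y = shuffle_in_unison(x, y)` works.
    return arrays if len(arrays) > 1 else arrays[0]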