def inference(self, mode='simple'):
    """Build and compile a FlowNet model for a stereo image pair.

    Args:
        mode: 'simple' (stack both views on the channel axis and run
            FlowNetSimple) or 'correlation' (FlowNetCorr, which correlates
            the two streams internally).

    Returns:
        A compiled Keras Model mapping [left, right] images to the predicted
        flow, trained with MAE loss.

    Raises:
        ValueError: for an unknown mode (previously fell through and
            silently returned None).
    """
    left_image = Input(shape=(self.model_in_height, self.model_in_width,
                              self.model_in_depth), name='left_input')
    right_image = Input(shape=(self.model_in_height, self.model_in_width,
                               self.model_in_depth), name='right_image')

    if mode == 'simple':
        # FlowNetSimple consumes the two views concatenated along channels.
        concate_view = concatenate([left_image, right_image], axis=3,
                                   name='concate_view')
        prediction = self.FlowNetSimple(concate_view)
    elif mode == 'correlation':
        prediction = self.FlowNetCorr(left_image, right_image)
    else:
        raise ValueError("unknown mode: %r" % (mode,))

    # Model construction/compilation was duplicated per branch; shared here.
    FlowNet = Model(inputs=[left_image, right_image], outputs=[prediction])
    opt = Adam(lr=self.learning_rate)
    FlowNet.compile(optimizer=opt, loss='mae')
    FlowNet.summary()
    return FlowNet
def actor_optimizer(self):
    """Build the policy-gradient training function for the actor network.

    Returns:
        A Keras backend function taking [states, one-hot actions, advantages]
        and returning [loss], applying one Adam update to the actor weights
        per call.
    """
    action = K.placeholder(shape=[None, self.action_size])
    advantages = K.placeholder(shape=[
        None,
    ])  # multi-step (n-step) advantage estimates, one scalar per sample
    policy = self.actor.output
    # Probability of the action actually taken (one-hot mask + sum).
    action_prob = K.sum(action * policy, axis=1)
    # Policy-gradient surrogate: log pi(a|s) * advantage; 1e-10 guards log(0).
    cross_entropy = K.log(action_prob + 1e-10) * advantages
    cross_entropy = -K.mean(cross_entropy)
    # add (-entropy) to loss function, for enthusiastic search
    minus_entropy = K.sum(policy * K.log(policy + 1e-10), axis=1)
    minus_entropy = K.mean(minus_entropy)
    # optimizing loss minimizes cross_entropy, maximizes entropy
    # NOTE(review): the entropy bonus is currently disabled (commented out),
    # so minus_entropy is computed but unused.
    loss = cross_entropy  # + 0.01 * minus_entropy
    optimizer = Adam(lr=self.actor_lr)
    updates = optimizer.get_updates(loss, self.actor.trainable_weights)
    train = K.function([self.actor.input, action, advantages], [loss],
                       updates=updates)
    return train
def critic_optimizer(self):
    """Create a training function that fits the critic to discounted returns.

    Returns:
        A Keras backend function taking [states, discounted returns] and
        returning [loss], applying one Adam update per call.
    """
    discounted_prediction = K.placeholder(shape=(None, ))
    predicted_value = self.critic.output
    # MSE between the discounted return target and the critic's estimate.
    mse_loss = K.mean(K.square(discounted_prediction - predicted_value))
    adam = Adam(lr=self.critic_lr)
    weight_updates = adam.get_updates(mse_loss, self.critic.trainable_weights)
    return K.function([self.critic.input, discounted_prediction],
                      [mse_loss], updates=weight_updates)
def fit(self, flow, epochs, lr, validation_data, train_callbacks=None, batches=300):
    """Train self.model from a batch generator with early stopping.

    Args:
        flow: generator yielding (x, y) training batches.
        epochs: number of epochs.
        lr: Adam learning rate.
        validation_data: (x_valid, y_valid) used for val_loss and F-beta.
        train_callbacks: optional extra Keras callbacks (default: none).
        batches: steps (batches) per epoch.

    Returns:
        [fbeta-per-epoch, train_losses, val_losses, final fbeta, thresholds].
    """
    # Fix: the default used to be a mutable list (shared across calls).
    if train_callbacks is None:
        train_callbacks = []
    history = LossHistory()
    fbeta = Fbeta(validation_data)
    opt = Adam(lr=lr)
    self.model.compile(loss='binary_crossentropy', optimizer=opt,
                       metrics=['accuracy'])
    # Stop once val_loss has not improved for 3 consecutive epochs.
    earlyStopping = EarlyStopping(monitor='val_loss', patience=3, verbose=0,
                                  mode='auto')
    self.model.fit_generator(
        flow,
        steps_per_epoch=batches,
        epochs=epochs,
        callbacks=[history, earlyStopping, fbeta] + list(train_callbacks),
        validation_data=validation_data)
    fb_score, thresholds = self.get_fbeta_score(validation_data, verbose=False)
    return [
        fbeta.fbeta, history.train_losses, history.val_losses, fb_score,
        thresholds
    ]
def get_training_parameters(data):
    """Return (min_delta, optimizer, patience) tuned for data.dataset."""
    min_delta = 0.0001
    # Datasets that warrant a longer early-stopping patience than the
    # default of 30 epochs.
    long_patience_datasets = {
        "cifar100": 50,
        "GTSRB": 50,
        "caltech_siluettes": 50,
        "rockpaperscissors": 50,
    }
    patience = long_patience_datasets.get(data.dataset, 30)
    if data.dataset == "GTSRB":
        # Senkes?  (lr=0.0003 was also tried)
        optimizer = Adam(lr=0.0005)
    else:
        optimizer = Adam()
    return min_delta, optimizer, patience
def train_model(self, x_train, y_train, learn_rate=0.001, epoch=5,
                batch_size=128, validation_split_size=0.2, train_callbacks=()):
    """Train the classifier on a random train/validation split.

    Returns:
        [train_losses, val_losses, fbeta_score] for the run.
    """
    loss_history = LossHistory()
    # Hold out a fraction of the data for validation / early stopping.
    X_train, X_valid, y_train, y_valid = train_test_split(
        x_train, y_train, test_size=validation_split_size)
    self.classifier.compile(loss='binary_crossentropy',
                            optimizer=Adam(lr=learn_rate),
                            metrics=['accuracy'])
    # early stopping will auto-stop training process if model stops learning after 3 epochs
    stop_early = EarlyStopping(monitor='val_loss', patience=3, verbose=0,
                               mode='auto')
    self.classifier.fit(X_train,
                        y_train,
                        batch_size=batch_size,
                        epochs=epoch,
                        verbose=1,
                        validation_data=(X_valid, y_valid),
                        callbacks=[loss_history, *train_callbacks, stop_early])
    fbeta_score = self._get_fbeta_score(self.classifier, X_valid, y_valid)
    return [loss_history.train_losses, loss_history.val_losses, fbeta_score]
def _build_model(self):
    """Assemble the Deep-Q network: two hidden ReLU layers, linear Q head."""
    q_network = Sequential([
        Dense(24, input_dim=self.state_size, activation='relu'),
        Dense(24, activation='relu'),
        # One linear output per action = its estimated Q-value.
        Dense(self.action_size, activation='linear'),
    ])
    q_network.compile(loss='mse', optimizer=Adam(lr=self.learning_rate))
    return q_network
def train(data, file_name, params, num_epochs=50, batch_size=256, train_temp=1,
          init=None, lr=0.01, decay=1e-5, momentum=0.9, activation="relu",
          optimizer_name="sgd"):
    """
    Train a n-layer simple network for MNIST and CIFAR.

    Args:
        data: dataset object exposing train/validation data and labels.
        file_name: path to save the trained model, or None to skip saving.
        params: hidden-layer widths, one per dense layer.
        num_epochs, batch_size: training schedule.
        train_temp: softmax temperature applied to the logits in the loss.
        init: optional path to initial weights.
        lr, decay, momentum: optimizer hyper-parameters.
        activation: hidden activation name, or "arctan" for tf.atan.
        optimizer_name: "sgd" or "adam".

    Returns:
        {'model': trained model, 'history': Keras training history}.

    Raises:
        ValueError: for an unknown optimizer_name (previously fell through
            and crashed later with NameError).
    """
    # create a Keras sequential model
    model = Sequential()
    # reshape the input (28*28*1) or (32*32*3) to 1-D
    model.add(Flatten(input_shape=data.train_data.shape[1:]))
    # dense hidden layers
    n = 0
    for param in params:
        n += 1
        model.add(Dense(param, kernel_initializer='he_uniform'))
        if activation == "arctan":
            # arctan is not a stock Keras activation; wrap tf.atan in a Lambda.
            model.add(Lambda(lambda x: tf.atan(x), name=activation + "_" + str(n)))
        else:
            model.add(Activation(activation, name=activation + "_" + str(n)))
    # the output layer, with 10 classes (raw logits; softmax lives in the loss)
    model.add(Dense(10, kernel_initializer='he_uniform'))

    # load initial weights when given
    if init is not None:
        model.load_weights(init)

    # cross entropy between prediction and true label, on temperature-scaled logits
    def fn(correct, predicted):
        return tf.nn.softmax_cross_entropy_with_logits(
            labels=correct, logits=predicted / train_temp)

    if optimizer_name == "sgd":
        optimizer = SGD(lr=lr, decay=decay, momentum=momentum, nesterov=True)
    elif optimizer_name == "adam":
        optimizer = Adam(lr=lr, beta_1=0.9, beta_2=0.999, epsilon=None,
                         decay=decay, amsgrad=False)
    else:
        raise ValueError("unknown optimizer_name: %r" % (optimizer_name,))

    model.compile(loss=fn, optimizer=optimizer, metrics=['accuracy'])
    model.summary()
    print("Training a {} layer model, saving to {}".format(
        len(params) + 1, file_name))
    # run training with given dataset, and print progress
    history = model.fit(data.train_data,
                        data.train_labels,
                        batch_size=batch_size,
                        validation_data=(data.validation_data,
                                         data.validation_labels),
                        epochs=num_epochs,
                        shuffle=True)
    # save model to a file
    if file_name is not None:
        model.save(file_name)
        print('model saved to ', file_name)
    return {'model': model, 'history': history}
def train_model(self, x_train, y_train, x_valid, y_valid, learn_rate=0.001,
                epoch=5, batch_size=128, w_sam_map=None, train_callbacks=()):
    """Train the classifier with optional per-sample weights.

    Args:
        x_train, y_train: training data and multi-hot label matrix.
        x_valid, y_valid: held-out validation data.
        learn_rate: Adam learning rate.
        epoch, batch_size: training schedule.
        w_sam_map: optional {label_value: weight} mapping; each sample gets
            the maximum mapped weight across its label vector.
        train_callbacks: extra Keras callbacks.

    Returns:
        [train_losses, val_losses, fbeta_score].
    """
    history = LossHistory()
    opt = Adam(lr=learn_rate)
    self.classifier.compile(loss='binary_crossentropy', optimizer=opt,
                            metrics=['accuracy'])
    # early stopping will auto-stop training process if model stops learning after 3 epochs
    earlyStopping = EarlyStopping(monitor='val_loss', patience=3, verbose=2,
                                  mode='auto')
    # (A fit_generator variant with real-time augmentation was removed here.)
    # shuffle="batch" follows https://github.com/fchollet/keras/pull/6502/files
    # to fix an AttributeError with HDF5-backed arrays.
    if w_sam_map is None:
        w_sam = None
    else:
        # Map every label entry through w_sam_map, then take the max weight
        # over the label vector as the sample's weight.
        w_sam = np.vectorize(w_sam_map.get)(np.array(y_train))
        w_sam = w_sam.max(axis=1)
    # Fix: the default train_callbacks=() is a tuple, and list + tuple raises
    # TypeError — coerce to list before concatenating.
    self.classifier.fit(x_train, y_train, shuffle="batch",
                        batch_size=batch_size,
                        epochs=epoch,
                        verbose=2,
                        validation_data=(x_valid, y_valid),
                        sample_weight=w_sam,
                        callbacks=[history] + list(train_callbacks) + [earlyStopping])
    fbeta_score = self._get_fbeta_score(self.classifier, x_valid, y_valid)
    return [history.train_losses, history.val_losses, fbeta_score]
def define_AttMLFNet(sz_input, sz_input2, view_n, learning_rate):
    """Build the AttMLFNet disparity model over 4 angular branches.

    Args:
        sz_input, sz_input2: spatial height/width of each view patch.
        view_n: view indices along one angular direction; each of the four
            branches (horizontal, vertical, 45°, 135°) takes len(view_n) views.
        learning_rate: Adam learning rate.

    Returns:
        A compiled Keras Model mapping the 4*len(view_n) view inputs to the
        regressed disparity map (MAE loss).
    """
    n_views = len(view_n)
    """ 4 branches inputs"""
    input_list = [Input(shape=(sz_input, sz_input2, 1))
                  for _ in range(n_views * 4)]
    """ 4 branches features"""
    feature_extraction_layer = feature_extraction(sz_input, sz_input2)
    feature_list = [feature_extraction_layer(view) for view in input_list]
    # Split the features into the four angular branches. Previously the
    # boundaries were hard-coded to 9/18/27 (i.e. len(view_n) == 9); deriving
    # them from n_views generalizes the builder and is identical for 9 views.
    feature_h_list = feature_list[:n_views]
    feature_v_list = feature_list[n_views:2 * n_views]
    feature_45_list = feature_list[2 * n_views:3 * n_views]
    feature_135_list = feature_list[3 * n_views:]
    """ cost volume """
    cv_h = Lambda(_get_h_CostVolume_)(feature_h_list)
    cv_v = Lambda(_get_v_CostVolume_)(feature_v_list)
    cv_45 = Lambda(_get_45_CostVolume_)(feature_45_list)
    cv_135 = Lambda(_get_135_CostVolume_)(feature_135_list)
    """ intra branch """
    cv_h_3d, cv_h_ca = to_3d_h(cv_h)
    cv_v_3d, cv_v_ca = to_3d_v(cv_v)
    cv_45_3d, cv_45_ca = to_3d_45(cv_45)
    cv_135_3d, cv_135_ca = to_3d_135(cv_135)
    """ inter branch """
    cv, attention_4 = branch_attention(
        multiply([cv_h_3d, cv_v_3d, cv_45_3d, cv_135_3d]), cv_h_ca, cv_v_ca,
        cv_45_ca, cv_135_ca)
    """ cost volume regression """
    cost = basic(cv)
    # Drop the trailing singleton channel and move axis 1 to the end so the
    # softmax / disparity regression run over that axis.
    cost = Lambda(lambda x: K.permute_dimensions(K.squeeze(x, -1),
                                                 (0, 2, 3, 1)))(cost)
    pred = Activation('softmax')(cost)
    pred = Lambda(disparityregression)(pred)
    model = Model(inputs=input_list, outputs=[pred])
    model.summary()
    opt = Adam(lr=learning_rate)
    model.compile(optimizer=opt, loss='mae')
    return model
def CreateSimpleImageModel_512():
    """Map a 3-vector to a (1, 512, 512, 4) image via two dense layers."""
    dataIn = Input(shape=(3, ))
    hidden = Dense(4 * 4, activation='tanh')(dataIn)
    # One linear unit per output pixel-channel, reshaped into the image.
    flat_pixels = Dense(512 * 512 * 4, activation='linear')(hidden)
    image = Reshape((1, 512, 512, 4))(flat_pixels)
    model = Model(inputs=[dataIn], outputs=[image])
    model.compile(loss='mean_squared_error',
                  optimizer=Adam(lr=0.005, decay=0.0001),
                  metrics=['accuracy'])
    return model
def train_model(self, x_train, y_train, epoch=5, batch_size=128,
                validation_split_size=0.2, train_callbacks=()):
    """Train the classifier with real-time augmentation, checkpointing the
    best weights by validation loss.

    Returns:
        [train_losses, val_losses, fbeta_score].
    """
    history = LossHistory()
    X_train, X_valid, y_train, y_valid = train_test_split(
        x_train, y_train, test_size=validation_split_size)
    # Compiles with Keras' default 'adam' optimizer. (Two configured
    # optimizer instances used to be constructed here but were never
    # passed to compile; the dead locals have been removed.)
    self.classifier.compile(loss='binary_crossentropy',
                            optimizer='adam',
                            metrics=['accuracy'])
    print('X_train.shape[0]')
    print(X_train.shape[0])
    # Keep only the best weights seen so far.
    checkpointer = ModelCheckpoint(filepath="weights.best.hdf5",
                                   verbose=1,
                                   save_best_only=True)
    # Light augmentation: small shifts plus horizontal flips only.
    datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=0,  # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=True,  # randomly flip images
        vertical_flip=False)  # randomly flip images
    datagen.fit(X_train)
    self.classifier.fit_generator(
        datagen.flow(X_train, y_train, batch_size=batch_size),
        steps_per_epoch=X_train.shape[0] // batch_size,
        epochs=epoch,
        validation_data=(X_valid, y_valid),
        callbacks=[history, *train_callbacks, checkpointer])
    fbeta_score = self._get_fbeta_score(self.classifier, X_valid, y_valid)
    print(fbeta_score)
    return [history.train_losses, history.val_losses, fbeta_score]
def train():
    """ train """
    model = get_model()
    x_train, y_train = load_data(PLANET_KAGGLE_JPEG_DIR)

    # Fixed hyper-parameters for this run.
    validation_split_size = 0.2
    learn_rate = 0.001
    epoch = 5
    batch_size = 128
    train_callbacks = ()

    loss_history = LossHistory()
    # Carve out a validation split for early stopping and scoring.
    X_train, X_valid, y_train, y_valid = train_test_split(
        x_train, y_train, test_size=validation_split_size)
    model.compile(loss='binary_crossentropy',
                  optimizer=Adam(lr=learn_rate),
                  metrics=['accuracy'])
    stop_early = EarlyStopping(monitor='val_loss', patience=3, verbose=0,
                               mode='auto')
    model.fit(X_train,
              y_train,
              batch_size=batch_size,
              epochs=epoch,
              verbose=1,
              validation_data=(X_valid, y_valid),
              callbacks=[loss_history, *train_callbacks, stop_early])
    model.save(MODEL_NAME + '_model.h5')

    # Score the validation predictions at a fixed 0.2 threshold.
    p_valid = model.predict(X_valid)
    fbeta = fbeta_score(y_valid,
                        np.array(p_valid) > 0.2,
                        beta=2,
                        average='samples')
    print(fbeta)
def train_model_aug(self, x_train, y_train, learn_rate=0.001, epoch=5,
                    batch_size=128, validation_split_size=0.15,
                    train_callbacks=()):
    """Train the classifier with image augmentation.

    Fixes: the augmentation statistics and steps_per_epoch previously used
    the full `x_train` (which includes the validation split) — leaking
    validation data into `datagen.fit` and overcounting steps. Both now use
    the actual training split `X_train`.

    Returns:
        [train_losses, val_losses, fbeta_score].
    """
    history = LossHistory()
    X_train, X_valid, y_train, y_valid = train_test_split(
        x_train, y_train, test_size=validation_split_size)
    opt = Adam(lr=learn_rate)
    self.classifier.compile(loss='binary_crossentropy', optimizer=opt,
                            metrics=['accuracy'])
    # early stopping will auto-stop training process if model stops learning after 3 epochs
    earlyStopping = EarlyStopping(monitor='val_loss', patience=3, verbose=0,
                                  mode='auto')
    datagen = ImageDataGenerator(rotation_range=10,
                                 width_shift_range=0.2,
                                 height_shift_range=0.2,
                                 zoom_range=0.1,
                                 horizontal_flip=True)
    # Was datagen.fit(x_train): statistics must come from the training split.
    datagen.fit(X_train)
    self.classifier.fit_generator(
        datagen.flow(X_train, y_train, batch_size=32),
        # Was len(x_train) / 32, which overcounted by the validation fraction.
        steps_per_epoch=len(X_train) / 32,
        epochs=epoch,
        verbose=1,
        validation_data=(X_valid, y_valid),
        callbacks=[history, *train_callbacks, earlyStopping])
    fbeta_score = self._get_fbeta_score(self.classifier, X_valid, y_valid)
    return [history.train_losses, history.val_losses, fbeta_score]
def __init__(self, config):
    """Build and compile the action classifier from a config object.

    Reads from config: model_type, model_version, data_set, batch_size,
    num_actions, pick_num, crop_len, njoints, body_members, dropout,
    learning_rate. Assumes self.classifier is provided by the subclass —
    TODO confirm.
    """
    self.name = config.model_type + '_' + config.model_version
    self.data_set = config.data_set
    self.batch_size = config.batch_size
    self.num_actions = config.num_actions
    # Sequence length: prefer pick_num, fall back to crop_len, else None
    # (variable-length sequences).
    self.seq_len = config.pick_num if config.pick_num > 0 else (
        config.crop_len if config.crop_len > 0 else None)
    self.njoints = config.njoints
    self.body_members = config.body_members
    self.dropout = config.dropout

    # Input: (batch, joints, time, 3) — presumably xyz joint coordinates;
    # verify against the data pipeline.
    real_seq = Input(
        batch_shape=(self.batch_size, self.njoints, self.seq_len, 3),
        name='real_seq',
        dtype='float32')
    pred_action = self.classifier(real_seq)
    self.model = Model(real_seq, pred_action, name=self.name)
    # Sparse loss: integer class labels rather than one-hot targets.
    self.model.compile(Adam(lr=config.learning_rate),
                       'sparse_categorical_crossentropy', ['accuracy'])
def train_model_generator(self, generator_train, generator_valid, learn_rate=0.001, epoch=5, batchSize=128, steps=32383, validation_steps=8096, train_callbacks=()): history = LossHistory() #valid 8096 32383 opt = Adam(lr=learn_rate) steps = steps / batchSize + 1 - 9 validation_steps = validation_steps / batchSize + 1 if steps % batchSize == 0: steps = steps / batchSize - 9 if validation_steps % batchSize == 0: validation_steps = validation_steps / batchSize print(steps, validation_steps) self.classifier.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy']) earlyStopping = EarlyStopping(monitor='val_loss', patience=3, verbose=0, mode='auto') self.classifier.fit_generator( generator_train, steps_per_epoch=steps, epochs=epoch, verbose=1, validation_data=generator_valid, validation_steps=validation_steps, callbacks=[history, *train_callbacks, earlyStopping]) fbeta_score = self._get_fbeta_score(self.classifier, X_valid, y_valid) return [history.train_losses, history.val_losses, fbeta_score]
def __init__(self):
    """Build the GAN: a discriminator, a generator, and the stacked
    generator→discriminator model used to train the generator."""
    # Input shape: 28x28 single-channel images.
    self.img_rows = 28
    self.img_cols = 28
    self.channels = 1
    self.img_shape = (self.img_rows, self.img_cols, self.channels)
    self.latent_dim = 100

    # Adam(lr=0.0002, beta_1=0.5) — the common DCGAN settings.
    optimizer = Adam(0.0002, 0.5)

    # Build and compile the discriminator
    self.discriminator = self.build_discriminator()

    # Build the generator
    self.generator = self.build_generator()

    # Load previously saved weights, if any.
    self.load_weights_from_file()

    # Compile the discriminator
    self.discriminator.compile(loss='binary_crossentropy',
                               optimizer=optimizer,
                               metrics=['accuracy'])

    # The generator takes noise as input and generates imgs
    z = Input(shape=(self.latent_dim, ))
    img = self.generator(z)

    # For the combined model we will only train the generator
    self.discriminator.trainable = False

    # The discriminator takes generated images as input and determines validity
    valid = self.discriminator(img)

    # The combined model (stacked generator and discriminator)
    # Trains the generator to fool the discriminator
    self.combined = Model(z, valid)
    self.combined.compile(loss='binary_crossentropy', optimizer=optimizer)
def define_LFattNet(sz_input, sz_input2, view_n, learning_rate):
    """Build the LFattNet disparity model over len(view_n)**2 light-field views.

    Returns a compiled Model (MAE loss). As written it is the *evaluation*
    variant, which also outputs the channel-attention map; the training
    variant (pred only) is kept in the commented lines below.
    """
    """ 81 inputs"""
    input_list = []
    for i in range(len(view_n) * len(view_n)):
        print('input ' + str(i))
        input_list.append(Input(shape=(sz_input, sz_input2, 1)))
    """ 81 features"""
    # One shared feature extractor applied to every view.
    feature_extraction_layer = feature_extraction(sz_input, sz_input2)
    feature_list = []
    for i in range(len(view_n) * len(view_n)):
        print('feature ' + str(i))
        feature_list.append(feature_extraction_layer(input_list[i]))
    """ cost volume """
    cv = Lambda(_getCostVolume_)(feature_list)
    """ channel attention """
    cv, attention = channel_attention(cv)
    """ cost volume regression """
    cost = basic(cv)
    # Drop the trailing singleton channel and move axis 1 last — presumably
    # the disparity axis, so softmax + regression act over it (TODO confirm).
    cost = Lambda(lambda x: K.permute_dimensions(K.squeeze(x, -1),
                                                 (0, 2, 3, 1)))(cost)
    pred = Activation('softmax')(cost)
    pred = Lambda(disparityregression)(pred)
    # when training use below
    # model = Model(inputs=input_list, outputs=[pred])
    # when evaluation use below
    model = Model(inputs=input_list, outputs=[pred, attention])
    model.summary()
    opt = Adam(lr=learning_rate)
    model.compile(optimizer=opt, loss='mae')
    return model
def main():
    """Adversarially train a generator/discriminator pair and periodically
    save a grid of generated samples to generate/<epoch>.png."""
    train = get_data()
    train_images, train_labels = make_batch(train)

    # Discriminator, compiled standalone for its own update steps.
    dis = discriminator()
    dis.summary()
    dis_opt = Adam(lr=1.0e-4, beta_1=0.0, beta_2=0.9)
    dis.compile(loss='binary_crossentropy', optimizer=dis_opt)

    # Generator is trained only through the combined model; the discriminator
    # is frozen inside it.
    gen = generator()
    gen.summary()
    gen.trainable = True
    dis.trainable = False
    comb = combine(gen, dis)
    comb.summary()
    gen_opt = Adam(lr=1.0e-4, beta_1=0.0, beta_2=0.9)
    comb.compile(loss='binary_crossentropy', optimizer=gen_opt)

    batch_size = _BATCH_SIZE
    noise_dim = _NOISE_DIM
    max_epoch = 10001
    # Each minibatch is consumed in max_train_only_dis slices of batch_size.
    max_train_only_dis = 5
    minibatch_size = batch_size * max_train_only_dis
    max_loop = int(train_images.shape[0] / minibatch_size)
    real = np.zeros((batch_size, train_images.shape[1], train_images.shape[2],
                     train_images.shape[3]), dtype=np.float32)
    minibatch_train_images = np.zeros(
        (minibatch_size, train_images.shape[1], train_images.shape[2],
         train_images.shape[3]), dtype=np.float32)
    progbar = Progbar(target=max_epoch)
    # NOTE(review): real_label / fake_label are defined but never used below;
    # the loop builds its own label lists (y) instead.
    real_label = [-1] * batch_size
    fake_label = [1] * batch_size
    for epoch in range(max_epoch):
        np.random.shuffle(train_images)
        for loop in range(max_loop):
            minibatch_train_images = train_images[loop *
                                                  minibatch_size:(loop + 1) *
                                                  minibatch_size]
            for train_only_dis in range(max_train_only_dis):
                real = minibatch_train_images[train_only_dis *
                                              batch_size:(train_only_dis + 1) *
                                              batch_size]
                noise = np.random.uniform(
                    -1, 1, (batch_size, noise_dim)).astype(np.float32)
                # Generator step: wants fakes classified as real (y = 1).
                dis.trainable = False
                y = [1] * batch_size
                gen_loss = comb.train_on_batch(noise, y)
                # Discriminator step: real batch labeled 1, fakes labeled 0.
                dis.trainable = True
                y = [1] * batch_size + [0] * batch_size
                fake = gen.predict(noise)
                dis_loss = dis.train_on_batch(np.concatenate((real, fake)), y)
        progbar.add(1, values=[("dis_loss", dis_loss),
                               ("gen_loss", gen_loss)])
        if epoch % 100 == 0:
            # Tile the last fake batch into one image, rescale [-1,1] -> [0,255].
            tmp = [r.reshape(-1, 32) for r in fake]
            tmp = np.concatenate(tmp, axis=1)
            img = ((tmp / 2.0 + 0.5) * 255.0).astype(np.uint8)
            Image.fromarray(img).save("generate/%d.png" % (epoch))
    backend.clear_session()
# MNIST MLP training + TFLite export (x_train/y_train, x_test/y_test are
# loaded earlier in the file).
# Scale pixel values to [0, 1].
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
# One-hot encode the 10 digit classes.
y_train = to_categorical(y_train, 10)
y_test = to_categorical(y_test, 10)

# 784 -> 512 -> 512 -> 10 MLP with dropout between layers.
model = Sequential()
model.add(Dense(512, input_shape=(784, )))
model.add(Activation("relu"))
model.add(Dropout(0.2))
model.add(Dense(512))
model.add(Activation("relu"))
model.add(Dropout(0.2))
model.add(Dense(10))
model.add(Activation("softmax"))

model.compile(loss="categorical_crossentropy",
              optimizer=Adam(),
              metrics=['accuracy'])
model.fit(x_train,
          y_train,
          batch_size=128,
          epochs=20,
          verbose=1,
          validation_data=(x_test, y_test))
model.save('mnist_model.h5')

# Convert the saved Keras model to TFLite.
converter = lite.TFLiteConverter.from_keras_model_file('mnist_model.h5')
tflite_model = converter.convert()
# NOTE(review): file handle is never closed; a with-block would be safer.
open('mnist_model.tflite', 'wb').write(tflite_model)
def train(data, file_name, filters, kernels, num_epochs=50, batch_size=128,
          train_temp=1, init=None, activation=tf.nn.relu, bn=False):
    """
    Train a n-layer CNN for MNIST and CIFAR.

    Args:
        data: dataset object exposing train/validation data and labels.
        file_name: path to save the trained model, or None to skip saving.
        filters, kernels: per-conv-layer filter counts and kernel sizes.
        num_epochs, batch_size: training schedule.
        train_temp: softmax temperature applied to the logits in the loss.
        init: optional path to initial weights.
        activation: TF activation applied via Lambda after each conv.
        bn: insert BatchNormalization after each conv when True.

    Returns:
        {'model': trained model, 'history': Keras training history}.
    """
    # create a Keras sequential model
    model = Sequential()
    model.add(
        Conv2D(filters[0], kernels[0], input_shape=data.train_data.shape[1:]))
    if bn:
        model.add(BatchNormalization())
    model.add(Lambda(activation))
    for f, k in zip(filters[1:], kernels[1:]):
        model.add(Conv2D(f, k))
        if bn:
            model.add(BatchNormalization())
        model.add(Lambda(activation))
    # the output layer, with 10 classes (raw logits; softmax is in the loss)
    model.add(Flatten())
    model.add(Dense(10))

    # load initial weights when given
    if init is not None:
        model.load_weights(init)

    # cross entropy between prediction and true label, on temperature-scaled logits
    def fn(correct, predicted):
        return tf.nn.softmax_cross_entropy_with_logits(
            labels=correct, logits=predicted / train_temp)

    # initiate the Adam optimizer (the variable was misleadingly named `sgd`)
    optimizer = Adam()

    model.compile(loss=fn, optimizer=optimizer, metrics=['accuracy'])
    model.summary()
    print("Training a {} layer model, saving to {}".format(
        len(filters) + 1, file_name))
    # run training with given dataset, and print progress
    history = model.fit(data.train_data,
                        data.train_labels,
                        batch_size=batch_size,
                        validation_data=(data.validation_data,
                                         data.validation_labels),
                        epochs=num_epochs,
                        shuffle=True)
    # save model to a file
    if file_name is not None:
        model.save(file_name)
    return {'model': model, 'history': history}
dense2 = Dense(84, )(dense1) # 全连接层dense3 dense3 = Dense(10, activation='softmax')(dense2) return dense3 if __name__ == '__main__': # 输入 myinput = Input([32, 32, 3]) # 构建网络 output = lenet(myinput) # 建立模型 model = Model(myinput, output) # 定义优化器,这里选用Adam优化器,学习率设置为0.0003 adam = Adam(lr=0.0003) # 编译模型 model.compile(optimizer=adam, loss="categorical_crossentropy", metrics=['accuracy']) # 准备数据 # 获取输入的图像 X = GetTrainDataByLabel('data') # 获取图像的label,这里使用to_categorical函数返回one-hot之后的label Y = to_categorical(GetTrainDataByLabel('labels')) # 开始训练模型,batch设置为200,一共50个epoch model.fit(X, Y, 200,
def __init__(self, nb_classes, model, seq_length, saved_model=None, features_length=2048): """ `model` = one of: lstm lrcn mlp conv_3d c3d `nb_classes` = the number of classes to predict `seq_length` = the length of our video sequences `saved_model` = the path to a saved Keras model to load """ # Set defaults. self.seq_length = seq_length self.load_model = load_model self.saved_model = saved_model self.nb_classes = nb_classes self.feature_queue = deque() # Set the metrics. Only use top k if there's a need. metrics = ['accuracy'] if self.nb_classes >= 10: metrics.append('top_k_categorical_accuracy') # Get the appropriate model. if self.saved_model is not None: print("Loading model %s" % self.saved_model) self.model = load_model(self.saved_model) elif model == 'lstm': print("Loading LSTM model.") self.input_shape = (seq_length, features_length) self.model = self.lstm() elif model == 'lrcn': print("Loading CNN-LSTM model.") self.input_shape = (seq_length, 80, 80, 3) self.model = self.lrcn() elif model == 'mlp': print("Loading simple MLP.") self.input_shape = (seq_length, features_length) self.model = self.mlp() elif model == 'conv_3d': print("Loading Conv3D") self.input_shape = (seq_length, 80, 80, 3) self.model = self.conv_3d() elif model == 'c3d': print("Loading C3D") self.input_shape = (seq_length, 80, 80, 3) self.model = self.c3d() else: print("Unknown network.") sys.exit() # Now compile the network. optimizer = Adam(lr=1e-5, decay=1e-6) self.model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=metrics) print(self.model.summary())
def main():
    """Train a GAN over one-hot-encoded URL strings and save the networks
    plus a sample of generated URLs."""
    # Define Functions
    build_generator = build_generator_dense
    build_discriminator = build_discriminator_dense

    # Build dictionary: char -> index (0 is reserved for padding) and back.
    dictionary = {}
    reverse_dictionary = {}
    for i, c in enumerate(alphabet):
        dictionary[c] = i + 1
        reverse_dictionary[i + 1] = c

    # Build Oprimizer
    optimizer = Adam(learning_rate, 0.5)

    # Build and compile the discriminator
    print ("*** BUILDING DISCRIMINATOR ***")
    discriminator = build_discriminator()
    discriminator.compile(loss='binary_crossentropy',
                          optimizer=optimizer,
                          metrics=['accuracy'])

    # Build and compile the generator
    print ("*** BUILDING GENERATOR ***")
    generator = build_generator()
    generator.compile(loss='binary_crossentropy', optimizer=optimizer)

    # The generator takes noise as input and generated samples
    z = Input(shape=noise_shape)
    gen = generator(z)

    # For the combined model we will only train the generator
    discriminator.trainable = False

    # The valid takes generated samples as input and determines validity
    valid = discriminator(gen)

    # The combined model (stacked generator and discriminator) takes
    # noise as input => generates samples => determines validity
    combined = Model(z, valid)
    combined.compile(loss='binary_crossentropy', optimizer=optimizer)

    # Load the dataset: keep only lines made of known characters and short
    # enough; one-hot encode each kept line (index 0 marks padding).
    data = []
    for line in open(input_data, "r").read().splitlines():
        this_sample = np.zeros(url_shape)
        line = line.lower()
        if len(set(line) - set(alphabet)) == 0 and len(line) < url_len:
            # Initialize every position to the padding symbol...
            for i, position in enumerate(this_sample):
                this_sample[i][0] = 1.0
            # ...then overwrite positions covered by the actual characters.
            for i, char in enumerate(line):
                this_sample[i][0] = 0.0
                this_sample[i][dictionary[char]] = 1.0
            data.append(this_sample)
        else:
            print("Uncompatible line:", line)
    print("Data ready. \nLines:", len(data))
    X_train = np.array(data)
    print ("Array Shape:", X_train.shape)

    half_batch = int(batch_size / 2)

    # Start Training
    for epoch in range(epochs):

        # ---------------------
        #  Train Discriminator
        # ---------------------

        # Select a random half batch of data
        idx = np.random.randint(0, X_train.shape[0], half_batch)
        samples = X_train[idx]
        noise_batch_shape = (half_batch,) + noise_shape
        noise = np.random.normal(0, 1, noise_batch_shape)

        # Generate a half batch of new data
        gens = generator.predict(noise)

        # Train the discriminator: real -> 1, generated -> 0.
        d_loss_real = discriminator.train_on_batch(samples,
                                                   np.ones((half_batch, 1)))
        d_loss_fake = discriminator.train_on_batch(gens,
                                                   np.zeros((half_batch, 1)))
        d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

        # ---------------------
        #  Train Generator
        # ---------------------

        noise_batch_shape = (batch_size,) + noise_shape
        noise = np.random.normal(0, 1, noise_batch_shape)

        # The generator wants the discriminator to label the generated samples as valid (ones)
        valid_y = np.array([1] * batch_size)

        # Train the generator
        g_loss = combined.train_on_batch(noise, valid_y)

        # Plot the progress
        print ("%d [D loss: %0.3f, acc.: %0.3f%%] [G loss: %0.3f]" %
               (epoch, d_loss[0], 100 * d_loss[1], g_loss))

        # If at save interval, print some examples
        if epoch % save_interval == 0:
            generated_samples = []
            r, c = 5, 5
            noise_batch_shape = (print_size,) + noise_shape
            noise = np.random.normal(0, 1, noise_batch_shape)
            gens = generator.predict(noise)
            # Decode each generated one-hot sequence back to characters
            # (index 0 = padding, skipped).
            for url in gens:
                this_url_gen = ""
                for position in url:
                    this_index = np.argmax(position)
                    if this_index != 0:
                        this_url_gen += reverse_dictionary[this_index]
                print(this_url_gen)
                generated_samples.append(this_url_gen)

    # Save networks
    discriminator.save(discriminator_savefile)
    generator.save(generator_savefile)

    # Save Samples — NOTE(review): generated_samples holds only the batch
    # from the last save interval; epoch 0 always triggers one, so it is
    # defined here.
    fo = open(generated_savefile, "w")
    for url in generated_samples:
        print (url, file=fo)
    fo.close()
import numpy as np
from tensorflow.contrib.keras.api.keras.models import Sequential
from tensorflow.contrib.keras.api.keras.layers import Dense, Activation
from tensorflow.contrib.keras.api.keras.optimizers import SGD, Adam

# Fit a small MLP to (x, y) samples of a sine curve and save the model.
data = np.loadtxt('sin.csv', delimiter=',', unpack=True)
x = data[0]
y = data[1]

# 1 -> 30 -> 40 -> 1 network with sigmoid hidden activations.
model = Sequential()
model.add(Dense(30, input_shape=(1, )))
model.add(Activation('sigmoid'))
model.add(Dense(40))
model.add(Activation('sigmoid'))
model.add(Dense(1))

# Adam optimizer (the variable was misleadingly named `sgd` before).
optimizer = Adam(lr=0.1)
model.compile(loss='mean_squared_error', optimizer=optimizer)
model.fit(x, y, epochs=1000, batch_size=20, verbose=0)

print('save model')
model.save('sin_model.h5')

# Report training MSE; one prediction pass is reused for the plot below
# (it was computed twice before).
predictions = model.predict(x)
print(np.mean(np.square(predictions - y)))

# Plot only when matplotlib is available: its import was commented out at
# the top of this script, which made the plt.plot call a NameError.
try:
    import matplotlib.pyplot as plt
except ImportError:
    plt = None
if plt is not None:
    plt.plot(x, y, 'b', x, predictions, 'r--')
    plt.show()
def train_cnn_7layer(data, file_name, params, num_epochs=50, batch_size=256,
                     train_temp=1, init=None, lr=0.01, decay=1e-5,
                     momentum=0.9, activation="relu", optimizer_name="sgd"):
    """
    Train a 7-layer cnn network for MNIST and CIFAR (same as the cnn model in Clever)
    mnist: 32 32 64 64 200 200
    cifar: 64 64 128 128 256 256

    Args:
        data: dataset object exposing train/validation data and labels.
        file_name: path to save the trained model, or None to skip saving.
        params: six layer widths (4 conv filter counts + 2 dense widths).
        num_epochs, batch_size: training schedule.
        train_temp: softmax temperature applied to the logits in the loss.
        init: optional path to initial weights.
        lr, decay, momentum: optimizer hyper-parameters.
        activation: activation name used after every hidden layer.
        optimizer_name: "sgd" or "adam".

    Returns:
        {'model': trained model, 'history': Keras training history}.

    Raises:
        ValueError: for an unknown optimizer_name (previously fell through
            and crashed later with NameError).
    """
    # create a Keras sequential model
    model = Sequential()
    print("training data shape = {}".format(data.train_data.shape))

    # conv block 1
    model.add(Conv2D(params[0], (3, 3), input_shape=data.train_data.shape[1:]))
    model.add(Activation(activation))
    model.add(Conv2D(params[1], (3, 3)))
    model.add(Activation(activation))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    # conv block 2
    model.add(Conv2D(params[2], (3, 3)))
    model.add(Activation(activation))
    model.add(Conv2D(params[3], (3, 3)))
    model.add(Activation(activation))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    # dense head
    model.add(Flatten())
    model.add(Dense(params[4]))
    model.add(Activation(activation))
    model.add(Dropout(0.5))
    model.add(Dense(params[5]))
    model.add(Activation(activation))
    # the output layer, with 10 classes (raw logits; softmax lives in the loss)
    model.add(Dense(10))

    # load initial weights when given
    if init is not None:
        model.load_weights(init)

    # cross entropy between prediction and true label, on temperature-scaled logits
    def fn(correct, predicted):
        return tf.nn.softmax_cross_entropy_with_logits(
            labels=correct, logits=predicted / train_temp)

    if optimizer_name == "sgd":
        # initiate the SGD optimizer with given hyper parameters
        optimizer = SGD(lr=lr, decay=decay, momentum=momentum, nesterov=True)
    elif optimizer_name == "adam":
        optimizer = Adam(lr=lr, beta_1=0.9, beta_2=0.999, epsilon=None,
                         decay=decay, amsgrad=False)
    else:
        raise ValueError("unknown optimizer_name: %r" % (optimizer_name,))

    # compile the Keras model, given the specified loss and optimizer
    model.compile(loss=fn, optimizer=optimizer, metrics=['accuracy'])
    model.summary()
    print("Training a {} layer model, saving to {}".format(
        len(params) + 1, file_name))
    # run training with given dataset, and print progress
    history = model.fit(data.train_data,
                        data.train_labels,
                        batch_size=batch_size,
                        validation_data=(data.validation_data,
                                         data.validation_labels),
                        epochs=num_epochs,
                        shuffle=True)
    # save model to a file
    if file_name is not None:
        model.save(file_name)
        print('model saved to ', file_name)
    return {'model': model, 'history': history}
def transfer_model(source_df, target_df, test_df, method_flag, fold_num):
    """Train a source-domain network, transfer its latent layer to a target
    task according to method_flag, and classify the test set.

    Args:
        source_df, target_df, test_df: dataframes whose first column is the
            label and remaining columns are features.
        method_flag: one of the mc._* method constants (_SCRATCH,
            _CONV_TRANSFER, _COUNT_ATDL, or a relation-vector method).
        fold_num: cross-validation fold index passed through to rv helpers.

    Returns:
        (predicted indices, test labels) — both transposed.
    """
    # Split each frame into its label column and feature columns.
    source_labels, source_data = np.split(np.array(source_df), [1], axis=1)
    target_labels, target_data = np.split(np.array(target_df), [1], axis=1)
    test_labels, test_data = np.split(np.array(test_df), [1], axis=1)

    # normalization (currently disabled — data passed through unchanged)
    #normalized_source_data = pre.normalize(source_data)
    #normalized_target_data = pre.normalize(target_data)
    #normalized_test_data = pre.normalize(test_data)
    normalized_source_data = source_data
    normalized_target_data = target_data
    normalized_test_data = test_data

    ### constuct model for source domain task ###
    # optimization
    opt = Adam()
    # network setting: shared latent trunk + per-domain last layers
    latent = models.latent(normalized_source_data.shape[1])
    sll = models.source_last_layer()
    tll = models.target_last_layer()

    source_inputs = Input(shape=normalized_source_data.shape[1:])
    latent_features = latent(source_inputs)
    source_predictors = sll(latent_features)
    # NOTE(review): mc._SORUCE_LATENT_TRAIN looks like a typo of "_SOURCE_"
    # in the project constants — cannot be fixed from here.
    latent.trainable = mc._SORUCE_LATENT_TRAIN
    # NOTE(review): source_predictors is a layer *output tensor*; setting
    # .trainable on it likely has no effect (probably sll.trainable was
    # intended) — confirm.
    source_predictors.trainable = True
    source_nn = Model(inputs=[source_inputs], outputs=[source_predictors])
    source_nn.compile(loss=['mean_squared_error'], optimizer=opt)
    #source_nn.summary()

    # training using source domain data (skipped when training from scratch)
    if method_flag != mc._SCRATCH:
        source_max_loop = int(normalized_source_data.shape[0] / mc._BATCH_SIZE)
        source_progbar = Progbar(target=mc._SOURCE_EPOCH_NUM)
        for epoch in range(mc._SOURCE_EPOCH_NUM):
            shuffle_data, shuffle_labels, _ = pre.paired_shuffle(
                normalized_source_data, source_labels, 1)
            for loop in range(source_max_loop):
                batch_train_data = shuffle_data[
                    loop * mc._BATCH_SIZE:(loop + 1) * mc._BATCH_SIZE]
                batch_train_labels = shuffle_labels[
                    loop * mc._BATCH_SIZE:(loop + 1) * mc._BATCH_SIZE]
                batch_train_labels = np.reshape(batch_train_labels,
                                                [len(batch_train_labels)])
                # One-hot targets trained against with MSE.
                one_hots = np.identity(mc._SOURCE_DIM_NUM)[np.array(
                    batch_train_labels, dtype=np.int32)]
                loss = source_nn.train_on_batch([batch_train_data], [one_hots])
                #source_progbar.add(1, values=[("source loss",loss)])
    # save (disabled)
    #latent.save('../results/source_latent.h5')
    #sll.save('../results/source_last_layer.h5')

    # compute relation vectors: how target labels map onto source outputs
    if method_flag == mc._SCRATCH or method_flag == mc._CONV_TRANSFER:
        # Plain one-hot targets in the target label space.
        target_vectors = np.identity(mc._TARGET_DIM_NUM)[np.array(
            target_labels, dtype=np.int32)]
        target_vectors = np.reshape(
            target_vectors, [target_vectors.shape[0], target_vectors.shape[2]])
    elif method_flag == mc._COUNT_ATDL:
        # Relabel target classes to their best-matching source classes.
        target_labels, relations = rv.compute_relation_labels(
            source_nn, normalized_target_data, target_labels, fold_num)
        target_vectors = np.identity(mc._SOURCE_DIM_NUM)[np.array(
            target_labels, dtype=np.int32)]
        target_vectors = np.reshape(
            target_vectors, [target_vectors.shape[0], target_vectors.shape[2]])
    else:
        # Soft relation vectors in the source output space.
        relation_vectors = rv.compute_relation_vectors(
            source_nn, normalized_target_data, target_labels, fold_num,
            method_flag)
        target_vectors = np.zeros((len(target_labels), mc._SOURCE_DIM_NUM),
                                  dtype=np.float32)
        for i in range(len(target_labels)):
            target_vectors[i] = relation_vectors[int(target_labels[i])]

    ### tuning model for target domain task ###
    latent.trainable = mc._TARGET_LATENT_TRAIN
    target_inputs = Input(shape=normalized_target_data.shape[1:])
    latent_features = latent(target_inputs)
    # Scratch/conv-transfer predict in the target label space; relation-based
    # methods reuse the source last layer and its label space.
    if method_flag == mc._SCRATCH or method_flag == mc._CONV_TRANSFER:
        predictors = tll(latent_features)
        label_num = mc._TARGET_DIM_NUM
    else:
        predictors = sll(latent_features)
        label_num = mc._SOURCE_DIM_NUM
    target_nn = Model(inputs=[target_inputs], outputs=[predictors])
    target_nn.compile(loss=['mean_squared_error'], optimizer=opt)
    #target_nn.summary()

    # training using target domain data
    target_max_loop = int(normalized_target_data.shape[0] / mc._BATCH_SIZE)
    target_progbar = Progbar(target=mc._TARGET_EPOCH_NUM)
    for epoch in range(mc._TARGET_EPOCH_NUM):
        shuffle_data, shuffle_labels, _ = \
            pre.paired_shuffle(normalized_target_data, target_vectors,
                               label_num)
        for loop in range(target_max_loop):
            batch_train_data = shuffle_data[
                loop * mc._BATCH_SIZE:(loop + 1) * mc._BATCH_SIZE]
            batch_train_labels = shuffle_labels[
                loop * mc._BATCH_SIZE:(loop + 1) * mc._BATCH_SIZE]
            loss = target_nn.train_on_batch([batch_train_data],
                                            [batch_train_labels])
            #target_progbar.add(1, values=[("target loss",loss)])

    # compute outputs of test data of target domain
    x = target_nn.predict([normalized_test_data])
    if method_flag == mc._SCRATCH or method_flag == mc._CONV_TRANSFER:
        idx = np.argmax(x, axis=1)
    elif method_flag == mc._COUNT_ATDL:
        idx = np.argmax(x, axis=1)
        # Map test labels through the learned source<->target relations so
        # they are comparable with the source-space argmax predictions.
        for j in range(len(test_labels)):
            for i in range(mc._TARGET_DIM_NUM):
                if test_labels[j] == i:
                    test_labels[j] = relations[i]
                    break
    else:
        # Nearest relation vector decides the predicted class.
        distance, idx = Neighbors(x, relation_vectors, 1)
        idx = idx[:, 0]
    backend.clear_session()
    return idx.T, test_labels.T
def main():
    """Train a conditional W-GAN-GP on material composition/feature vectors.

    Loads (or builds and caches) the training matrix, normalizes it, builds
    an AC-GAN-style generator/discriminator pair with a W-GAN-GP loss, runs
    the alternating training loop, and saves results via eval.save().
    """
    # Build the dataset on first run, otherwise load the cached CSV.
    if os.path.isfile(macro._LOCAL_SAVE_DATA) == 0:
        # download data and compute features (see "download_data.py")
        # atomic_numbers is used to compute the composition vector
        # labels are the target properties (formation energy)
        train_labels, compositions, features, atomic_numbers = dl.get_data()
        # compute bag-of-atom vector that trains GAN (see "preprocess.py")
        boa_vectors = pre.compute_bag_of_atom_vector(compositions, atomic_numbers)
        train_data = np.concatenate([boa_vectors, features], axis=1)
        save_data = pd.DataFrame(
            np.concatenate([train_labels, train_data], axis=1))
        save_data.to_csv(macro._LOCAL_SAVE_DATA, index=False, header=False)
    else:
        data = pd.read_csv(macro._LOCAL_SAVE_DATA,
                           delimiter=',',
                           engine="python",
                           header=None)
        data = np.array(data)
        # first column is the label, remaining columns are features
        train_labels, train_data = np.split(data, [1], axis=1)

    # normalization of training data such that min is 0 and max is 1 (see "preprocess.py")
    normalized_train_data, data_max, data_min = pre.normalize_for_train(
        train_data)
    normalized_train_labels, max_train_prop, min_train_prop = pre.normalize_for_train(
        train_labels)

    # Save normalization parameters to .csv for reuse at generation time
    save_data = pd.DataFrame(
        np.concatenate([max_train_prop, min_train_prop, data_max, data_min],
                       axis=0))
    save_data.to_csv(macro._SAVE_NORMALIZATION_PARAM, index=False,
                     header=False)

    ### start initialization of training GAN ###
    # set hyperparameters
    batch_size = macro._BATCH_SIZE  # batch size
    noise_dim = macro._NOISE_DIM  # dimension of noise to input generator
    property_dim = macro._PROP_DIM  # the number of properties
    lamb = macro._LAMB  # hyperparameter for W-GAN-GP
    max_epoch = macro._MAX_EPOCH  # maximum iteration of outer loop
    max_train_only_dis = macro._MAX_EPOCH_TRAIN_DISCRIMINATOR  # maximum iteration of inner loop defined by W-GAN-GP paper (https://arxiv.org/pdf/1704.00028.pdf)
    max_loop = int(train_data.shape[0] / batch_size)

    # set model (see "model.py")
    # in this code, we apply AC-GAN based network architecture (https://arxiv.org/abs/1610.09585)
    # difference from AC-GAN is that our model is a regression, not classification
    gen = model.generator(normalized_train_data.shape[1])
    dis = model.discriminator(normalized_train_data.shape[1])
    # rf is the output layer of discriminator that discriminates real or fake
    rf = model.real_fake()
    # pred is the output layer of discriminator that predicts target property
    pred = model.prediction()

    # set optimization method
    dis_opt = Adam(lr=1.0e-4, beta_1=0.0, beta_2=0.9)
    gen_opt = Adam(lr=1.0e-4, beta_1=0.0, beta_2=0.9)

    # first set discriminator's parameters for training
    # (flags must be set BEFORE compiling dis_model below)
    gen.trainable = False  # generator's parameters do not update
    dis.trainable = True
    rf.trainable = True
    pred.trainable = True

    # set variables when inputting real data
    real_inputs = Input(shape=normalized_train_data.shape[1:])
    dis_real_outputs = dis(real_inputs)
    real_fake_from_real = rf(dis_real_outputs)
    predictions_from_real = pred(dis_real_outputs)

    # set variables when inputting fake data
    fake_inputs = Input(shape=(noise_dim + property_dim, ))
    gen_fake_outputs = gen(fake_inputs)
    dis_fake_outputs = dis(gen_fake_outputs)
    real_fake_from_fake = rf(dis_fake_outputs)

    # set loss function for discriminator
    # in this case, we apply a W-GAN-GP based loss function for stability
    # W-GAN-GP (https://arxiv.org/pdf/1704.00028.pdf)
    # W-GAN-GP is unsupervised; our model is supervised (conditional), so we
    # apply wasserstein_loss to the real/fake part and mean_squared_error to
    # the prediction part
    interpolate = model.RandomWeightedAverage()(
        [real_inputs, gen_fake_outputs])
    dis_interpolate_outputs = dis(interpolate)
    real_fake_interpolate = rf(dis_interpolate_outputs)

    # gradient penalty of W-GAN-GP
    gp_reg = partial(model.gradient_penalty,
                     interpolate=interpolate,
                     lamb=lamb)
    # partial objects have no __name__, which Keras needs for loss naming
    gp_reg.__name__ = 'gradient_penalty'

    # connect inputs and outputs of the discriminator
    # prediction part is trained only on the training dataset (i.e., the
    # predict part is not trained on generated samples)
    dis_model = Model(inputs=[real_inputs, fake_inputs],\
        outputs=[real_fake_from_real, real_fake_from_fake, real_fake_interpolate, predictions_from_real])

    # compile
    dis_model.compile(loss=[model.wasserstein_loss,model.wasserstein_loss,\
        gp_reg,'mean_squared_error'],optimizer=dis_opt)

    # second set generator's parameters for training
    gen.trainable = True  # only generator's parameters update
    dis.trainable = False
    rf.trainable = False
    pred.trainable = False

    # set variables when inputting noise and target property
    gen_inputs = Input(shape=(noise_dim + property_dim, ))
    gen_outputs = gen(gen_inputs)

    # set variables for discriminator when inputting fake data
    dis_outputs = dis(gen_outputs)
    real_fake = rf(dis_outputs)
    predictions = pred(dis_outputs)

    # connect inputs and outputs of the generator-side model
    gen_model = Model(inputs=[gen_inputs], outputs=[real_fake, predictions])

    # compile
    # generator is trained by real_fake classification and prediction of target property
    gen_model.compile(loss=[model.wasserstein_loss, 'mean_squared_error'],
                      optimizer=gen_opt)

    # if you need progress bar
    progbar = Progbar(target=max_epoch)

    # targets for each output head (W-GAN convention: real=-1, fake=+1,
    # dummy=0 for the gradient-penalty output)
    real_label = [-1] * batch_size
    fake_label = [1] * batch_size
    dummy_label = [0] * batch_size

    #real = np.zeros((batch_size,train_data.shape[1]), dtype=np.float32)
    # reusable buffer holding concatenated [noise | property] rows
    inputs = np.zeros((batch_size, noise_dim + property_dim), dtype=np.float32)

    # epoch
    for epoch in range(max_epoch):
        # iteration
        for loop in range(max_loop):
            # shuffle to change the training order and select data
            # NOTE(review): reshuffling inside the inner loop means an epoch
            # does not visit each sample exactly once — batches are drawn
            # from a fresh permutation every iteration; confirm intended.
            # NOTE(review): called with 2 args here, 3 elsewhere in this
            # file — verify pre.paired_shuffle's signature/default.
            sdata, slabels, bak = pre.paired_shuffle(normalized_train_data,
                                                     normalized_train_labels)
            real = sdata[loop * batch_size:(loop + 1) * batch_size]
            properties = slabels[loop * batch_size:(loop + 1) * batch_size]

            # generator's parameters do not update
            gen.trainable = False
            dis.trainable = True
            rf.trainable = True
            pred.trainable = True

            # train discriminator (n_critic inner steps per W-GAN-GP)
            for train_only_dis in range(max_train_only_dis):
                noise = np.random.uniform(
                    -1, 1, (batch_size, noise_dim)).astype(np.float32)
                for i in range(len(noise)):
                    inputs[i] = np.hstack((noise[i], properties[i]))
                dis_loss = dis_model.train_on_batch(
                    [real, inputs],
                    [real_label, fake_label, dummy_label, properties])

            # second train only generator
            gen.trainable = True
            dis.trainable = False
            rf.trainable = False
            pred.trainable = False
            noise = np.random.uniform(-1, 1, (batch_size, noise_dim)).astype(
                np.float32)
            for i in range(len(noise)):
                inputs[i] = np.hstack((noise[i], properties[i]))
            gen_loss = gen_model.train_on_batch([inputs],
                                                [real_label, properties])

            # if you need progress bar
            progbar.add(1,
                        values=[("dis_loss", dis_loss[0]),
                                ("gen_loss", gen_loss[0])])

    # save generated samples and models
    eval.save(normalized_train_data, gen, dis, pred, rf)
    backend.clear_session()
def main():
    """Train an image W-GAN-GP and periodically dump generated samples.

    Builds generator/discriminator models, compiles the generator-side and
    discriminator-side training graphs with Wasserstein + gradient-penalty
    losses, runs the alternating training loop (5 critic steps per
    generator step), and saves a strip of generated images every 100
    epochs to generate/<epoch>.png.
    """
    batch_size = _BATCH_SIZE
    noise_dim = _NOISE_DIM
    lamb = 10.0  # gradient-penalty weight from the W-GAN-GP paper

    train = get_data()
    train_images, train_labels = make_batch(train)

    gen = generator()
    dis = discriminator()
    gen.summary()
    dis.summary()

    # Adam settings recommended by the W-GAN-GP paper
    dis_opt = Adam(lr=1.0e-4, beta_1=0.0, beta_2=0.9)
    gen_opt = Adam(lr=1.0e-4, beta_1=0.0, beta_2=0.9)

    # --- generator-side graph: only the generator updates ---
    gen.trainable = True
    dis.trainable = False
    gen_inputs = Input(shape=(noise_dim, ))
    gen_outputs = gen(gen_inputs)
    dis_outputs = dis(gen_outputs)
    gen_model = Model(inputs=[gen_inputs], outputs=[dis_outputs])
    gen_model.compile(loss=wasserstein_loss, optimizer=gen_opt)
    gen_model.summary()

    # --- discriminator-side graph: only the discriminator updates ---
    gen.trainable = False
    dis.trainable = True
    real_inputs = Input(shape=train_images.shape[1:])
    dis_real_outputs = dis(real_inputs)
    fake_inputs = Input(shape=(noise_dim, ))
    gen_fake_outputs = gen(fake_inputs)
    dis_fake_outputs = dis(gen_fake_outputs)
    # random interpolation between real and fake for the gradient penalty
    interpolate = RandomWeightedAverage()([real_inputs, gen_fake_outputs])
    dis_interpolate_outputs = dis(interpolate)
    gp_reg = partial(gradient_penalty, interpolate=interpolate, lamb=lamb)
    # partial objects carry no __name__, which Keras uses to name losses;
    # the sibling GAN trainer in this file sets it for the same reason
    gp_reg.__name__ = 'gradient_penalty'
    dis_model = Model(inputs=[real_inputs, fake_inputs],\
        outputs=[dis_real_outputs, dis_fake_outputs,dis_interpolate_outputs])
    dis_model.compile(loss=[wasserstein_loss, wasserstein_loss, gp_reg],
                      optimizer=dis_opt)
    dis_model.summary()

    max_epoch = 10001
    max_train_only_dis = 5  # critic steps per generator step (W-GAN-GP)
    minibatch_size = batch_size * max_train_only_dis
    max_loop = int(train_images.shape[0] / minibatch_size)

    progbar = Progbar(target=max_epoch)
    # W-GAN targets: real=-1, fake=+1, dummy=0 for the penalty output
    real_label = [-1] * batch_size
    fake_label = [1] * batch_size
    dummy_label = [0] * batch_size

    for epoch in range(max_epoch):
        np.random.shuffle(train_images)
        for loop in range(max_loop):
            # one chunk per outer step, sliced into critic-sized batches
            minibatch_train_images = train_images[loop *
                                                  minibatch_size:(loop + 1) *
                                                  minibatch_size]
            # train the critic max_train_only_dis times
            for train_only_dis in range(max_train_only_dis):
                real = minibatch_train_images[train_only_dis *
                                              batch_size:(train_only_dis +
                                                          1) * batch_size]
                noise = np.random.uniform(
                    -1, 1, (batch_size, noise_dim)).astype(np.float32)
                dis_loss = dis_model.train_on_batch(
                    [real, noise], [real_label, fake_label, dummy_label])
            # then one generator step
            noise = np.random.uniform(-1, 1, (batch_size, noise_dim)).astype(
                np.float32)
            gen_loss = gen_model.train_on_batch(noise, real_label)
            progbar.add(1,
                        values=[("dis_loss", dis_loss[0]),
                                ("gen_loss", gen_loss)])
        # dump a horizontal strip of generated samples every 100 epochs
        if epoch % 100 == 0:
            # was hard-coded (batch_size, 10); use noise_dim so sampling
            # stays correct when _NOISE_DIM != 10
            noise = np.random.uniform(-1, 1,
                                      (batch_size, noise_dim)).astype(
                                          np.float32)
            fake = gen.predict(noise)
            tmp = [r.reshape(-1, 32) for r in fake]
            tmp = np.concatenate(tmp, axis=1)
            # map from [-1, 1] back to [0, 255] pixel values
            img = ((tmp / 2.0 + 0.5) * 255.0).astype(np.uint8)
            Image.fromarray(img).save("generate/%d.png" % (epoch))
    backend.clear_session()
def train(data, file_name, nlayer, num_epochs=10, batch_size=128, train_temp=1,
          init=None, activation=tf.nn.relu):
    """
    Train an n-layer residual CNN for MNIST and CIFAR.

    Args:
        data: dataset object exposing train_data/train_labels and
            validation_data/validation_labels arrays.
        file_name: path to save the trained model, or None to skip saving.
        nlayer: network depth selector; must be 2, 3, 4, or 5.
        num_epochs: number of training epochs.
        batch_size: minibatch size for model.fit.
        train_temp: softmax temperature; logits are divided by it in the loss.
        init: optional path to initial weights to load before training.
        activation: activation function applied between residual blocks.

    Returns:
        dict with 'model' (the trained Keras model) and 'history'
        (the keras History object from fit).

    Raises:
        ValueError: if nlayer is not one of 2-5 (the original code would
            have hit a NameError on undefined x instead).
    """
    # Residual-stack layout per depth: (block constructor, filter count),
    # each followed by a Lambda(activation). This reproduces exactly the
    # layer sequence of the former copy-pasted if-branches.
    stacks = {
        2: [(Residual2, 8), (Residual2, 16)],
        3: [(Residual2, 8), (Residual, 8), (Residual2, 16)],
        4: [(Residual2, 8), (Residual, 8), (Residual2, 16), (Residual, 16)],
        5: [(Residual2, 8), (Residual, 8), (Residual, 8), (Residual2, 16),
            (Residual, 16)],
    }
    if nlayer not in stacks:
        raise ValueError("nlayer must be one of %s, got %r" %
                         (sorted(stacks), nlayer))

    inputs = Input(shape=(28, 28, 1))
    x = inputs
    for block, filters in stacks[nlayer]:
        x = block(filters, activation)(x)
        x = Lambda(activation)(x)
    x = AveragePooling2D(pool_size=7)(x)
    x = Flatten()(x)
    x = Dense(10)(x)  # 10-class logits (no softmax; applied in the loss)
    model = Model(inputs=inputs, outputs=x)

    # load initial weights when given
    if init is not None:
        model.load_weights(init)

    # cross entropy between true labels and temperature-scaled logits
    def fn(correct, predicted):
        return tf.nn.softmax_cross_entropy_with_logits(labels=correct,
                                                       logits=predicted /
                                                       train_temp)

    # initiate the Adam optimizer
    sgd = Adam()

    # compile the Keras model, given the specified loss and optimizer
    model.compile(loss=fn, optimizer=sgd, metrics=['accuracy'])
    model.summary()

    # run training with the given dataset, and print progress
    history = model.fit(data.train_data,
                        data.train_labels,
                        batch_size=batch_size,
                        validation_data=(data.validation_data,
                                         data.validation_labels),
                        epochs=num_epochs,
                        shuffle=True)

    # save model to a file
    if file_name is not None:
        model.save(file_name)

    return {'model': model, 'history': history}