# Flat training script: 2-class sequence classifier built from a SimpleRNN.
# Relies on globals defined earlier in the file: X_train/X_test/y_train/y_test,
# TIME_STEPS, IMPUT_SIZE, CELL_SIZE, OUTPUT_SIZE, LR, BATCH_SIZE.
# X_train = X_train.reshape(-1,TIME_STEPS,IMPUT_SIZE) #normalize
# X_test = X_test.reshape(-1,TIME_STEPS,IMPUT_SIZE) #normalize
y_train = np_utils.to_categorical(y_train, num_classes=2)  # one-hot targets
y_test = np_utils.to_categorical(y_test, num_classes=2)
#print(y_train.shape)
model = Sequential()
# RNN cell
model.add(SimpleRNN(CELL_SIZE, input_shape=(TIME_STEPS, IMPUT_SIZE)))
# output layer
model.add(Dense(OUTPUT_SIZE))
model.add(Activation('softmax'))
# optimizer
# BUG FIX: the original did `Adagrad = Adagrad(LR)`, rebinding the optimizer
# *class* name to an instance — any later attempt to construct an Adagrad
# optimizer in this module would then fail. Use a lowercase instance name.
adagrad = Adagrad(LR)
# NOTE(review): 'mean_squared_error' with a softmax output is unusual —
# 'categorical_crossentropy' is the conventional pairing; left unchanged
# to preserve behavior.
model.compile(optimizer=adagrad, loss='mean_squared_error', metrics=['accuracy'])
# train
print("Training---------")
model.fit(X_train, y_train, epochs=8000, batch_size=BATCH_SIZE)
print("\nTesting---------")
cost, accuracy = model.evaluate(X_test, y_test, batch_size=y_test.shape[0], verbose=False)
def _training_model(vec, ac_weights, rl_weights, output_folder, args):
    """Build and train the multi-task LSTM next-event prediction model.

    Three inputs (activity prefixes, role prefixes, time prefixes) feed a
    shared LSTM branch (activities + roles, via frozen pretrained embeddings)
    and a separate time branch; three heads predict the next activity, next
    role and next timestamp. Checkpoints are written to `output_folder`.

    Args:
        vec (dict): vectorized data with 'prefixes' ('activities', 'roles',
            'times') and 'next_evt' target arrays.
        ac_weights (ndarray): pretrained activity embedding matrix.
        rl_weights (ndarray): pretrained role embedding matrix.
        output_folder (str): directory for model checkpoints.
        args (dict): hyper-parameters: 'l_size', 'imp', 'lstm_act',
            'dense_act', 'optim', 'model_type'.

    Raises:
        ValueError: if args['optim'] names an unsupported optimizer.
    """
    print('Build model...')
    print(args)
    # =========================================================================
    # Input layer
    # =========================================================================
    ac_input = Input(shape=(vec['prefixes']['activities'].shape[1], ),
                     name='ac_input')
    rl_input = Input(shape=(vec['prefixes']['roles'].shape[1], ),
                     name='rl_input')
    t_input = Input(shape=(vec['prefixes']['times'].shape[1], 1),
                    name='t_input')
    # =========================================================================
    # Embedding layer for categorical attributes (frozen pretrained weights)
    # =========================================================================
    ac_embedding = Embedding(ac_weights.shape[0],
                             ac_weights.shape[1],
                             weights=[ac_weights],
                             input_length=vec['prefixes']['activities'].shape[1],
                             trainable=False,
                             name='ac_embedding')(ac_input)
    rl_embedding = Embedding(rl_weights.shape[0],
                             rl_weights.shape[1],
                             weights=[rl_weights],
                             input_length=vec['prefixes']['roles'].shape[1],
                             trainable=False,
                             name='rl_embedding')(rl_input)
    # =========================================================================
    # Layer 1: shared LSTM over concatenated embeddings; separate LSTM for time
    # =========================================================================
    merged = Concatenate(name='concatenated',
                         axis=2)([ac_embedding, rl_embedding])
    l1_c1 = LSTM(args['l_size'],
                 kernel_initializer='glorot_uniform',
                 return_sequences=True,
                 dropout=0.2,
                 implementation=args['imp'])(merged)
    l1_c3 = LSTM(args['l_size'],
                 activation=args['lstm_act'],
                 kernel_initializer='glorot_uniform',
                 return_sequences=True,
                 dropout=0.2,
                 implementation=args['imp'])(t_input)
    # =========================================================================
    # Batch Normalization Layer
    # =========================================================================
    batch1 = BatchNormalization()(l1_c1)
    batch3 = BatchNormalization()(l1_c3)
    # =========================================================================
    # Layer 2: one specialized LSTM per prediction task
    # =========================================================================
    # The layer specialized in activity prediction
    l2_c1 = LSTM(args['l_size'],
                 kernel_initializer='glorot_uniform',
                 return_sequences=False,
                 dropout=0.2,
                 implementation=args['imp'])(batch1)
    # The layer specialized in role prediction
    l2_c2 = LSTM(args['l_size'],
                 kernel_initializer='glorot_uniform',
                 return_sequences=False,
                 dropout=0.2,
                 implementation=args['imp'])(batch1)
    # The layer specialized in time prediction
    l2_3 = LSTM(args['l_size'],
                activation=args['lstm_act'],
                kernel_initializer='glorot_uniform',
                return_sequences=False,
                dropout=0.2,
                implementation=args['imp'])(batch3)
    # =========================================================================
    # Output Layer
    # =========================================================================
    act_output = Dense(ac_weights.shape[0],
                       activation='softmax',
                       kernel_initializer='glorot_uniform',
                       name='act_output')(l2_c1)
    role_output = Dense(rl_weights.shape[0],
                        activation='softmax',
                        kernel_initializer='glorot_uniform',
                        name='role_output')(l2_c2)
    if ('dense_act' in args) and (args['dense_act'] is not None):
        time_output = Dense(1,
                            activation=args['dense_act'],
                            kernel_initializer='glorot_uniform',
                            name='time_output')(l2_3)
    else:
        time_output = Dense(1,
                            kernel_initializer='glorot_uniform',
                            name='time_output')(l2_3)
    model = Model(inputs=[ac_input, rl_input, t_input],
                  outputs=[act_output, role_output, time_output])
    if args['optim'] == 'Nadam':
        opt = Nadam(learning_rate=0.002, beta_1=0.9, beta_2=0.999)
    elif args['optim'] == 'Adam':
        opt = Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999,
                   amsgrad=False)
    elif args['optim'] == 'SGD':
        opt = SGD(learning_rate=0.01, momentum=0.0, nesterov=False)
    elif args['optim'] == 'Adagrad':
        opt = Adagrad(learning_rate=0.01)
    else:
        # BUG FIX: an unrecognized optimizer name previously left `opt`
        # unbound, producing an opaque NameError at model.compile below.
        raise ValueError("Unsupported optimizer: %s" % args['optim'])
    model.compile(loss={'act_output': 'categorical_crossentropy',
                        'role_output': 'categorical_crossentropy',
                        'time_output': 'mae'},
                  optimizer=opt)
    model.summary()
    early_stopping = EarlyStopping(monitor='val_loss', patience=50)
    cb = tc.TimingCallback(output_folder)
    clean_models = cm.CleanSavedModelsCallback(output_folder, 2)
    # Output file (epoch/val_loss placeholders filled in by ModelCheckpoint)
    output_file_path = os.path.join(
        output_folder,
        'model_' + str(args['model_type']) + '_{epoch:02d}-{val_loss:.2f}.h5')
    # Saving: keep only the best model by validation loss
    model_checkpoint = ModelCheckpoint(output_file_path,
                                       monitor='val_loss',
                                       verbose=0,
                                       save_best_only=True,
                                       save_weights_only=False,
                                       mode='auto')
    lr_reducer = ReduceLROnPlateau(monitor='val_loss',
                                   factor=0.5,
                                   patience=10,
                                   verbose=0,
                                   mode='auto',
                                   min_delta=0.0001,
                                   cooldown=0,
                                   min_lr=0)
    # NOTE(review): batch size is taken from the prefix *length* dimension,
    # not the number of samples — confirm this is intended.
    batch_size = vec['prefixes']['activities'].shape[1]
    model.fit(
        {
            'ac_input': vec['prefixes']['activities'],
            'rl_input': vec['prefixes']['roles'],
            't_input': vec['prefixes']['times']
        },
        {
            'act_output': vec['next_evt']['activities'],
            'role_output': vec['next_evt']['roles'],
            'time_output': vec['next_evt']['times']
        },
        validation_split=0.2,
        verbose=2,
        callbacks=[
            early_stopping, model_checkpoint, lr_reducer, cb, clean_models
        ],
        batch_size=batch_size,
        epochs=200)
# Fragment of a per-fold CRNN training loop (legacy Keras 1.x API:
# border_mode/output_dim/nb_epoch). Pools and flattens per-timestep CNN
# features, runs an LSTM, and classifies each timestep into 2 classes.
model.add(
    TimeDistributed(
        AveragePooling2D(pool_size=(1, 2), border_mode='valid')))
model.add(TimeDistributed(Flatten()))
model.add(LSTM(output_dim=128, return_sequences=True))
model.add(Dropout(0.5))
model.add(TimeDistributed(Dense(256)))
model.add(Dropout(0.5))
model.add(TimeDistributed(Dense(2, activation='softmax')))
# Merge the per-timestep predictions (layer defined elsewhere in the file).
model.add(time_distributed_merge_layer)
# Several optimizers instantiated; only rmsprop is actually used below.
rmsprop = RMSprop()
sgd = SGD(momentum=0.9)
adadelta = Adadelta()
nadam = Nadam()
adagrad = Adagrad()
model.compile(loss='binary_crossentropy',
              optimizer=rmsprop,
              metrics=['accuracy'])
model.summary()
model.fit(X_train,
          Y_train,
          batch_size=batch_size,
          nb_epoch=nb_epochs,
          validation_data=(X_test, Y_test),
          verbose=1)
# Advance to the next cross-validation fold and free the training tensors.
foldNo = foldNo + 1
del X_train
else:
    # NOTE(review): this chunk begins mid-conditional; the matching `if`
    # branch is outside this view. Here the initial LR comes from the
    # schedule at epoch 0.
    init_lr = lr_schedule(0)
#ResNet:
# model = keras.applications.resnet50.ResNet50(input_shape=None, include_top=True, weights=None)
# ResNet v1, depth 32 (5*6+2) — see He et al. residual-network depth formula.
model = resnet_v1(input_shape=input_shape,
                  depth=5 * 6 + 2,
                  num_classes=num_classes)
# NOTE(review): if `optimizer` matches none of these names, `opt` is left
# unbound and model.compile raises NameError — confirm callers only pass
# these four values.
if optimizer == 'Adam':
    opt = Adam(lr=init_lr)
elif optimizer == 'SGD':
    opt = SGD(lr=init_lr)
elif optimizer == 'RMSprop':
    opt = RMSprop(lr=init_lr)
elif optimizer == 'Adagrad':
    opt = Adagrad(lr=init_lr)
model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy', 'top_k_categorical_accuracy'])
# model.summary()
print("-" * 20 + exp_name + '-' * 20)
# Prepare model model saving directory.
lr_scheduler = LearningRateScheduler(lr_schedule)
lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1),
                               cooldown=0,
                               patience=5,
                               min_lr=0.5e-6)
def train(path_loader, min_iterations, aton_iteration, val_file, val_notes,
          num_features, batchsize, save_best):
    """Train the strong classifier by mini-batch iterations.

    Loads train/val sets, compiles the model with Adagrad + binary
    cross-entropy, then runs `min_iterations` calls to train_on_batch.
    Every 20 iterations it saves the loss curve, computes validation AUC,
    and checkpoints the model (tracking the best AUC when `save_best`).
    """
    read_file_loader(path_loader, 0)  # Load train set
    read_file_loader(val_file, 1)  # Load val set
    read_annotation(val_notes, 1)  # Load val annotation
    model = create_model()
    adagrad = Adagrad(lr=0.01, epsilon=1e-08)
    model.compile(loss='binary_crossentropy', optimizer=adagrad)
    Results_Path = os.path.join(
        'results/strong/VAL',
        str(aton_iteration))  # Directory to save val stats in training
    output_dir = os.path.join(
        'models/strong_model',
        str(aton_iteration))  # Directory to save models and checkpoints
    model_path = os.path.join(output_dir, 'model.json')
    best_model_path = os.path.join(output_dir, 'best_model.json')
    best_weights_path = os.path.join(output_dir, 'best_weights.mat')
    if not os.path.exists(Results_Path):
        os.makedirs(Results_Path)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    train_dataset = Dataset('dataset/train', False, False,
                            num_features)  # Create train dataset
    val_dataset = Dataset('dataset/val', True, False,
                          num_features)  # Create val dataset
    loss_graph = []
    full_batch_loss = []
    total_iterations = 0
    bestAUC = 0
    previousAUC = [0]  # AUC history; [-1] is the most recent validation AUC
    print('Train dataset: ' + str(len(train_dataset)))
    plt.ion()
    print('Training Strong Classifier...')
    prog = pyprog.ProgressBar("",
                              " AUC - " + str(round(previousAUC[-1], 4)) + '%',
                              total=min_iterations,
                              bar_length=50,
                              complete_symbol="=",
                              not_complete_symbol=" ",
                              wrap_bar_prefix=" [",
                              wrap_bar_suffix="] ",
                              progress_explain="",
                              progress_loc=pyprog.ProgressBar.PROGRESS_LOC_END)
    prog.update()
    while total_iterations != min_iterations:
        inputs, targets, stats = load_dataset_train_batch(
            train_dataset,
            batchsize)  # Load normal and abnormal video C3D features
        batch_loss = model.train_on_batch(inputs, targets)
        full_batch_loss.append(float(batch_loss))
        statistics.stats_batch(full_batch_loss, aton_iteration)
        loss_graph = np.hstack((loss_graph, batch_loss))
        total_iterations += 1
        if total_iterations % 20 == 0:
            # NOTE(review): output_dir has no trailing separator, so these
            # checkpoint paths are formed by plain string concatenation
            # (unlike the os.path.join calls above) — confirm intended.
            iteration_path = output_dir + 'Iterations_graph_' + str(
                total_iterations) + '.mat'
            savemat(iteration_path,
                    dict(loss_graph=loss_graph))  # Loss checkpoint
            previousAUC.append(
                auc(model, val_dataset, total_iterations,
                    aton_iteration))  # Validation results
            if previousAUC[-1] > bestAUC and save_best:
                save_model(model, best_model_path,
                           best_weights_path)  # Best model checkpoint
                bestAUC = previousAUC[-1]
            weights_path = output_dir + 'weightsStrong_' + str(
                total_iterations) + '.mat'
            save_model(model, model_path, weights_path)  # Model checkpoint
            prog.set_suffix(" AUC - " + str(round(previousAUC[-1], 4)) +
                            '% | Best AUC - ' + str(round(bestAUC, 4)) + '%')
        prog.set_stat(total_iterations)
        prog.update()
    prog.end()
    plt.ioff()
    save_model(
        model, best_model_path, best_weights_path
    ) if not save_best else None  # Save last as best if the best was not kept
def setOptimizer(self, **kwargs):
    """
    Sets and compiles a new optimizer for the Translation_Model.

    The optimizer is chosen by self.params['OPTIMIZER'] (case-insensitive).
    Learning rate, momentum/rho/beta parameters, gradient clipping
    (CLIP_C = clipnorm, CLIP_V = clipvalue) and LR decay are all read from
    self.params with defaults matching the Keras defaults.

    :param kwargs:
    :return:
    """
    # compile differently depending if our model is 'Sequential' or 'Graph'
    if self.verbose > 0:
        logging.info("Preparing optimizer: %s [LR: %s - LOSS: %s - "
                     "CLIP_C %s - CLIP_V %s - LR_OPTIMIZER_DECAY %s] and"
                     " compiling." %
                     (str(self.params['OPTIMIZER']),
                      str(self.params.get('LR', 0.01)),
                      str(self.params.get('LOSS', 'categorical_crossentropy')),
                      str(self.params.get('CLIP_C', 0.)),
                      str(self.params.get('CLIP_V', 0.)),
                      str(self.params.get('LR_OPTIMIZER_DECAY', 0.0))
                      ))
    optimizer_name = self.params['OPTIMIZER'].lower()
    if optimizer_name == 'sgd':
        optimizer = SGD(lr=self.params.get('LR', 0.01),
                        momentum=self.params.get('MOMENTUM', 0.0),
                        decay=self.params.get('LR_OPTIMIZER_DECAY', 0.0),
                        nesterov=self.params.get('NESTEROV_MOMENTUM', False),
                        clipnorm=self.params.get('CLIP_C', 0.),
                        clipvalue=self.params.get('CLIP_V', 0.),
                        )
    elif optimizer_name in ('rmsprop', 'rsmprop'):
        # BUG FIX: only the misspelling 'rsmprop' was matched before, so the
        # correct spelling 'rmsprop' silently fell through to the eval()
        # fallback and ignored LR/RHO/decay/clipping. Both spellings are
        # accepted now for backward compatibility.
        optimizer = RMSprop(lr=self.params.get('LR', 0.001),
                            rho=self.params.get('RHO', 0.9),
                            decay=self.params.get('LR_OPTIMIZER_DECAY', 0.0),
                            clipnorm=self.params.get('CLIP_C', 0.),
                            clipvalue=self.params.get('CLIP_V', 0.))
    elif optimizer_name == 'adagrad':
        optimizer = Adagrad(lr=self.params.get('LR', 0.01),
                            decay=self.params.get('LR_OPTIMIZER_DECAY', 0.0),
                            clipnorm=self.params.get('CLIP_C', 0.),
                            clipvalue=self.params.get('CLIP_V', 0.))
    elif optimizer_name == 'adadelta':
        optimizer = Adadelta(lr=self.params.get('LR', 1.0),
                             rho=self.params.get('RHO', 0.9),
                             decay=self.params.get('LR_OPTIMIZER_DECAY', 0.0),
                             clipnorm=self.params.get('CLIP_C', 0.),
                             clipvalue=self.params.get('CLIP_V', 0.))
    elif optimizer_name == 'adam':
        optimizer = Adam(lr=self.params.get('LR', 0.001),
                         beta_1=self.params.get('BETA_1', 0.9),
                         beta_2=self.params.get('BETA_2', 0.999),
                         decay=self.params.get('LR_OPTIMIZER_DECAY', 0.0),
                         clipnorm=self.params.get('CLIP_C', 0.),
                         clipvalue=self.params.get('CLIP_V', 0.))
    elif optimizer_name == 'adamax':
        optimizer = Adamax(lr=self.params.get('LR', 0.002),
                           beta_1=self.params.get('BETA_1', 0.9),
                           beta_2=self.params.get('BETA_2', 0.999),
                           decay=self.params.get('LR_OPTIMIZER_DECAY', 0.0),
                           clipnorm=self.params.get('CLIP_C', 0.),
                           clipvalue=self.params.get('CLIP_V', 0.))
    elif optimizer_name == 'nadam':
        optimizer = Nadam(lr=self.params.get('LR', 0.002),
                          beta_1=self.params.get('BETA_1', 0.9),
                          beta_2=self.params.get('BETA_2', 0.999),
                          schedule_decay=self.params.get(
                              'LR_OPTIMIZER_DECAY', 0.0),
                          clipnorm=self.params.get('CLIP_C', 0.),
                          clipvalue=self.params.get('CLIP_V', 0.))
    else:
        logging.info(
            '\tWARNING: The modification of the LR is not implemented for '
            'the chosen optimizer.')
        # SECURITY: eval() of a config-supplied string executes arbitrary
        # code — only use with trusted configuration files.
        optimizer = eval(self.params['OPTIMIZER'])
    self.model.compile(optimizer=optimizer,
                       loss=self.params['LOSS'],
                       metrics=self.params.get('KERAS_METRICS', []),
                       sample_weight_mode='temporal' if self.params[
                           'SAMPLE_WEIGHTS'] else None)
def pass_arg(Xx, nsim, tr_size, droprate):
    """Fine-tune a pre-trained dense network and return `nsim` rounds of
    porosity predictions on `Xx` (shape: (nsim, len(Xx), 1) array).

    Seeds all RNGs for reproducibility, loads a pre-trained model from
    ../results/, transfers its hidden-layer weights into a fresh network,
    trains on ../data/labeled_data.dat, then repeatedly predicts on Xx
    (dropout at inference makes the simulations differ).
    """
    print("Tr_size:", tr_size)

    def fix_seeds(seed):
        # Seed python/numpy/tensorflow and force single-threaded TF1 session
        # so runs are reproducible.
        random.seed(seed)
        np.random.seed(seed)
        tf.random.set_seed(seed)
        session_conf = tf.compat.v1.ConfigProto(intra_op_parallelism_threads=1,
                                                inter_op_parallelism_threads=1)
        sess = tf.compat.v1.Session(graph=tf.compat.v1.get_default_graph(),
                                    config=session_conf)
        # K.set_session(sess)
        tf.compat.v1.keras.backend.set_session(sess)

    ss = 1
    fix_seeds(ss)

    # Compute the RMSE given the ground truth (y_true) and the predictions(y_pred)
    def root_mean_squared_error(y_true, y_pred):
        return K.sqrt(K.mean(K.square(y_pred - y_true), axis=-1))

    # Making sure dimensionless bond length is less than 1
    # (piecewise penalty: positive where bl < 0 or bl >= 1)
    def bond(bl):
        bln = -bl * (bl < 0)
        blp = bl * (bl >= 1.0) - 1 * (bl >= 1.0)
        return bln + blp

    # Making sure final porosity is less than initial
    # (penalty positive where porof < 0 or porof >= poroi)
    def poros(poroi, porof):
        porofn = -porof * (porof < 0)
        porofp = porof * (porof >= poroi) - poroi * (porof >= poroi)
        return porofp + porofn

    def strength1(bl, porof, nlayer=6):
        # Monotonicity penalty: bond lengths reordered by predicted strength
        # should already be sorted; the difference is the violation.
        sigma01, sigma02 = 6, 31
        C1s = 21
        sigma_long = sigma01 * (np.exp(
            (1.0 - porof)**(C1s * nlayer)) - porof) + sigma02 * (1.0 - porof)
        sigma_long_sorted = np.sort(sigma_long, axis=-1)  # sorts along last axis
        ind = np.argsort(sigma_long, axis=-1)  # sorts along last axis
        bl_sorted = np.take_along_axis(bl, ind, axis=-1)  # bl ordered by strength
        corr_bl_sorted = np.sort(bl, axis=-1)  # sorts along last axis
        return corr_bl_sorted - bl_sorted

    def strength2(bl, porof, nlayer=6):
        # Fraction of adjacent strength-ordered pairs whose bond length
        # decreases (ordering violations, normalized by 14).
        sigma01, sigma02 = 6, 31
        C1s = 21
        sigma_long = sigma01 * (np.exp(
            (1.0 - porof)**(C1s * nlayer)) - porof) + sigma02 * (1.0 - porof)
        sigma_long_sorted = np.sort(sigma_long, axis=-1)
        ind = np.argsort(sigma_long, axis=-1)
        bl_sorted = np.take_along_axis(bl, ind, axis=-1)
        return sum(bl_sorted[1:] - bl_sorted[:-1] < 0) / 14

    def phy_loss_mean(params):
        # useful for cross-checking training: combine the positive parts of
        # the three physics penalties (min-max scaled when non-degenerate).
        loss1, loss2, loss3, loss4, lam1, lam2 = params
        x1, x2, x3 = loss1 * (loss1 > 0), loss2 * (loss2 > 0), loss3 * (loss3 > 0)
        if x1.any() and x1.shape[0] > 1:
            X_scaled1 = (x1 - np.min(x1)) / (np.max(x1) - np.min(x1))
            x1 = X_scaled1
        if x2.any() and x2.shape[0] > 1:
            X_scaled2 = (x2 - np.min(x2)) / (np.max(x2) - np.min(x2))
            x2 = X_scaled2
        if x3.any() and x3.shape[0] > 1:
            X_scaled3 = (x3 - np.min(x3)) / (np.max(x3) - np.min(x3))
            x3 = X_scaled3
        return (lam1 * np.mean(x1) + lam2 * np.mean(x2) + lam2 * np.mean(x3))

    def PGNN_train_test(optimizer_name, optimizer_val, drop_rate, iteration,
                        n_layers, n_nodes, tr_size, pre_train):
        # Hyper-parameters of the training process
        # batch_size = int(tr_size/2)
        batch_size = 1
        num_epochs = 300
        val_frac = 0.25
        patience_val = 80
        # Initializing results filename
        exp_name = "Pre-train" + optimizer_name + '_drop' + str(
            drop_rate) + '_nL' + str(n_layers) + '_nN' + str(
                n_nodes) + '_trsize' + str(tr_size) + '_iter' + str(iteration)
        exp_name = exp_name.replace('.', 'pt')
        results_dir = '../results/'
        model_name = results_dir + exp_name + '.h5'  # storing the trained model
        results_name = results_dir + exp_name + '_results.dat'  # storing the results of the model
        # Load labeled data
        data = np.loadtxt('../data/labeled_data.dat')
        x_labeled = data[:, :2]  # first two columns are the input features
        y_labeled = data[:, -3:-1]  # dimensionless bond length and porosity
        # normalize dataset with MinMaxScaler
        scaler = preprocessing.MinMaxScaler(feature_range=(0, 1.0))
        # scaler = preprocessing.StandardScaler()
        x_labeled = scaler.fit_transform(x_labeled)
        # train and test data
        trainX, trainY = x_labeled[:tr_size, :], y_labeled[:tr_size]
        testX, testY = x_labeled[tr_size:, :], y_labeled[tr_size:]
        dependencies = {'root_mean_squared_error': root_mean_squared_error}
        # load the pre-trained model using non-calibrated physics-based model
        # predictions (./data/unlabeled.dat)
        loaded_model = load_model(results_dir + pre_train,
                                  custom_objects=dependencies)
        # Creating the model
        model = Sequential()
        for layer in np.arange(n_layers):
            if layer == 0:
                model.add(
                    Dense(n_nodes,
                          activation='relu',
                          input_shape=(np.shape(trainX)[1], )))
            else:
                model.add(
                    Dense(n_nodes,
                          activation='relu',
                          kernel_regularizer=l1_l2(l1=.001, l2=.001)))
        model.add(Dropout(rate=drop_rate))
        model.add(Dense(2, activation='linear'))
        # pass the weights to all layers but 1st input layer, whose dimensions
        # are updated for the new input
        for new_layer, layer in zip(model.layers[1:], loaded_model.layers[1:]):
            new_layer.set_weights(layer.get_weights())
        model.compile(loss='mean_squared_error',
                      optimizer=optimizer_val,
                      metrics=[root_mean_squared_error])
        early_stopping = EarlyStopping(monitor='val_loss',
                                       patience=patience_val,
                                       verbose=1)
        print('Running...' + optimizer_name)
        history = model.fit(trainX,
                            trainY,
                            batch_size=batch_size,
                            epochs=num_epochs,
                            verbose=0,
                            validation_split=val_frac,
                            callbacks=[early_stopping, TerminateOnNaN()])
        test_score = model.evaluate(testX, testY, verbose=1)
        print(test_score)
        # Monte-Carlo style repeated prediction on Xx; keep porosity column.
        samples = []
        for i in range(int(nsim)):
            print("simulation num:", i)
            predictions = model.predict(Xx)
            predictions = predictions[:, 1]
            samples.append(predictions[:, np.newaxis])
        return np.array(samples)

    # Main Function
    # NOTE(review): this guard is *inside* pass_arg; when the module is
    # imported (__name__ != '__main__') the function returns None — confirm
    # this is the intended behavior.
    if __name__ == '__main__':
        fix_seeds(1)
        # List of optimizers to choose from
        optimizer_names = [
            'Adagrad', 'Adadelta', 'Adam', 'Nadam', 'RMSprop', 'SGD', 'NSGD'
        ]
        optimizer_vals = [
            Adagrad(clipnorm=1),
            Adadelta(clipnorm=1),
            Adam(clipnorm=1),
            Nadam(clipnorm=1),
            RMSprop(clipnorm=1),
            SGD(clipnorm=1.),
            SGD(clipnorm=1, nesterov=True)
        ]
        # selecting the optimizer
        optimizer_num = 1
        optimizer_name = optimizer_names[optimizer_num]
        optimizer_val = optimizer_vals[optimizer_num]
        # Selecting Other Hyper-parameters
        drop_rate = droprate  # Fraction of nodes to be dropped out
        n_layers = 2  # Number of hidden layers
        n_nodes = 5  # Number of nodes per hidden layer
        # pre-trained model
        pre_train = 'Pre-trainAdadelta_drop0_nL2_nN5_trsize1308_iter0.h5'
        tr_size = int(tr_size)
        # total number of runs
        iter_range = np.arange(1)
        testrmse = []
        # iterating through all possible params
        for iteration in iter_range:
            pred = PGNN_train_test(optimizer_name, optimizer_val, drop_rate,
                                   iteration, n_layers, n_nodes, tr_size,
                                   pre_train)
        return np.squeeze(pred)
# Training hyper-parameters for a small CNN experiment.
nb_epoch = 30
batch_size = 64
nb_kernels = 16  # number of convolution filters
kernel_size = (3, 3)
activation_fun = 'relu'
nb_pool = 2  # pooling size
size_conv = (3, 3)  # kernel size
sgd = SGD(
    lr=0.1,  # learning rate
    decay=1e-6,  # learning-rate decay applied after each update
    momentum=0.9,  # momentum
    nesterov=True  # use Nesterov momentum
)
# NOTE(review): this rebinds the imported name `adam` to an instance —
# a second evaluation of this line would fail; confirm intended.
adam = adam()
ada_grad = Adagrad()
rmsprop = RMSprop()


def LeNet(kernel_size=(5, 5), activation='tanh'):
    # Build a LeNet-style Sequential CNN; `shape_ord` (input shape) is
    # defined elsewhere in the file.
    model = Sequential()
    model.add(
        Conv2D(filters=6,
               kernel_size=kernel_size,
               strides=(1, 1),
               padding='valid',
               input_shape=shape_ord,
               activation=activation,
               name='Conv1'))
    # NOTE(review): the chunk is truncated here (dangling call continues
    # beyond this view).
    model.add(
# Proposal-classifier training fragment: iterates train_on_batch over
# pre-extracted normal/abnormal feature files (the loop body continues
# beyond this view).
normal_dir = cfg.processed_normal_train_features
abnormal_dir = cfg.processed_abnormal_train_features
normal_list = os.listdir(normal_dir)
normal_list.sort()  # deterministic file ordering
abnormal_list = os.listdir(abnormal_dir)
abnormal_list.sort()
# NOTE(review): paths built by plain concatenation — output_dir needs a
# trailing separator for these to land inside the directory; confirm.
weights_path = output_dir + 'weights_proposal.mat'
model_path = output_dir + 'model_proposal.json'
# Create Full connected Model
model = classifier.classifier_model()
adagrad = Adagrad(lr=0.002, epsilon=1e-07)
model.compile(loss=custom_objective, optimizer=adagrad)
if not os.path.exists(output_dir):
    os.makedirs(output_dir)
loss_graph = []
num_iters = 20000
total_iterations = 0
batchsize = 60
time_before = datetime.now()
for it_num in range(num_iters):
    # Each batch mixes normal and abnormal training features.
    inputs, targets = load_batch_train(normal_dir, normal_list, abnormal_dir,
                                       abnormal_list)
    batch_loss = model.train_on_batch(inputs, targets)
return lr
# NOTE(review): the `return lr` above is the tail of a function (likely
# lr_schedule) whose definition starts before this view.
#ResNet:
# model = keras.applications.resnet50.ResNet50(input_shape=None, include_top=True, weights=None)
# ResNet v1, depth 32 (5*6+2).
model = resnet_v1(input_shape=input_shape,
                  depth=5 * 6 + 2,
                  num_classes=num_classes)
# NOTE(review): an unmatched `optimizer` value leaves `opt` unbound
# (NameError at compile) — confirm callers only pass these four names.
if optimizer == 'Adam':
    opt = Adam(lr=lr_schedule(0))
elif optimizer == 'SGD':
    opt = SGD(lr=lr_schedule(0))
elif optimizer == 'RMSprop':
    opt = RMSprop(lr=lr_schedule(0))
elif optimizer == 'Adagrad':
    opt = Adagrad(lr=lr_schedule(0))
model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy', 'top_k_categorical_accuracy'])
# model.summary()
print("-" * 20 + exp_name + '-' * 20)
# Prepare model model saving directory.
lr_scheduler = LearningRateScheduler(lr_schedule)
lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1),
                               cooldown=0,
                               patience=5,
                               min_lr=0.5e-6)
def pass_arg(Xx, nsim, tr_size, droprate):
    """Train a physics-guided neural network (PGNN) and return `nsim`
    rounds of porosity predictions on `Xx` (shape: (nsim, len(Xx), 1)).

    The loss combines MSE with four Keras-tensor physics penalties (bond
    length bounds, porosity bounds, two strength-ordering terms) evaluated
    on unlabeled data, weighted by `lamda`.
    """
    print("Tr_size:", tr_size)

    def fix_seeds(seed):
        # Seed python/numpy/tensorflow and force single-threaded TF1 session
        # so runs are reproducible.
        random.seed(seed)
        np.random.seed(seed)
        tf.random.set_seed(seed)
        session_conf = tf.compat.v1.ConfigProto(intra_op_parallelism_threads=1,
                                                inter_op_parallelism_threads=1)
        sess = tf.compat.v1.Session(graph=tf.compat.v1.get_default_graph(),
                                    config=session_conf)
        # K.set_session(sess)
        tf.compat.v1.keras.backend.set_session(sess)

    ss = 1
    fix_seeds(ss)

    # Compute the RMSE given the ground truth (y_true) and the predictions(y_pred)
    def root_mean_squared_error(y_true, y_pred):
        return K.sqrt(K.mean(K.square(y_pred - y_true), axis=-1))

    # Making sure dimensionless bond length is less than 1
    def bond(bl):
        return tf.add(K.relu(tf.negative(bl)), K.relu(bl - 1.0))

    # Making sure final porosity is less than initial
    def poros(poroi, porof):
        return K.relu(tf.negative(porof)) + K.relu(porof - poroi)

    def strength1(bl, porof, nlayer=6):
        # Ordering penalty: bond lengths sorted by predicted strength vs.
        # the same values re-sorted descending — nonzero where order breaks.
        sigma01, sigma02, C1s = 6, 31, 21
        sigma_long = sigma01 * (K.exp(
            (1.0 - porof)**(C1s * nlayer)) - porof) + sigma02 * (1.0 - porof)
        n = K.shape(sigma_long)[0]
        sorted_strength, sortedIndices = tf.math.top_k(sigma_long, n, True)
        sorted_bl = K.gather(bl, sortedIndices)
        sorted_porof = K.gather(porof, sortedIndices)
        argg = tf.argsort(sorted_bl,
                          axis=-1,
                          direction='DESCENDING',
                          stable=False,
                          name=None)
        sorted_bl_corr = K.gather(sorted_bl, argg)
        return sorted_bl_corr - sorted_bl

    def strength2(bl, porof, nlayer=6):
        # Fraction of adjacent strength-ordered pairs whose bond length
        # increases (count of violations / n).
        sigma01, sigma02, C1s = 6, 31, 21
        sigma_long = sigma01 * (K.exp(
            (1.0 - porof)**(C1s * nlayer)) - porof) + sigma02 * (1.0 - porof)
        n = K.shape(sigma_long)[0]
        sorted_strength, sortedIndices = tf.math.top_k(sigma_long, n, True)
        sorted_bl = K.gather(bl, sortedIndices)
        n = K.cast(n, tf.float32)
        rel = K.relu(sorted_bl[1:] - sorted_bl[0:-1])
        num_vio = K.cast(tf.math.count_nonzero(rel), tf.float32)
        return num_vio / n

    def phy_loss_mean(params):
        # useful for cross-checking training: physics terms only (no MSE),
        # exposed as a Keras metric.
        loss1, loss2, loss3, loss4, lam1, lam2, lam3, lam4 = params

        def loss(y_true, y_pred):
            return lam1 * K.mean(K.relu(loss1)) + lam2 * K.mean(
                K.relu(loss2)) + lam3 * K.mean(K.relu(loss3)) + lam4 * loss4

        return loss

    #function to calculate the combined loss = sum of rmse and phy based loss
    def combined_loss(params):
        loss1, loss2, loss3, loss4, lam1, lam2, lam3, lam4 = params

        def loss(y_true, y_pred):
            return mean_squared_error(y_true, y_pred) + lam1 * K.mean(
                K.relu(loss1)) + lam2 * K.mean(K.relu(loss2)) + lam3 * K.mean(
                    K.relu(loss3)) + lam4 * loss4

        return loss

    def PGNN_train_test(optimizer_name, optimizer_val, drop_frac, use_YPhy,
                        iteration, n_layers, n_nodes, tr_size, lamda, reg,
                        samp):
        # fix_seeds(ss)
        # Hyper-parameters of the training process
        # batch_size = tr_size
        batch_size = 1
        num_epochs = 300
        val_frac = 0.25
        patience_val = 80
        # Initializing results filename
        exp_name = optimizer_name + '_drop' + str(drop_frac) + '_usePhy' + str(
            use_YPhy) + '_nL' + str(n_layers) + '_nN' + str(
                n_nodes) + '_trsize' + str(tr_size) + '_lamda' + str(
                    lamda) + '_iter' + str(iteration)
        exp_name = exp_name.replace('.', 'pt')
        results_dir = '../results/'
        model_name = results_dir + exp_name + '_model.h5'  # storing the trained model
        # Results filename encodes sample size and regularizer usage.
        if reg == True and samp == 25:
            results_name = results_dir + exp_name + '_results_25_regularizer.dat'
        elif reg == False and samp == 25:
            results_name = results_dir + exp_name + '_results_25.dat'
        elif reg == True and samp == 1519:
            results_name = results_dir + exp_name + '_results_1519_regularizer.dat'
        elif reg == False and samp == 1519:
            results_name = results_dir + exp_name + '_results_1519.dat'
        # Load labeled data
        data = np.loadtxt('../data/labeled_data.dat')
        x_label = data[:, :-3]  # drop porosity-prediction columns
        x_labeled = np.hstack((x_label[:, :2], x_label[:, -2:]))
        y_labeled = data[:, -3:-1]  # dimensionless bond length and porosity measurements
        if samp == 25:
            data = np.loadtxt('../data/unlabeled_data_BK_constw_v2_25.dat')
            x_unlabeled = data[:, :]
        elif samp == 1519:
            data = np.loadtxt('../data/unlabeled_data_BK_constw_v2_1525.dat')
            x_unlabeled = data[:, :]
        x_unlabeled1 = x_unlabeled[:1303, :]
        x_unlabeled2 = x_unlabeled[-6:, :]
        x_unlabeled = np.vstack((x_unlabeled1, x_unlabeled2))
        # initial porosity (used by the poros() physics penalty)
        init_poro = x_unlabeled[:, -1]
        x_unlabeled = np.hstack((x_unlabeled[:, :2], x_unlabeled[:, -3:-1]))
        # normalize dataset with MinMaxScaler
        scaler = preprocessing.MinMaxScaler(feature_range=(0.0, 1.0))
        # scaler = preprocessing.StandardScaler()
        x_labeled = scaler.fit_transform(x_labeled)
        x_unlabeled = scaler.fit_transform(x_unlabeled)
        # train and test data
        trainX, trainY = x_labeled[:tr_size, :], y_labeled[:tr_size]
        testX, testY = x_labeled[tr_size:, :], y_labeled[tr_size:]
        if use_YPhy == 0:
            # Removing the last column from x_unlabeled (corresponding to Y_PHY)
            x_unlabeled = x_unlabeled[:, :-1]
        # Creating the model
        model = Sequential()
        for layer in np.arange(n_layers):
            if layer == 0:
                model.add(
                    Dense(n_nodes,
                          activation='relu',
                          input_shape=(np.shape(trainX)[1], )))
            else:
                if reg:
                    model.add(
                        Dense(n_nodes,
                              activation='relu',
                              kernel_regularizer=l1_l2(l1=.001, l2=.001)))
                else:
                    model.add(Dense(n_nodes, activation='relu'))
        model.add(Dropout(rate=drop_frac))
        model.add(Dense(2, activation='linear'))
        # physics-based regularization: evaluate the model symbolically on
        # the unlabeled inputs and build penalty tensors from the outputs.
        uinp_sc = K.constant(value=x_unlabeled)  # unlabeled input data
        lam1 = K.constant(value=lamda[0])  # regularization hyper-parameter
        lam2 = K.constant(value=lamda[1])  # regularization hyper-parameter
        lam3 = K.constant(value=lamda[2])  # regularization hyper-parameter
        lam4 = K.constant(value=lamda[3])  # regularization hyper-parameter
        predictions = model(uinp_sc)  # model output on unlabeled data
        phyloss1 = bond(predictions[:, 0])  # physics loss 1
        phyloss2 = poros(init_poro, predictions[:, 1])  # physics loss 2
        phyloss3 = strength1(predictions[:, 0], predictions[:, 1])
        phyloss4 = strength2(predictions[:, 0], predictions[:, 1])
        totloss = combined_loss(
            [phyloss1, phyloss2, phyloss3, phyloss4, lam1, lam2, lam3, lam4])
        phyloss = phy_loss_mean(
            [phyloss1, phyloss2, phyloss3, phyloss4, lam1, lam2, lam3, lam4])
        model.compile(loss=totloss,
                      optimizer=optimizer_val,
                      metrics=[phyloss, root_mean_squared_error])
        early_stopping = EarlyStopping(monitor='val_loss',
                                       patience=patience_val,
                                       verbose=1)
        # print('Running...' + optimizer_name)
        history = model.fit(trainX,
                            trainY,
                            batch_size=batch_size,
                            epochs=num_epochs,
                            verbose=0,
                            validation_split=val_frac,
                            callbacks=[early_stopping, TerminateOnNaN()])
        test_score = model.evaluate(testX, testY, verbose=1)
        print(test_score)
        # Repeated prediction on Xx; keep porosity column only.
        samples = []
        for i in range(int(nsim)):
            print("simulation num:", i)
            predictions = model.predict(Xx)
            predictions = predictions[:, 1]
            samples.append(predictions[:, np.newaxis])
        return np.array(samples)

    # Main Function
    # NOTE(review): this guard is *inside* pass_arg; when the module is
    # imported (__name__ != '__main__') the function returns None — confirm
    # this is the intended behavior.
    if __name__ == '__main__':
        fix_seeds(1)
        # List of optimizers to choose from
        optimizer_names = [
            'Adagrad', 'Adadelta', 'Adam', 'Nadam', 'RMSprop', 'SGD', 'NSGD'
        ]
        optimizer_vals = [
            Adagrad(clipnorm=1),
            Adadelta(clipnorm=1),
            Adam(clipnorm=1),
            Nadam(clipnorm=1),
            RMSprop(clipnorm=1),
            SGD(clipnorm=1.),
            SGD(clipnorm=1, nesterov=True)
        ]
        # selecting the optimizer
        optimizer_num = 1
        optimizer_name = optimizer_names[optimizer_num]
        optimizer_val = optimizer_vals[optimizer_num]
        # Selecting Other Hyper-parameters
        drop_frac = droprate  # Fraction of nodes to be dropped out
        use_YPhy = 1  # Whether YPhy is used as another feature in the NN model or not
        n_layers = 2  # Number of hidden layers
        n_nodes = 5  # Number of nodes per hidden layer
        #set lamda
        lamda = [0.3, 0.15, 0.008, 0]  # Physics-based regularization constant
        tr_size = int(tr_size)
        # use regularizer
        reg = True
        # sample size used
        samp = 1519
        # samp = 25
        # total number of runs
        iter_range = np.arange(1)
        testrmse = []
        # iterating through all possible params
        for iteration in iter_range:
            pred = PGNN_train_test(optimizer_name, optimizer_val, drop_frac,
                                   use_YPhy, iteration, n_layers, n_nodes,
                                   tr_size, lamda, reg, samp)
        return np.squeeze(pred)
# --- Dependencies -----------------------------------------------------------
from keras.callbacks import Callback
from keras.optimizers import SGD, RMSprop, Adagrad, Adadelta, Adam, Adamax, Nadam
from keras.utils import multi_gpu_model
import tensorflow as tf

from azureml.core import Run

from utils import load_data, one_hot_encode

# NOTE(review): `keras` and `argparse` are referenced below but not imported
# in this view -- presumably imported earlier in the file; verify.
print("Keras version:", keras.__version__)
print("Tensorflow version:", tf.__version__)

# Optimizer name -> factory taking a learning rate, so the optimizer can be
# selected from a CLI string at run time.
optimizer_types = {
    'SGD': lambda lr: SGD(lr=lr),
    'RMSprop': lambda lr: RMSprop(lr=lr),
    'Adagrad': lambda lr: Adagrad(lr=lr),
    'Adadelta': lambda lr: Adadelta(lr=lr),
    'Adam': lambda lr: Adam(lr=lr),
    'Adamax': lambda lr: Adamax(lr=lr),
    'Nadam': lambda lr: Nadam(lr=lr)
}

# Command-line interface: data location plus training hyper-parameters.
parser = argparse.ArgumentParser()
parser.add_argument('--data-folder',
                    type=str,
                    dest='data_folder',
                    help='data folder mounting point')
parser.add_argument('--batch-size',
                    type=int,
                    dest='batch_size',
                    default=50,
                    help='mini batch size for training')
parser.add_argument('--epoch',
                    type=int,
                    dest='epoch',
                    default=20,
                    help='epoch size for training')
parser.add_argument('--first-layer-neurons',
                    type=int,
                    dest='n_hidden_1',
                    default=100,
                    help='# of neurons in the first layer')
parser.add_argument('--second-layer-neurons',
                    type=int,
                    dest='n_hidden_2',
                    default=100,
                    help='# of neurons in the second layer')
parser.add_argument('--learning-rate',
                    type=float,
                    dest='learning_rate',
                    default=0.001,
                    help='learning rate')
# Linear regression on PM2.5 data, implemented as a single Dense unit.
from keras.optimizers import Adagrad

# Make the parent directory importable so `pm25_data` can be found.
sys.path.append(os.path.dirname(os.path.dirname(__file__)))
# flake8: noqa
from pm25_data import read_data, extract_features, extract_target

# NOTE(review): `numpy.nan` as a print threshold is rejected by numpy >= 1.14
# (use numpy.inf / sys.maxsize there) -- confirm the pinned numpy version.
numpy.set_printoptions(threshold=numpy.nan)

feature_hours = 1  # number of past hours used as features

data = read_data()
features = extract_features(data, feature_hours)
target = extract_target(data, feature_hours, pm25_row=9)

# One Dense unit with zero-initialised weights == plain linear regression.
model = Sequential()
linear_layer = Dense(units=1,
                     input_dim=18,
                     kernel_initializer='zeros',
                     bias_initializer='zeros')
model.add(linear_layer)
optimizer = Adagrad(lr=0.1, epsilon=0)
model.compile(loss='mse', optimizer=optimizer)

# Training: full-batch gradient steps, logging the cost each step.
for step in range(10000):
    cost = model.train_on_batch(features, target.T[0])
    weights, biases = linear_layer.get_weights()
    print("After %d trainings, the cost: %f" % (step, cost))

model.save('model.h5')
tw = K.cast(K.equal(y_true1, 1),"float32") * K.cast(K.greater(y_pred1, 0.25),"float32") fb = K.sum(fb * (y_true1 - y_pred1), axis = [1,2,3]) fw = K.sum(fw * (y_pred1 - y_true1), axis = [1,2,3]) tb = K.sum(tb * (1 - y_pred1 + y_true1), axis = [1,2,3]) tw = K.sum(tw * (1 - y_true1 + y_pred1), axis = [1,2,3]) prec = tw / (tw + fw + 0.0001) rec = tw / (tw + fb + 0.0001) f_mera = 2 * prec * rec / (prec + rec + 0.0001) return K.mean(1 - f_mera) l1, l2 = 0.5, 0.5 def comb_loss(y_true, y_pred): return l1 * weighted_loss(y_true, y_pred) + l2 * f_mera_loss(y_true, y_pred) model.compile(optimizer=Adagrad(lr=0.03), loss=comb_loss, metrics=[f_mera]) csv_logger = CSVLogger('/home/polina/1tb/ss_prediction/centroid_50_90/redone/models_steps/step3/model1/training.log') train_size = calc_data_size(input_train) valid_size = calc_data_size(input_valid) train_gen = generate_batches(input_train) valid_gen = generate_batches(input_valid) model.fit_generator(train_gen, validation_data=valid_gen, steps_per_epoch=int(train_size/bs) - 1, validation_steps=int(valid_size/ bs) - 1,
predictions = Dense(N_CATEGORIES, activation='softmax')(x) model = Model(inputs=base_model.input, outputs=predictions) else: raise Exception('invalid model name') if (MODELS == 'inceptionv3' or MODELS == 'vgg16' or MODELS == 'squeezenet' or MODELS == 'mobilenet'): #for fine tuning from keras.optimizers import SGD model.compile(optimizer=SGD(lr=0.0001, momentum=0.9), loss='categorical_crossentropy', metrics=['accuracy']) else: #for full training from keras.optimizers import Adagrad model.compile(optimizer=Adagrad(lr=0.01, epsilon=1e-08, decay=0.0), loss='categorical_crossentropy', metrics=['accuracy']) model.summary() # ---------------------------------------------- # Data Augumentation # ---------------------------------------------- # reference from https://github.com/yu4u/age-gender-estimation/blob/master/random_eraser.py # https://github.com/yu4u/age-gender-estimation/blob/master/LICENSE def get_random_eraser(p=0.5, s_l=0.02, s_h=0.4,
def train_and_evaluate(dataset, loss, noise, run=0, num_batch=32, asymmetric=0):
    """Train a model on `dataset` under label noise and return its test score.

    Args:
        dataset: one of 'mnist', 'cifar10_deep', 'cifar10<...>resnet<...>',
            'cifar100', 'imdb', 'lstm'.
        loss: loss name; 'est_backward'/'est_forward' post-train a previously
            saved crossentropy model using an estimated noise matrix.
        noise: label-noise rate passed to the noisify_* helpers.
        run: run index, used as random seed and in file names.
        num_batch: mini-batch size.
        asymmetric: 0 for symmetric noise (noisify_with_P), 1 for
            class-dependent noise.

    Returns:
        Result of ``kerasModel.evaluate_model`` on the clean test set.

    Raises:
        ValueError: unknown `dataset`, or the required pre-trained
            crossentropy model is missing for the 'est_*' losses.
    """
    val_split = 0.1

    # Select the model (some datasets also override the default optimizer).
    if dataset == 'mnist':
        kerasModel = MNISTModel(num_batch=num_batch)
        kerasModel.optimizer = Adagrad()
    elif dataset == 'cifar10_deep':
        kerasModel = CIFAR10Model(num_batch=num_batch, type='deep')
    elif dataset[8:-1] == 'resnet':
        kerasModel = CIFAR10Model(num_batch=num_batch, type=dataset[8:])
    elif dataset == 'cifar100':
        kerasModel = CIFAR100Model(num_batch=num_batch)
    elif dataset == 'imdb':
        kerasModel = IMDBModel(num_batch=num_batch)
        kerasModel.optimizer = Adagrad()
    elif dataset == 'lstm':
        kerasModel = LSTMModel(num_batch=num_batch)
        kerasModel.optimizer = Adagrad(lr=0.001)
    else:
        # BUG FIX: the exception was constructed but never raised (the code
        # then fell through to a bare sys.exit()); raise it instead.
        raise ValueError('No dataset given.')

    # an important data-dependent configuration
    filter_outlier = dataset != 'cifar100'

    # the data, shuffled and split between train and test sets
    print('Loading %s ...' % dataset)
    X_train, X_test, y_train, y_test = kerasModel.get_data()
    print('Done.')

    # apply label noise
    if asymmetric == 0:
        y_train, P = noisify_with_P(y_train, kerasModel.classes, noise,
                                    random_state=run)
    elif asymmetric == 1:
        if dataset == 'mnist':
            y_train, P = noisify_mnist_asymmetric(y_train, noise,
                                                  random_state=run)
        elif dataset == 'cifar100':
            y_train, P = noisify_cifar100_asymmetric(y_train, noise,
                                                     random_state=run)
        elif dataset[:7] == 'cifar10':
            y_train, P = noisify_cifar10_asymmetric(y_train, noise,
                                                    random_state=run)
        else:
            # binary classes
            y_train, P = noisify_binary_asymmetric(y_train, noise,
                                                   random_state=run)

    print('T: \n', P)

    # convert class vectors to binary class matrices
    Y_train = to_categorical(y_train, kerasModel.classes)
    Y_test = to_categorical(y_test, kerasModel.classes)

    # keep track of the best model
    model_file = build_file_name('tmp_model/', dataset, loss, noise,
                                 asymmetric, run)

    # this is the case when we post-train changing the loss; the two branches
    # were near-identical duplicates and are consolidated here.
    if loss in ('est_backward', 'est_forward'):
        vanilla_file = build_file_name('tmp_model/', dataset, 'crossentropy',
                                       noise, asymmetric, run)
        if not os.path.isfile(vanilla_file):
            # BUG FIX: was a bare `ValueError(...)` expression; now raised.
            raise ValueError('Need to train with crossentropy first !')

        # first compile the vanilla_crossentropy model with the saved weights
        kerasModel.build_model('crossentropy', P=None)
        kerasModel.load_model(vanilla_file)

        # estimate P from predictions on all of X_train
        est = NoiseEstimator(classifier=kerasModel, alpha=0.0,
                             filter_outlier=filter_outlier)
        P_est = est.fit(X_train).predict()
        if loss == 'est_backward':
            print('Condition number:', np.linalg.cond(P_est))
        # BUG FIX: originally printed the true P here instead of P_est.
        print('T estimated: \n', P_est)

        # compile the model with the new estimated loss
        kerasModel.build_model(
            'backward' if loss == 'est_backward' else 'forward', P=P_est)
    else:
        # compile the model
        kerasModel.build_model(loss, P)

    # fit the model
    history = kerasModel.fit_model(model_file, X_train, Y_train,
                                   validation_split=val_split)

    # persist training history for later analysis
    history_file = build_file_name('history/', dataset, loss, noise,
                                   asymmetric, run)
    with open(history_file, 'wb') as f:
        pickle.dump(history, f)
        print('History dumped at ' + str(history_file))

    # test
    score = kerasModel.evaluate_model(X_test, Y_test)

    # clean models, unless it is vanilla_crossentropy --to be used by P_est
    if loss != 'crossentropy':
        os.remove(model_file)

    return score
def build_model(in_dim,
                out_dim=1,
                n_hidden=100,
                l1_norm=0.0,
                l2_norm=0,
                n_deep=5,
                drop=0.1,
                learning_rate=0.1,
                optimizer='Adadelta',
                activation='tanh'):
    """Build and compile a fully-connected Keras classifier (old Keras 1 API).

    Args:
        in_dim: number of input features.
        out_dim: output units; 1 selects binary crossentropy, otherwise
            softmax + categorical crossentropy.
        n_hidden: units per hidden layer.
        l1_norm, l2_norm: L1/L2 regularization on the input layer weights.
        n_deep: total number of hidden layers (input layer + n_deep-1 more).
        drop: dropout rate between hidden layers.
        learning_rate: optimizer learning rate; None uses the optimizer's
            default.
        optimizer: optimizer name; unknown names fall back to Adadelta
            (logged via `logger`).
        activation: activation for hidden layers (and the output when
            out_dim == 1).

    Returns:
        The compiled Sequential model.
    """
    model = Sequential()
    # Input layer (L1/L2-regularized)
    model.add(
        Dense(input_dim=in_dim,
              output_dim=n_hidden,
              init='uniform',
              activation=activation,
              W_regularizer=l1l2(l1=l1_norm, l2=l2_norm)))

    # Remaining hidden layers, each preceded by dropout
    for _ in range(n_deep - 1):
        model.add(Dropout(drop))
        model.add(Dense(output_dim=n_hidden, init='uniform',
                        activation=activation))

    # Output layer: keep the hidden activation for binary output,
    # softmax for multi-class (the original's `activation = activation`
    # no-op branch is simplified away).
    out_activation = activation if out_dim == 1 else 'softmax'
    model.add(Dense(out_dim, init='uniform', activation=out_activation))

    # Optimizer dispatch table replaces the long if/elif chain.
    optimizer_classes = {
        'Adadelta': Adadelta,
        'SGD': SGD,
        'RMSprop': RMSprop,
        'Adagrad': Adagrad,
        'Adam': Adam,
        'Adamax': Adamax,
    }
    opt_cls = optimizer_classes.get(optimizer)
    if opt_cls is None:
        logger.info(
            'Optimizer {} not defined, using Adadelta'.format(optimizer))
        opt_cls = Adadelta
    # BUG FIX: the original fallback branch called Adadelta(lr=learning_rate)
    # even when learning_rate was None; every path now uses the optimizer's
    # default in that case, matching the named branches.
    opt = opt_cls() if learning_rate is None else opt_cls(lr=learning_rate)

    loss = 'binary_crossentropy' if out_dim == 1 else 'categorical_crossentropy'
    model.compile(loss=loss, optimizer=opt)
    return model
def _main():
    # YOLO training entry point: trains a (tiny) YOLO model on the annotated
    # dataset in two stages -- frozen body first, then full fine-tuning.
    annotation_path = 'annotations_updated/annotations/4000_train_updated_final.txt'
    log_dir = 'logs/4000_adagrad_e1e2_50x2/'
    classes_path = 'annotations_updated/annotations/4000_train_updated_classes.txt'
    anchors_path = 'model_data/tiny_yolo_anchors.txt'
    class_names = get_classes(classes_path)
    num_classes = len(class_names)
    anchors = get_anchors(anchors_path)

    input_shape = (416, 416)  # multiple of 32, hw

    # Six anchors means the tiny-YOLO variant is in use.
    is_tiny_version = len(anchors) == 6  # default setting
    if is_tiny_version:
        model = create_tiny_model(
            input_shape,
            anchors,
            num_classes,
            freeze_body=2,
            weights_path='model_data/yolo_weights-tiny.h5')
    else:
        #with tf.device('/cpu:0'):
        model = create_model(input_shape,
                             anchors,
                             num_classes,
                             freeze_body=2,
                             weights_path='model_data/trained_weights_final.h5'
                             )  # make sure you know what you freeze
    #model = multi_gpu_model(model, gpus=2)

    # Callbacks: TensorBoard logging, periodic checkpoints; ReduceLROnPlateau
    # and EarlyStopping are only attached in the fine-tuning stage below.
    logging = TensorBoard(log_dir=log_dir)
    checkpoint = ModelCheckpoint(
        log_dir + 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5',
        monitor='val_loss',
        save_weights_only=True,
        save_best_only=True,
        period=3)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                  factor=0.1,
                                  patience=3,
                                  verbose=1)
    early_stopping = EarlyStopping(monitor='val_loss',
                                   min_delta=0,
                                   patience=10,
                                   verbose=1)

    # Shuffle annotation lines deterministically, then split train/validation.
    val_split = 0.1
    with open(annotation_path) as f:
        lines = f.readlines()
    np.random.seed(10101)
    np.random.shuffle(lines)
    np.random.seed(None)
    num_val = int(len(lines) * val_split)
    num_train = len(lines) - num_val

    # Train with frozen layers first, to get a stable loss.
    # Adjust num epochs to your dataset. This step is enough to obtain a not bad model.
    if True:
        gpus = get_number_of_gpus()
        print('Found {} gpus'.format(gpus))
        #if gpus > 1:
        #model = ModelMGPU(model, gpus)
        model.compile(
            optimizer=Adagrad(lr=1e-1),
            loss={
                # use custom yolo_loss Lambda layer.
                'yolo_loss': lambda y_true, y_pred: y_pred
            })
        #model.compile(optimizer=Adam(lr=1e-3), losses={'concatenate_1': lambda y_true, y_pred: y_pred, 'concatenate_2': lambda y_true, y_pred: y_pred})
        #model.compile(optimizer=Adam(lr=1e-3), loss=categorical_crossentropy(y_true, y_pred))
        batch_size = 64
        print('Train on {} samples, val on {} samples, with batch size {}.'.
              format(num_train, num_val, batch_size))
        model.fit_generator(data_generator_wrapper(lines[:num_train],
                                                   batch_size, input_shape,
                                                   anchors, num_classes),
                            steps_per_epoch=max(1, num_train // batch_size),
                            validation_data=data_generator_wrapper(
                                lines[num_train:], batch_size, input_shape,
                                anchors, num_classes),
                            validation_steps=max(1, num_val // batch_size),
                            epochs=50,
                            initial_epoch=0,
                            callbacks=[logging, checkpoint])
        model.save_weights(log_dir + 'trained_weights_stage_1.h5')

    # Unfreeze and continue training, to fine-tune.
    # Train longer if the result is not good.
    if True:
        for i in range(len(model.layers)):
            model.layers[i].trainable = True
        model.compile(optimizer=Adagrad(lr=1e-2),
                      loss={
                          'yolo_loss': lambda y_true, y_pred: y_pred
                      })  # recompile to apply the change
        print('Unfreeze all of the layers.')

        batch_size = 64  # note that more GPU memory is required after unfreezing the body
        print('Train on {} samples, val on {} samples, with batch size {}.'.
              format(num_train, num_val, batch_size))
        model.fit_generator(
            data_generator_wrapper(lines[:num_train], batch_size, input_shape,
                                   anchors, num_classes),
            steps_per_epoch=max(1, num_train // batch_size),
            validation_data=data_generator_wrapper(lines[num_train:],
                                                   batch_size, input_shape,
                                                   anchors, num_classes),
            validation_steps=max(1, num_val // batch_size),
            epochs=100,
            initial_epoch=50,
            callbacks=[logging, checkpoint, reduce_lr, early_stopping])
        model.save_weights(log_dir + 'trained_weights_final.h5')
def build_model(units=[100, 50, 1],
                input_dimension=100,
                layer_act_fn="relu",
                dropout=0.5,
                k_init="random_uniform",
                b_init="zeros",
                output_act_fn="sigmoid",
                optimizer="SGD",
                learning_rate=0.01,
                momentum=0.9,
                decay=0.2,
                nesterov=False,
                loss_fn="binary_crossentropy",
                metrics=["accuracy"],
                custom_verbose=2):
    """Assemble and compile a Sequential dense network.

    `units` lists the layer widths: units[0] is the input layer, units[1:-1]
    the hidden layers, units[-1] the output layer. `custom_verbose` selects
    the logging style: 1 prints a full backbone description, 2 a single line.
    """
    hidden_units = units[1:-1]

    # Verbosity level 1: describe the whole backbone before building it.
    if custom_verbose == 1:
        banner = "=================================================================================="
        print(banner)
        print("Building sequential model with %d hidden layers:" %
              len(hidden_units))
        print(
            "# Input layer:\t\t%d neurons, activation function %s, dropout %f and input dimension %d"
            % (units[0], layer_act_fn, dropout, input_dimension))
        for idx, width in enumerate(hidden_units, start=1):
            print(
                "# Hidden layer %d:\t%d neurons, activation function %s, dropout %f"
                % (idx, width, layer_act_fn, dropout))
        print("# Output layer:\t\t%d neurons, activation function %s" %
              (units[-1], output_act_fn))
        print("# Optimizer:\t\t%s with learning rate %f" %
              (optimizer, learning_rate))
        print("# Loss function:\t%s" % (loss_fn))
        print("# Metrics:\t\t\t%s" % (metrics))
        print(banner)

    # Verbosity level 2: one compact line so training logs stay small.
    if custom_verbose == 2:
        print("creating %s" % units,
              file=sys.stdout)  # change to stderr if "tee" doesn't print this

    # Build the network: input block, hidden blocks, output block.
    model = Sequential()
    model.add(
        Dense(units[0],
              input_dim=input_dimension,
              kernel_initializer=k_init,
              bias_initializer=b_init))
    model.add(Activation(layer_act_fn))
    model.add(Dropout(dropout))
    for width in hidden_units:
        model.add(Dense(width, kernel_initializer=k_init,
                        bias_initializer=b_init))
        model.add(Activation(layer_act_fn))
        model.add(Dropout(dropout))
    model.add(Dense(units[-1], kernel_initializer=k_init,
                    bias_initializer=b_init))
    model.add(Activation(output_act_fn))

    # Optimizer dispatch; only SGD takes momentum/decay/nesterov. Constructors
    # are stored lazily so unused optimizers are never instantiated (lookup
    # still raises KeyError for an unknown name, as before).
    make_opt = {
        "Adam": lambda: Adam(lr=learning_rate),
        "Adagrad": lambda: Adagrad(lr=learning_rate),
        "RMSprop": lambda: RMSprop(lr=learning_rate),
        "SGD": lambda: SGD(lr=learning_rate,
                           momentum=momentum,
                           decay=decay,
                           nesterov=nesterov)
    }
    opt = make_opt[optimizer]()

    # compile model
    model.compile(optimizer=opt, loss=loss_fn, metrics=metrics)
    return model
    def compile_elmo(self, print_summary=False):
        """
        Compiles a Language Model RNN based on the given parameters

        Builds the bidirectional LM: token encoding (word embedding or
        char-level encoder), stacked forward and backward LSTM blocks with
        residual projections, and a sampled-softmax output head, then
        compiles `self._model` with Adagrad.

        :param print_summary: if True, print the Keras model summary.
        """

        if self.parameters['token_encoding'] == 'word':
            # Train word embeddings from scratch
            word_inputs = Input(shape=(None, ),
                                name='word_indices',
                                dtype='int32')
            embeddings = Embedding(self.parameters['vocab_size'],
                                   self.parameters['hidden_units_size'],
                                   trainable=True,
                                   name='token_encoding')
            inputs = embeddings(word_inputs)

            # Token embeddings for Input
            drop_inputs = SpatialDropout1D(
                self.parameters['dropout_rate'])(inputs)
            lstm_inputs = TimestepDropout(
                self.parameters['word_dropout_rate'])(drop_inputs)

            # Pass outputs as inputs to apply sampled softmax
            next_ids = Input(shape=(None, 1), name='next_ids', dtype='float32')
            previous_ids = Input(shape=(None, 1),
                                 name='previous_ids',
                                 dtype='float32')
        elif self.parameters['token_encoding'] == 'char':
            # Train character-level representation
            word_inputs = Input(shape=(
                None,
                self.parameters['token_maxlen'],
            ),
                                dtype='int32',
                                name='char_indices')
            inputs = self.char_level_token_encoder()(word_inputs)

            # Token embeddings for Input
            drop_inputs = SpatialDropout1D(
                self.parameters['dropout_rate'])(inputs)
            lstm_inputs = TimestepDropout(
                self.parameters['word_dropout_rate'])(drop_inputs)

            # Pass outputs as inputs to apply sampled softmax
            next_ids = Input(shape=(None, 1), name='next_ids', dtype='float32')
            previous_ids = Input(shape=(None, 1),
                                 name='previous_ids',
                                 dtype='float32')
        # NOTE(review): any other 'token_encoding' value leaves lstm_inputs
        # undefined and fails below with a NameError -- confirm callers
        # validate this parameter.

        # Reversed input for backward LSTMs
        re_lstm_inputs = Lambda(function=ELMo.reverse)(lstm_inputs)
        mask = Lambda(function=ELMo.reverse)(drop_inputs)

        # Forward LSTMs
        for i in range(self.parameters['n_lstm_layers']):
            if self.parameters['cuDNN']:
                lstm = CuDNNLSTM(
                    units=self.parameters['lstm_units_size'],
                    return_sequences=True,
                    kernel_constraint=MinMaxNorm(
                        -1 * self.parameters['cell_clip'],
                        self.parameters['cell_clip']),
                    recurrent_constraint=MinMaxNorm(
                        -1 * self.parameters['cell_clip'],
                        self.parameters['cell_clip']))(lstm_inputs)
            else:
                lstm = LSTM(units=self.parameters['lstm_units_size'],
                            return_sequences=True,
                            activation="tanh",
                            recurrent_activation='sigmoid',
                            kernel_constraint=MinMaxNorm(
                                -1 * self.parameters['cell_clip'],
                                self.parameters['cell_clip']),
                            recurrent_constraint=MinMaxNorm(
                                -1 * self.parameters['cell_clip'],
                                self.parameters['cell_clip']))(lstm_inputs)
            # Re-apply the padding mask after the LSTM
            lstm = Camouflage(mask_value=0)(inputs=[lstm, drop_inputs])
            # Projection to hidden_units_size
            proj = TimeDistributed(
                Dense(self.parameters['hidden_units_size'],
                      activation='linear',
                      kernel_constraint=MinMaxNorm(
                          -1 * self.parameters['proj_clip'],
                          self.parameters['proj_clip'])))(lstm)
            # Merge Bi-LSTMs feature vectors with the previous ones
            lstm_inputs = add([proj, lstm_inputs],
                              name='f_block_{}'.format(i + 1))
            # Apply variational drop-out between BI-LSTM layers
            lstm_inputs = SpatialDropout1D(
                self.parameters['dropout_rate'])(lstm_inputs)

        # Backward LSTMs (mirror of the forward stack, on reversed inputs)
        for i in range(self.parameters['n_lstm_layers']):
            if self.parameters['cuDNN']:
                re_lstm = CuDNNLSTM(
                    units=self.parameters['lstm_units_size'],
                    return_sequences=True,
                    kernel_constraint=MinMaxNorm(
                        -1 * self.parameters['cell_clip'],
                        self.parameters['cell_clip']),
                    recurrent_constraint=MinMaxNorm(
                        -1 * self.parameters['cell_clip'],
                        self.parameters['cell_clip']))(re_lstm_inputs)
            else:
                re_lstm = LSTM(
                    units=self.parameters['lstm_units_size'],
                    return_sequences=True,
                    activation='tanh',
                    recurrent_activation='sigmoid',
                    kernel_constraint=MinMaxNorm(
                        -1 * self.parameters['cell_clip'],
                        self.parameters['cell_clip']),
                    recurrent_constraint=MinMaxNorm(
                        -1 * self.parameters['cell_clip'],
                        self.parameters['cell_clip']))(re_lstm_inputs)
            re_lstm = Camouflage(mask_value=0)(inputs=[re_lstm, mask])
            # Projection to hidden_units_size
            re_proj = TimeDistributed(
                Dense(self.parameters['hidden_units_size'],
                      activation='linear',
                      kernel_constraint=MinMaxNorm(
                          -1 * self.parameters['proj_clip'],
                          self.parameters['proj_clip'])))(re_lstm)
            # Merge Bi-LSTMs feature vectors with the previous ones
            re_lstm_inputs = add([re_proj, re_lstm_inputs],
                                 name='b_block_{}'.format(i + 1))
            # Apply variational drop-out between BI-LSTM layers
            re_lstm_inputs = SpatialDropout1D(
                self.parameters['dropout_rate'])(re_lstm_inputs)

        # Reverse backward LSTMs' outputs = Make it forward again
        re_lstm_inputs = Lambda(function=ELMo.reverse,
                                name="reverse")(re_lstm_inputs)

        # Project to Vocabulary with Sampled Softmax; weights are tied to the
        # input embedding only for word-level encoding with weight_tying on.
        sampled_softmax = SampledSoftmax(
            num_classes=self.parameters['vocab_size'],
            num_sampled=int(self.parameters['num_sampled']),
            tied_to=embeddings if self.parameters['weight_tying']
            and self.parameters['token_encoding'] == 'word' else None)
        outputs = sampled_softmax([lstm_inputs, next_ids])
        re_outputs = sampled_softmax([re_lstm_inputs, previous_ids])

        # loss=None: the sampled-softmax layers contribute the loss themselves.
        self._model = Model(inputs=[word_inputs, next_ids, previous_ids],
                            outputs=[outputs, re_outputs])
        self._model.compile(optimizer=Adagrad(
            lr=self.parameters['lr'],
            clipvalue=self.parameters['clip_value']),
                            loss=None)
        if print_summary:
            self._model.summary()
evaluation_threads = 1 #mp.cpu_count() print("MLP arguments: %s " %(args)) model_out_file = 'Pretrain/%s_MLP_%s_%d.h5' %(args.dataset, args.layers, time()) # Loading data t1 = time() dataset = Dataset(args.path + args.dataset) train, testRatings, testNegatives = dataset.trainMatrix, dataset.testRatings, dataset.testNegatives num_users, num_items = train.shape print("Load data done [%.1f s]. #user=%d, #item=%d, #train=%d, #test=%d" %(time()-t1, num_users, num_items, train.nnz, len(testRatings))) # Build model model = get_model(num_users, num_items, layers, reg_layers) if learner.lower() == "adagrad": model.compile(optimizer=Adagrad(lr=learning_rate), loss='binary_crossentropy') elif learner.lower() == "rmsprop": model.compile(optimizer=RMSprop(lr=learning_rate), loss='binary_crossentropy') elif learner.lower() == "adam": model.compile(optimizer=Adam(lr=learning_rate), loss='binary_crossentropy') else: model.compile(optimizer=SGD(lr=learning_rate), loss='binary_crossentropy') # Check Init performance t1 = time() (hits, ndcgs) = evaluate_model(model, testRatings, testNegatives, topK, evaluation_threads) hr, ndcg = np.array(hits).mean(), np.array(ndcgs).mean() print('Init: HR = %.4f, NDCG = %.4f [%.1f]' %(hr, ndcg, time()-t1)) # Train model best_hr, best_ndcg, best_iter = hr, ndcg, -1
from keras.layers import Dense, Dropout, Activation, Merge
from keras.optimizers import SGD, Adagrad

# Earlier two-branch (merged) experiment, kept for reference:
# left_branch = Sequential()
# left_branch.add(Dense(32, input_dim=4096))

# right_branch = Sequential()
# right_branch.add(Dense(32, input_dim=4096))

# merged = Merge([left_branch, right_branch], mode='concat')

# Binary classifier over 4096-dimensional input vectors.
model = Sequential()
model.add(Dense(1024, input_dim=4096, init='lecun_uniform',
                activation='relu'))
model.add(Dropout(0.5))
# NOTE(review): input_dim on a non-first layer is ignored by Keras --
# harmless, but presumably a leftover from reordering layers.
model.add(Dense(512, input_dim=4096, init='lecun_uniform',
                activation='relu'))
model.add(Dropout(0.5))
# model.add(Dense(1024, input_dim=4096, init='he_normal', activation='relu'))
# model.add(Dropout(0.5))
# model.add(Dense(64, init='he_normal', activation='relu'))
# model.add(Dropout(0.5))
# model.add(Convolution1D(32, 2, init='lecun_uniform', activation='relu', border_mode='valid', input_dim=64))
# model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

# Two candidate optimizers; only `sgd` is passed to compile below.
sgd = SGD(lr=0.00005, decay=1e-6, momentum=0.9)
ag = Adagrad(lr=0.01, epsilon=1e-06)

# model.compile(optimizer='rmsprop',
#               loss='categorical_crossentropy',
#               metrics=['accuracy'])
model.compile(loss='mean_squared_error', optimizer=sgd, metrics=['accuracy'])
# Use the preprocessed full images/masks directly as training patches.
patches_imgs_train = raw_train_processed
patches_masks_train = mask_train
print("Finish extracting !")
#patches_imgs_train, patches_masks_train = extract_random_with_disc(raw_train_processed,mask_train,patch_height,patch_width,N_subimgs,inside_FOV)
#patches_imgs_train = raw_train_processed
#patches_masks_train = mask_train

#=========== Construct and save the model arcitecture =====
# Patch tensor layout here is (N, channels, height, width).
n_ch = patches_imgs_train.shape[1]
patch_height = patches_imgs_train.shape[2]
patch_width = patches_imgs_train.shape[3]
model = get_unet2(n_ch, patch_height, patch_width)  #the U-net model

# Candidate optimizers; only `sgd` is actually passed to compile below.
adaGrad = Adagrad(lr=learningrate, epsilon=1e-7, decay=1e-6)
sgd = SGD(lr=learningrate, decay=1e-6, momentum=0.9, nesterov=False)
adam = Adam(lr=learningrate,
            beta_1=0.9,
            beta_2=0.999,
            epsilon=None,
            decay=0.0,
            amsgrad=False)
#model.compile(optimizer=sgd, loss='categorical_crossentropy',metrics=['accuracy', mean_IOU_gpu])
model.compile(optimizer=sgd,
              loss=log_dice_loss,
              metrics=['accuracy', mean_IOU_gpu, dice_metric])

# NOTE(review): Python 2 print statements below -- this script targets
# Python 2 only.
print "Check: final output of the network:"
print model.output_shape
#plot(model, to_file='./'+name_experiment+'/'+name_experiment + '_model.png')   #check how the model looks like
def test_adagrad():
    """Smoke-test the Adagrad optimizer with default and decayed settings."""
    for candidate in (Adagrad(), Adagrad(decay=1e-3)):
        _test_optimizer(candidate)
evaluation_threads = 1 #mp.cpu_count() print("MLP arguments: %s " % (args)) model_out_file = 'Pretrain/%s_MLP_%s_%d.h5' % (args.dataset, args.layers, time()) # Loading data t1 = time() dataset = Dataset(args.path + args.dataset) print("Load data done [%.1f s]. " % (time() - t1)) # Build model model = get_model(layers, reg_layers) if learner.lower() == "adagrad": model.compile(optimizer=Adagrad(lr=learning_rate), loss='binary_crossentropy') elif learner.lower() == "rmsprop": model.compile(optimizer=RMSprop(lr=learning_rate), loss='binary_crossentropy') elif learner.lower() == "adam": model.compile(optimizer=Adam(lr=learning_rate), loss='binary_crossentropy') else: model.compile(optimizer=SGD(lr=learning_rate), loss='binary_crossentropy') # Check Init performance t1 = time() test_users, test_items, testNegatives = dataset.test_users, dataset.test_items, dataset.testNegatives
def get_model(args, experiment_dir=None):
    """Build (or resume) the RNN model and select its optimizer.

    Args:
        args: parsed CLI namespace; uses layer_type, num_layers, rnn_size,
            window_size, dropout, and -- when present -- grad_clip,
            optimizer and learning_rate.
        experiment_dir: if given, resume from the checkpoint stored there
            instead of building a fresh model.

    Returns:
        (model, epoch): the compiled multi-GPU model and the epoch to resume
        from (0 for a fresh model).
    """
    epoch = 0

    # Map the CLI layer type onto the matching Keras recurrent layer class;
    # anything other than 'lstm'/'gru' falls back to SimpleRNN.
    recurrent_layer = None
    if args.layer_type == 'lstm':
        recurrent_layer = LSTM
    elif args.layer_type == 'gru':
        recurrent_layer = GRU
    else:
        recurrent_layer = SimpleRNN

    if not experiment_dir:
        model = Sequential(weights=None)
        # Stack recurrent layers; only the final layer collapses the sequence
        # (return_sequences=False) so the Dense head sees a single vector.
        for layer_index in range(args.num_layers):
            kwargs = dict()
            kwargs['units'] = args.rnn_size

            # if this is the first layer
            if layer_index == 0:
                kwargs['input_shape'] = (args.window_size, OUTPUT_SIZE)
                if args.num_layers == 1:
                    kwargs['return_sequences'] = False
                else:
                    kwargs['return_sequences'] = True
                model.add(recurrent_layer(**kwargs))
            else:
                # if this is a middle layer
                if not layer_index == args.num_layers - 1:
                    kwargs['return_sequences'] = True
                    model.add(recurrent_layer(**kwargs))
                else:  # this is the last layer
                    kwargs['return_sequences'] = False
                    model.add(recurrent_layer(**kwargs))

        model.add(Dropout(args.dropout))
        model.add(Dense(OUTPUT_SIZE))
        model.add(Activation('softmax'))
    else:
        model, epoch = utils.load_model_from_checkpoint(experiment_dir)

    # these cli args aren't specified if get_model() is being
    # being called from sample.py
    if 'grad_clip' in args and 'optimizer' in args:
        kwargs = {'clipvalue': args.grad_clip}

        # NOTE(review): a learning rate of 0 is treated as "unset" here
        # (falsy), so the optimizer default is used in that case.
        if args.learning_rate:
            kwargs['lr'] = args.learning_rate

        # select the optimizers
        if args.optimizer == 'sgd':
            optimizer = SGD(**kwargs)
        elif args.optimizer == 'rmsprop':
            optimizer = RMSprop(**kwargs)
        elif args.optimizer == 'adagrad':
            optimizer = Adagrad(**kwargs)
        elif args.optimizer == 'adadelta':
            optimizer = Adadelta(**kwargs)
        elif args.optimizer == 'adam':
            optimizer = Adam(**kwargs)
        elif args.optimizer == 'adamax':
            optimizer = Adamax(**kwargs)
        elif args.optimizer == 'nadam':
            optimizer = Nadam(**kwargs)
        else:
            utils.log(
                'Error: {} is not a supported optimizer. Exiting.'.format(
                    args.optimizer), True)
            exit(1)
    else:
        # so instead lets use a default (no training occurs anyway)
        optimizer = Adam()

    model = multi_gpu_model(model, cpu_merge=False)
    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'])
    return model, epoch
color_mode="rgb", shuffle=False, batch_size=BS) testGen = valAug.flow_from_directory( build_dataset.TEST_PATH, class_mode="categorical", target_size=(48, 48), color_mode="rgb", shuffle=False, batch_size=BS) # initialize IDCNet model and compile model = IDCNet.build(width=48, height=48, depth=3, classes=2) opt = Adagrad(lr=INIT_LR, decay=INIT_LR / NUM_EPOCHS) model.compile(loss="binary_crossentropy", optimizer=opt, metrics=["accuracy"]) H = model.fit_generator( trainGen, steps_per_epoch=totalTrain // BS, validation_data=valGen, validation_steps=totalVal // BS, class_weight=classWeight, epochs=NUM_EPOCHS) # reset the testing generator and then use our trained model to # make predictions on the data print("[INFO] evaluating network...") testGen.reset()
X_hat_Two_DUDE[k_max * i + k - 1, :] = x_dude_hat_two ### 2-D N-DUDE ### C_two, Y_two = N_DUDE.make_data_for_Two_NN_DUDE( P[i], Z[i * n:(i + 1) * n], k, L_new[i * alpha_size:(i + 1) * alpha_size, ], nb_classes, n, offset) model = Sequential() model.add(Dense(40, input_dim=2 * k * nb_classes, init='he_normal')) model.add(Activation('relu')) model.add(Dense(3, init='he_normal')) model.add(Activation('softmax')) rms = RMSprop(lr=0.001, rho=0.9, epsilon=1e-06, clipnorm=1.5) adagrad = Adagrad(clipnorm=1.5) adam = Adam() adadelta = Adadelta() sgd = SGD(lr=0.01, decay=1e-6, momentum=0.95, nesterov=True, clipnorm=1.0) model.compile(loss='poisson', optimizer=adam) model.fit(C_two, Y_two, nb_epoch=10, batch_size=100, show_accuracy=True, verbose=0,
from sklearn.preprocessing import OneHotEncoder
from tensorflow.keras import losses, models, optimizers

# prepare data
# NOTE(review): train/test start empty here -- presumably filled elsewhere
# (or this is a template); X and y below would be empty as written.
train = pd.DataFrame()
test = pd.DataFrame()
X = train.drop("y", axis=1)
y = train["y"]

# parameters
CV_FOLD_NUM = 4
lerning_rate = 0.0001  # NOTE(review): typo'd name ("lerning"); unused below
epochs = 100
batch_size = 400

# NOTE(review): each assignment overwrites the previous one -- only the final
# Adam(lr=0.001) takes effect. Keep a single assignment (or a dict of
# candidates) if that is intentional.
optimizer = Adagrad(lr=0.01)
optimizer = Adadelta(lr=1.0)
optimizer = Adamax(lr=0.002)
optimizer = Adam(lr=0.001)


def step_decay(epoch):
    # Learning-rate schedule: flat 1e-3 for the first 50 epochs, then an
    # exponentially decaying rate.
    if epoch < 50:
        return 0.001
    else:
        return 0.0005 * float(tf.math.exp((1 - epoch)))


# main wave_block
lr_decay = LearningRateScheduler(step_decay)
def main():
    """CLI entry point: load a trained MemNN ABSA sentiment model and write
    test-set polarity predictions to the given output path."""
    start_time = time.time()

    # argument parser
    parser = argparse.ArgumentParser(
        prog='test_sent.py',
        description='Test MemNN-wordvec model for ABSA sentiment classification')
    parser.add_argument('--mlp-hidden-units', type=int, default=256,
                        metavar='<mlp-hidden-units>')
    parser.add_argument('--mlp-hidden-layers', type=int, default=2,
                        metavar='<mlp-hidden-layers>')
    parser.add_argument('--dropout', type=float, default=0.3,
                        metavar='<dropout-rate>')
    parser.add_argument('--mlp-activation', type=str, default='relu',
                        metavar='<activation-function>')
    parser.add_argument('--batch-size', type=int, default=32,
                        metavar='<batch-size>')
    parser.add_argument('--learning-rate', type=float, default=0.001,
                        metavar='<learning-rate>')
    parser.add_argument('--aspects', type=int, required=True,
                        metavar='<number of aspects>')
    parser.add_argument('--domain', type=str, required=True,
                        choices=['rest', 'lapt'], metavar='<domain>')
    parser.add_argument('--cross-val-index', type=int, required=True,
                        choices=range(0, 10),
                        metavar='<cross-validation-index>')
    parser.add_argument('--weights', type=str, required=True,
                        metavar='<weights-path>')
    parser.add_argument('--output', type=str, required=True,
                        metavar='<prediction-path>')
    # BUG FIX: parse_args() was called twice in a row; once is enough.
    args = parser.parse_args()

    # model hyper-parameters
    word_vec_dim = 300
    aspect_dim = args.aspects
    polarity_num = 3
    emb_dim = 75
    emb_size = 100
    img_dim = word_vec_dim
    hops = 2

    ######################
    # Model Descriptions #
    ######################
    print('Generating and compiling model...')
    model = CreateGraph(emb_dim, hops, 'relu', args.mlp_hidden_units,
                        args.mlp_hidden_layers, word_vec_dim, aspect_dim,
                        img_dim, emb_size, polarity_num)

    # loss and optimizer
    adagrad = Adagrad(lr=args.learning_rate)
    model.compile(loss={'output': 'categorical_crossentropy'},
                  optimizer=adagrad)
    model.load_weights(args.weights)
    print('Compilation finished.')
    print('Time: %f s' % (time.time() - start_time))

    ######################
    #     Load Data      #
    ######################
    print('Loading data...')
    # aspect mapping
    asp_map = LoadAspectMap(args.domain)
    # sentences
    te_sents = LoadSentences(args.domain, 'te', args.cross_val_index)
    # aspects
    te_asps = LoadAspects(args.domain, 'te', args.cross_val_index, asp_map)
    print('Finished loading data.')
    print('Time: %f s' % (time.time() - start_time))

    #####################
    #       GloVe       #
    #####################
    print('Loading GloVe vectors...')
    word_embedding, word_map = LoadGloVe()
    print('GloVe vectors loaded')
    print('Time: %f s' % (time.time() - start_time))

    #####################
    #      Encoders     #
    #####################
    asp_encoder = GetAspectEncoder(asp_map)
    lab_encoder = joblib.load('models/' + args.domain + '_labelencoder_' +
                              str(args.cross_val_index) + '.pkl')

    ######################
    #    Make Batches    #
    ######################
    print('Making batches...')
    # pad the last batch with the final example so every batch is full
    te_sent_batches = [
        b for b in MakeBatches(te_sents, args.batch_size,
                               fillvalue=te_sents[-1])
    ]
    te_asp_batches = [
        b for b in MakeBatches(te_asps, args.batch_size,
                               fillvalue=te_asps[-1])
    ]
    print('Finished making batches.')
    print('Time: %f s' % (time.time() - start_time))

    ######################
    #      Testing       #
    ######################
    print('Testing started...')
    pbar = generic_utils.Progbar(len(te_sent_batches) * args.batch_size)

    predictions = []
    # testing feedforward: predict each batch, decode class ids to labels
    for i in range(len(te_sent_batches)):
        X_sent_batch = GetSentenceTensor(te_sent_batches[i], word_embedding,
                                         word_map)
        X_asp_batch = GetAspectFeatures(te_asp_batches[i], asp_encoder)
        pred = model.predict_on_batch({
            'sentence': X_sent_batch,
            'aspect': X_asp_batch
        })
        pred = pred[0]
        pred = np.argmax(pred, axis=1)
        pol = lab_encoder.inverse_transform(pred).tolist()
        predictions.extend(pol)
        pbar.add(args.batch_size)

    # truncate padding back to the true number of sentences when saving
    SavePredictions(args.output, predictions, len(te_sents))
    print('Testing finished.')
    print('Time: %f s' % (time.time() - start_time))