def run_KMC(model, num_iterations, foldername, atom_features_bool,
            pairs_features_bool, num_atom_features, num_pairs_features,
            num_atoms, molecule_size_normalizer, cycle_size_normalizer,
            max_num_of_bonds, num_timesteps, validation_percentage):
    """Run the whole Kinetic Monte Carlo process using the trained ``model``.

    A single sample is fetched from a ``DataLoader`` to seed the system.
    Every iteration then (1) asks the model for its "reactivity scores" and
    time prediction, (2) lets ``run_step_KMC`` pick a reaction and the time
    before it, and (3) rebuilds the model inputs from the updated adjacency
    matrix.

    Returns:
        A ``(bond_change, first_frame, time)`` tuple: the bond-change
        mapping threaded through ``run_step_KMC``, the initial frame, and
        the list of times (starts as ``[0]``; one value is appended per
        iteration).
    """
    loader = DataLoader(foldername, atom_features_bool, pairs_features_bool,
                        num_atom_features, num_pairs_features, num_atoms,
                        molecule_size_normalizer, cycle_size_normalizer,
                        max_num_of_bonds, num_timesteps,
                        validation_percentage)

    # Initial model input; the two target arrays returned last are not used.
    (Xtest_input_atom, Xtest_input_pairs, Xtest_atom_graph, Xtest_mask,
     Xtest_extract_pairs, _, _) = loader.get_data_no_generator(1, 1, 'train')

    first_frame = get_first_frame(Xtest_input_pairs, Xtest_extract_pairs,
                                  num_atoms)
    atom_types = Xtest_input_atom[0, :, 0]
    adjacency_matrix = first_frame.copy()
    bond_change = {}
    timeline = [0]

    for step in range(num_iterations):
        # Progress report every tenth iteration.
        if not step % 10:
            print(step)

        # "Reactivity scores" and time prediction for the current state.
        model_inputs = [
            Xtest_input_atom,
            Xtest_input_pairs,
            Xtest_atom_graph,
            Xtest_mask,
            Xtest_extract_pairs,
        ]
        results_probs, result_time = model.predict(model_inputs)

        # Pick a reaction, and the time before it, with the KMC algorithm.
        adjacency_matrix, bond_change, next_time = run_step_KMC(
            results_probs[0],
            result_time[0, 0],
            Xtest_extract_pairs,
            adjacency_matrix,
            bond_change,
            timeline[-1],
        )
        timeline.append(next_time)

        # Apply the picked reaction and recompute the model inputs.
        Xtest_input_atom, rebuilt_pairs, Xtest_atom_graph = get_new_input(
            adjacency_matrix, atom_features_bool, pairs_features_bool,
            molecule_size_normalizer, cycle_size_normalizer, num_atoms,
            num_atom_features, num_pairs_features, max_num_of_bonds,
            atom_types)
        Xtest_input_pairs = tf.gather_nd(rebuilt_pairs, Xtest_extract_pairs)

    return bond_change, first_frame, timeline
def train(args):
    """Train the siamese model and checkpoint the best validation weights.

    Reads ``args.path``, ``args.samples`` and ``args.aug`` to build the data
    loader, and ``args.exp_name`` to name the output directory (under
    ``"./"``) that receives TensorBoard logs (``logs``) and weight
    checkpoints (``ckpt``). Training runs for 700 epochs with batch size 16.
    """
    batch_size = 16
    shape = (200, 100, 3)
    exp_path = "./"

    loader = DataLoader(args.path, args.samples, shape[:2], augment=args.aug)

    model = siamese_model(shape)
    # NOTE(review): `lr=` and `fit_generator` are deprecated spellings in
    # recent TensorFlow releases - confirm the pinned TF version.
    model.compile(
        loss='binary_crossentropy',
        optimizer=tf.keras.optimizers.Adam(lr=0.0001),
        metrics=['binary_accuracy', 'acc'],
    )
    model.summary()

    run_dir = os.path.join(exp_path, args.exp_name)
    tensorboard_cb = tf.keras.callbacks.TensorBoard(
        log_dir=os.path.join(run_dir, "logs"),
        histogram_freq=0,
        write_graph=True,
        write_images=True,
    )
    # Keep only the weights with the highest 'val_acc' seen so far.
    checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(
        os.path.join(run_dir, "ckpt"),
        monitor='val_acc',
        verbose=1,
        save_weights_only=True,
        save_best_only=True,
        mode='max',
    )
    callbacks_list = [
        checkpoint_cb,
        tf.keras.callbacks.TerminateOnNaN(),
        tensorboard_cb,
    ]

    train_batches = loader.generate_epoch_train(batch_size)
    val_batches = loader.generate_epoch_val(batch_size)
    val_steps = loader.val_size // batch_size
    train_steps = loader.train_size // batch_size
    history = model.fit_generator(
        train_batches,
        validation_data=val_batches,
        validation_steps=val_steps,
        steps_per_epoch=train_steps,
        epochs=700,
        callbacks=callbacks_list,
    )

    # Per-epoch accuracy curves (validation / training).
    # NOTE(review): neither value is used in the visible code.
    acc = history.history['val_binary_accuracy']
    tacc = history.history['binary_accuracy']
def test(args):
    """Evaluate the saved siamese model on the test split and print the result.

    Rebuilds and compiles the model exactly as for training, loads the
    weights stored at ``./<args.exp_name>/ckpt``, runs ``model.evaluate`` on
    the loader's test generator, prints what it returns and then sleeps for
    five seconds.
    """
    batch_size = 16
    shape = (200, 100, 3)
    exp_path = "./"

    loader = DataLoader(args.path, 50, shape[:2], test=True)

    model = siamese_model(shape)
    model.compile(
        loss='binary_crossentropy',
        optimizer=tf.keras.optimizers.Adam(lr=0.0001),
        metrics=['binary_accuracy', 'acc'],
    )

    weights_path = os.path.join(exp_path, args.exp_name, "ckpt")
    model.load_weights(weights_path)
    print("Model Loaded Successifully")
    model.summary()

    # Step count: test_size * 50 examples consumed in batches of batch_size.
    num_steps = loader.test_size * 50 // batch_size
    results = model.evaluate(loader.generate_test(batch_size), steps=num_steps)
    print(results)
    sleep(5)
def train(cfg):
    """Train a ``KAReader`` model, validate after every epoch, then test it.

    Args:
        cfg: Configuration mapping. Keys read here: ``model_id``,
            ``data_folder``, ``mode`` (selects the ``'<mode>_documents'``
            entry), ``learning_rate``, ``lr_schedule``, ``num_epoch``,
            ``gradient_clip`` (``0`` disables clipping), ``eps`` and
            ``name``.

    Side effects:
        Writes scalars through a ``SummaryWriter`` under
        ``tf_logs/<model_id>``, saves ``model/<name>/<model_id>_best.pt``
        whenever the validation hits score improves, and saves
        ``model/<name>/<model_id>_final.pt`` once training ends.
    """
    tf_logger = SummaryWriter('tf_logs/' + cfg['model_id'])

    # train and test share the same set of documents
    documents = load_documents(
        cfg['data_folder'] + cfg['{}_documents'.format(cfg['mode'])])

    # train data
    train_data = DataLoader(cfg, documents)
    valid_data = DataLoader(cfg, documents, mode='dev')

    model = KAReader(cfg)
    model = model.to(torch.device('cuda'))

    # Materialise the trainable parameters as a list. A `filter` iterator
    # would be exhausted by the optimizer constructor, leaving the later
    # `clip_grad_norm_` call with nothing to clip.
    trainable = [p for p in model.parameters() if p.requires_grad]
    optim = torch.optim.Adam(trainable, lr=cfg['learning_rate'])

    if cfg['lr_schedule']:
        scheduler = torch.optim.lr_scheduler.MultiStepLR(optim, [30],
                                                         gamma=0.5)

    model.train()
    best_val_f1 = 0
    best_val_hits = 0
    for epoch in range(cfg['num_epoch']):
        batcher = train_data.batcher(shuffle=True)
        train_loss = []
        for feed in batcher:
            loss, pred, pred_dist = model(feed)
            train_loss.append(loss.item())
            optim.zero_grad()
            loss.backward()
            if cfg['gradient_clip'] != 0:
                torch.nn.utils.clip_grad_norm_(trainable,
                                               cfg['gradient_clip'])
            optim.step()
        tf_logger.add_scalar('avg_batch_loss', np.mean(train_loss), epoch)

        val_f1, val_hits = test(model, valid_data, cfg['eps'])
        if cfg['lr_schedule']:
            scheduler.step()
        tf_logger.add_scalar('eval_f1', val_f1, epoch)
        tf_logger.add_scalar('eval_hits', val_hits, epoch)

        if val_f1 > best_val_f1:
            best_val_f1 = val_f1
        # The "best" checkpoint is selected on hits, not on F1.
        if val_hits > best_val_hits:
            best_val_hits = val_hits
            torch.save(
                model.state_dict(),
                'model/{}/{}_best.pt'.format(cfg['name'], cfg['model_id']))
        print('evaluation best f1:{} current:{}'.format(best_val_f1, val_f1))
        print('evaluation best hits:{} current:{}'.format(
            best_val_hits, val_hits))

    print('save final model')
    torch.save(model.state_dict(),
               'model/{}/{}_final.pt'.format(cfg['name'], cfg['model_id']))

    print('\n..........Finished training, start testing.......')
    test_data = DataLoader(cfg, documents, mode='test')
    model.eval()
    print('finished training, testing final model...')
    test(model, test_data, cfg['eps'])
# NOTE(review): the five statements below are the tail of a function whose
# header lies above this chunk; they are reproduced unchanged.
    print('how many eval samples......', len(f1s))
    print('avg_f1', np.mean(f1s))
    print('avg_hits', np.mean(hits))
    # Presumably restores training mode after evaluation - confirm against
    # the start of the function.
    model.train()
    return np.mean(f1s), np.mean(hits)


# Script entry point: seed the RNGs, then train or test depending on
# cfg['mode'].
if __name__ == "__main__":
    # config_file = sys.argv[2]
    cfg = get_config()
    # Seed Python, NumPy and torch (CPU and all CUDA devices) from cfg['seed'].
    random.seed(cfg['seed'])
    np.random.seed(cfg['seed'])
    torch.manual_seed(cfg['seed'])
    torch.cuda.manual_seed_all(cfg['seed'])
    if cfg['mode'] == 'train':
        train(cfg)
    elif cfg['mode'] == 'test':
        # Load the documents listed under the 'test_documents' config key.
        documents = load_documents(cfg['data_folder'] + cfg['{}_documents'.format(cfg['mode'])])
        test_data = DataLoader(cfg, documents, mode='test')
        model = KAReader(cfg)
        model = model.to(torch.device('cuda'))
        # Evaluate the '<model_id>_best.pt' checkpoint for this config.
        model_save_path = 'model/{}/{}_best.pt'.format(cfg['name'], cfg['model_id'])
        model.load_state_dict(torch.load(model_save_path))
        model.eval()
        test(model, test_data, cfg['eps'])
    else:
        # NOTE(review): asserts are stripped under `python -O`, so an unknown
        # mode would then pass silently - consider raising an exception.
        assert False, "--train or --test?"
from data_generator import DataLoader
from rp_net import RPNet
import numpy as np
import tensorflow as tf
import cv2

# Prepare images and anchor targets from the "rcnn_data" loader.
if __name__ == "__main__":
    tf.random.set_seed(123)
    print(tf.executing_eagerly())

    loader = DataLoader("rcnn_data")
    num_anchors = len(loader.anchor_sizes)
    anchor_labels = loader.label_anchors(8, 0.7, 0.30)
    scale = loader.anchor_scale

    # Images as float32 in [0, 1], reshaped to (-1, 256, 256, 3).
    images = np.reshape(loader.images, (-1, 256, 256, 3)).astype(np.float32) / 255

    # The output grid is the 256-pixel input side divided by the anchor scale.
    output_width = output_height = int(256 / scale)

    # Classification targets: one channel per anchor at every grid cell.
    cls_shape = (-1, output_height, output_width, num_anchors)
    target_cls = np.reshape(loader.anchor_cls, cls_shape).astype(np.float32)

    target_reg = loader.anchor_reg
myLosses = { 'prediction_probs': "categorical_crossentropy", 'prediction_time': 'mse' } myLossesWeights = {'prediction_probs': 10**5, 'prediction_time': 10**-13} depthlist = [5] # range(5,3,-1) hiddenlist = [32] ratelist = {"a": 0.001} nbdense = 2 # Select how to load and how to preprocess data according to the path selected,using data generator or not that fits with our computational ressources # for training and validation data if use_bucket == 0: data_loader = DataLoader(foldername, atom_features_bool, pairs_features_bool, num_atom_features, num_pairs_features, num_atoms, molecule_size_normalizer, cycle_size_normalizer, max_num_of_bonds, num_timesteps, validation_percentage) else: data_loader = DataLoaderBucket(foldername, atom_features_bool, pairs_features_bool, num_atom_features, num_pairs_features, num_atoms, molecule_size_normalizer, cycle_size_normalizer, max_num_of_bonds, num_timesteps, validation_percentage) if use_generator: data = tf.data.Dataset.from_generator( data_loader.get_data_with_generator, args=[num_examples_per_epoch, batch_size], output_types=((tf.float32, tf.float32, tf.int32, tf.float32,