def main():
    # read args
    args = u.read_args()
    u.create_directories(args)

    # create regression model
    c = Regression(args)

    # if training flag is true, build model and train it
    if args['train']:
        model = c.build()
        plot_model(model, to_file='regression.png',
                   show_layer_names=False, show_shapes=False)
        operator = Train(model, args)
        operator.train()

    # if test is true, load best model and test it
    if args['test']:
        # load data only without creating model
        operator = Train(None, args)
        true, predicted = operator.load()
        plt.plot(true, color='red', label='true')
        plt.plot(predicted, color='blue', label='predicted')
        plt.legend()
        plt.show()
def compute_similarity(input_folder, save=False, output_folder="similarity/"):
    artists_list = []
    elvis_files = glob.glob(input_folder + "/*.json")
    prefix = "_".join(input_folder.split("/"))
    utils.create_directories(output_folder)
    output_matrix = output_folder + "/" + prefix + "_similarity_matrix.npy"
    output_index = output_folder + "/" + prefix + "_artists_list.tsv"
    graphs = []
    for file in elvis_files:
        G = nx.Graph()
        data = json.load(codecs.open(file, "r", "utf-8"))
        filename = file[file.rfind("/") + 1:-5]
        G.add_node(filename)
        for sentence in data:
            for entity in sentence["entities"]:
                G.add_edge(filename, entity["uri"])
        graphs.append(G)
        artists_list.append(filename)
    sim_matrix = np.zeros((len(graphs), len(graphs)))
    for i in range(0, len(graphs)):
        for j in range(i, len(graphs)):
            mcs = _maximal_common(graphs[i], graphs[j])
            sim_matrix[i, j] = mcs
            sim_matrix[j, i] = mcs
    if save:
        np.save(output_matrix, sim_matrix)
        fw = open(output_index, "w")
        fw.write("\n".join(artists_list))
        fw.close()
    return sim_matrix, artists_list
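# The _maximal_common helper called in compute_similarity above is not shown in
# this snippet. Below is a minimal, hypothetical sketch that assumes an MCS-style
# similarity: count the shared nodes and edges and normalise by the size of the
# larger graph. The original implementation may use a different formula.
def _maximal_common(g1, g2):
    common_nodes = set(g1.nodes()) & set(g2.nodes())
    # normalise undirected edges so (a, b) and (b, a) compare equal
    edges1 = {frozenset(e) for e in g1.edges()}
    edges2 = {frozenset(e) for e in g2.edges()}
    common_edges = edges1 & edges2
    mcs_size = len(common_nodes) + len(common_edges)
    norm = max(g1.number_of_nodes() + g1.number_of_edges(),
               g2.number_of_nodes() + g2.number_of_edges())
    return mcs_size / norm if norm else 0.0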
def process_folder(technique, input_folder, output_folder="", tokenize=True,
                   start_index=0, end_index=None):
    if output_folder == "":
        output_folder = ('entities/' + input_folder[input_folder.rfind('/') + 1:]
                         + "/" + technique)
    utils.create_directories(output_folder)
    input_filenames = sorted(list(glob.glob(input_folder + "/*.txt")))
    i = 0
    for input_filename in input_filenames[start_index:end_index]:
        suffix = input_filename[input_filename.rfind("/") + 1:-4]
        output_filename = output_folder + "/" + suffix + ".json"
        if not os.path.exists(output_filename):
            if tokenize:
                with codecs.open(input_filename, "r", "utf-8") as f:
                    text = f.read()
                sentences = sent_tokenize(text)
            else:
                with codecs.open(input_filename, "r", "utf-8") as f:
                    sentences = [line for line in f]
            ner_sentences = []
            if technique == 'tagme':
                ner_sentences = tagme(sentences)
            elif technique == 'babelfy':
                ner_sentences = babelfy(sentences)
            elif technique == 'spotlight':
                ner_sentences = spotlight(sentences)
            json.dump(ner_sentences, codecs.open(output_filename, "w", "utf-8"))
        i += 1
        sys.stdout.write("\rProcessing Data: %d of %d"
                         % (i, len(input_filenames[start_index:end_index])))
        sys.stdout.flush()
def prepare_data(train_dir):
    """Prepare data for training"""
    # No need to create training or val data directory if they already exist
    if osp.isdir(cfg.train_data_dir) and osp.isdir(cfg.val_data_dir):
        print("Using existing data directories: \n{}\n{}\n".format(
            cfg.train_data_dir, cfg.val_data_dir))
        # Still need to set number of training and val images
        for class_name in cfg.classes:
            cfg.nb_train_samples += len(
                os.listdir(osp.join(cfg.train_data_dir, class_name)))
            cfg.nb_val_samples += len(
                os.listdir(osp.join(cfg.val_data_dir, class_name)))
    else:
        print("Loading training images...\n")
        # Load all training images from given directory
        imgs, _, img_paths = load_train_dir(train_dir)
        # Split into training (80%) and val (20%) sets
        train_imgs, val_imgs, train_img_paths, val_img_paths = train_test_split(
            imgs, img_paths, test_size=0.20, random_state=seed)
        # Set number of training samples and val samples
        cfg.nb_train_samples = len(train_imgs)
        cfg.nb_val_samples = len(val_imgs)
        # Create data directories for training and val data
        for class_name in cfg.classes:
            create_directories(osp.join(cfg.train_data_dir, class_name))
            create_directories(osp.join(cfg.val_data_dir, class_name))
        print("Writing images to training data directory.\n")
        write_data_directory(train_imgs, train_img_paths, cfg.train_data_dir)
        print("Writing images to val data directory.\n")
        write_data_directory(val_imgs, val_img_paths, cfg.val_data_dir)
def main(config): if config.task == 'train': config.train = 1 else: config.train = 0 if config.dataset == 'life': config.task = 'regression' config.experiment = 'train-test' else: config.task = 'classification' config.experiment = 'doublecv' config.expt_name = "Exp" + str( config.experiment ) + "_" + config.mod_split + "_" + config.build_model + "_" + config.last_layer # Create save directories utils.create_directories(config) data = load_dataset(config) if config.experiment == 'mar_doublecv' or config.experiment == 'doublecv': n_feature_sets = len(data.keys()) - 1 elif config.dataset == 'life': n_feature_sets = int(len(data.keys()) / 2) - 1 X = [np.array(data['{}'.format(i)]) for i in range(n_feature_sets)] y = np.array(data['y']) X_test = None y_test = None if config.task == 'classification': config.n_classes = len(set(y)) if config.dataset == 'life': X_test = [ np.array(data['{}_test'.format(i)]) for i in range(n_feature_sets) ] y_test = np.array(data['y_test']) config.n_feature_sets = n_feature_sets config.feature_split_lengths = [i.shape[1] for i in X] if config.verbose > 0: print('Dataset used ', config.dataset) print('Number of feature sets ', n_feature_sets) [ print('Shape of feature set {} {}'.format(e, np.array(i).shape)) for e, i in enumerate(X) ] trainer.train(X, y, config, X_test, y_test) print(config.expt_name) print(config.dataset)
def train(): create_directories() # start recording summaries log_summaries() writer = tf.train.SummaryWriter( logdir, graph=train_sess.graph ) mnist = input_data.read_data_sets('MNIST_data', one_hot=True) init = tf.global_variables_initializer() train_sess.run(init) print('Initialized Variables...') print('Training...') print('Launch TensorBoard to see metrics.') for i in range(n_iter): batch = mnist.train.next_batch(batch_size) _, summ = train_sess.run( [optimizer, summaries], feed_dict={x: batch[0], y: batch[1], keep_prob: dropout_prob} ) writer.add_summary(summ, global_step=i) print('') # done training, calculate acc on test set print("Test Accuracy: %g" % train_sess.run( accuracy, feed_dict={ x: mnist.test.images, y: mnist.test.labels, keep_prob: 1.0 } )) print('') # save if desired while True: prompt = raw_input('Do you wish to save model weights? [y/N] ') if prompt == 'y': fname = raw_input('Enter filename > ') save_path = saver.save(train_sess, path.join(savedir, fname)) print('Model saved at ' + save_path) break elif prompt == 'N': break # close files and sessions writer.close() train_sess.close()
def voting(source, level): tools = ['babelfy', 'tagme', 'spotlight'] filenames = sorted(list(glob.glob(source + "/" + tools[0] + "/*.json"))) output_folder = source + "/agreement_" + str(level) + "/" utils.create_directories(output_folder) n = 0 for file in filenames: output_sentences = [] name = file[file.rfind("/") + 1:] sentences = json.load(codecs.open(file, "r", "utf-8")) ner_file = dict() ner_sentences = dict() for tool in tools: ner_file[tool] = source + "/" + tool + "/" + name ner_sentences[tool] = json.load( codecs.open(ner_file[tool], "r", "utf-8")) i = 0 for i in range(0, len(sentences)): sentence = dict() sentence['text'] = sentences[i]['text'] sentence['index'] = sentences[i]['index'] sentence['entities'] = [] entities = dict() all_entities = dict() for tool in tools: entities[tool] = set() for entity in ner_sentences[tool][i]['entities']: entities[tool].add((entity['startChar'], entity['endChar'], entity['uri'])) all_entities[(entity['startChar'], entity['endChar'], entity['uri'])] = entity agreement3 = entities[tools[0]].intersection( entities[tools[1]]).intersection(entities[tools[2]]) if level == 3: agreement = agreement3 elif level == 2: inter1 = entities[tools[0]].intersection(entities[tools[1]]) inter2 = entities[tools[0]].intersection(entities[tools[2]]) inter3 = entities[tools[1]].intersection(entities[tools[2]]) agreement = inter1.union(inter2).union(inter3) for entity_key in agreement: entity = all_entities[entity_key] if level == 3 or (level == 2 and entity_key in agreement3): entity['confidence'] = 3 else: entity['confidence'] = 2 sentence['entities'].append(entity) output_sentences.append(sentence) json.dump(output_sentences, codecs.open(output_folder + name, 'w', 'utf-8')) n += 1 if n % 1000 == 0: print n
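# Hedged usage example for the voting step above. The path is illustrative only:
# it must contain babelfy/, tagme/ and spotlight/ subfolders with matching *.json
# files (as produced by process_folder); level=2 keeps entities detected by at
# least two of the three tools, level=3 requires agreement of all three.
voting("entities/biographies", 2)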
def __init__(self, opts, load=False):
    self.sess = tf.Session()
    self.opts = opts
    utils.opts_check(self)
    self.z_dim = self.opts['z_dim']
    self.batch_size = self.opts['batch_size']
    self.train_data, self.test_data = utils.load_data(self, seed=0)
    self.data_dims = self.train_data.shape[1:]
    self.input = tf.placeholder(tf.float32, (None,) + self.data_dims, name="input")
    self.losses_train = []
    self.losses_test_random = []
    self.losses_test_fixed = []
    self.experiment_path = self.opts['experiment_path']
    if load is False:
        utils.create_directories(self)
        utils.save_opts(self)
        utils.copy_all_code(self)
    models.encoder_init(self)
    models.decoder_init(self)
    models.prior_init(self)
    models.loss_init(self)
    models.optimizer_init(self)
    if 'data_augmentation' in self.opts and self.opts['data_augmentation'] is True:
        models.data_augmentation_init(self)
    self.fixed_test_sample = self.sample_minibatch(test=True, seed=0)
    self.fixed_train_sample = self.sample_minibatch(test=False, seed=0)
    self.fixed_codes = self.sample_codes(seed=0)
    if self.opts['make_pictures_every'] is not None:
        utils.plot_all_init(self)
    self.saver = tf.train.Saver(keep_checkpoint_every_n_hours=2)
    self.sess.run(tf.global_variables_initializer())
    if load is True:
        self.load_saved_model()
def main(config):
    # Create save directories
    utils.create_directories(config)

    # Prepare and load the data
    data = dataset.prepare_data(config.dataset_dir, config)

    # Train the ensemble models
    # if config.training_type == 'bagging':
    #     ensemble_trainer.bagging_ensemble_training(data, config)
    # elif config.training_type == 'boosting':
    #     ensemble_trainer.boosted_ensemble_training(data, config)

    # Evaluate the model
    test_data = dataset.prepare_test_data(config.test_dataset_dir, config)
    evaluator.evaluate(data, test_data, config)
    print(config.model_dir, config.boosting_type, config.voting_type)
def main(config):
    # Create save directories
    utils.create_directories(config)

    # Prepare and load the data
    if 'silences' in config.model_types:
        data = dataset.prepare_data_new(config.dataset_dir, config)
    else:
        data = dataset.prepare_data(config.dataset_dir, config)
    # print(data)
    # return

    # Train the ensemble models
    if config.training_type == 'bagging':
        ensemble_trainer.bagging_ensemble_training(data, config)
    elif config.training_type == 'boosting':
        ensemble_trainer.boosted_ensemble_training(data, config)

    # Evaluate the model
    if 'silences' not in config.model_types:
        test_data = dataset.prepare_test_data(config.test_dataset_dir, config)
        evaluator.evaluate(data, test_data, config)
def main():
    # read args
    args = u.read_args()
    u.create_directories(args)

    # create classification model
    c = Classifier(args)

    # if training flag is true build model and train it
    if args['train']:
        model = c.build()
        plot_model(model, to_file=args['exp_dir'] + 'modelimage' + '.png',
                   show_layer_names=False, show_shapes=False)
        operator = Train(model, args)
        operator.train()
        operator.validate()

    # if test is true, load best model and test it
    if args['test']:
        # load data only without creating model
        operator = Train(None, args)
        operator.validate()
        true, predicted = operator.test()
        # plot confusion matrix
        class_names = ['0', '1']
        cf = confusion_matrix(true, predicted)
        plt.figure()
        u.plot_confusion_matrix(cf, classes=class_names, normalize=False,
                                title='Confusion matrix, without normalization')
def train(source, target): scaled_logits, src_acc, trgt_acc, grad = build_graph(source, target) init = tf.global_variables_initializer() summaries = tf.merge_all_summaries() if not path.isdir(savedir): print('No models found. Start training.') covnet_model.train() create_directories() if raw_input('Do you want to use your own weights? [y\N] ') == 'y': fname = raw_input('Enter saved model name > ') weights = path.join(savedir, fname) else: weights = path.join(savedir, 'default') with tf.Session() as sess: sess.run(init) covnet_model.saver.restore(sess, weights) print('Weights restored.') mnist = input_data.read_data_sets('MNIST_data', one_hot=True) writer = tf.train.SummaryWriter(logdir, graph=sess.graph) src_images, src_labels = get_class(source, mnist.test.images, mnist.test.labels) # pick a random image that is correctly classified by CNN k = 0 while True: original = src_images[np.newaxis, k] label = src_labels[np.newaxis, k] image = np.copy(original) l = scaled_logits.eval( feed_dict={ covnet_model.x: original, covnet_model.y: label, covnet_model.keep_prob: 1. }) if np.argmax(l) == source: # correctly classified break print('Generating Adversarial Image...') print('Open tensorboard to visualize.') # train loop i = 0 target_acc = 0. start_acc = [] while target_acc < .99: # fool to 99% acc source_acc, target_acc, dimg, summ = sess.run( [src_acc, trgt_acc, grad, summaries], feed_dict={ covnet_model.x: image, covnet_model.y: label, covnet_model.keep_prob: 1. }) if i == 0: start_acc.extend([source_acc, target_acc]) writer.add_summary(summ, global_step=i) image = image + learning_rate * dimg.reshape(1, 28 * 28) diff = np.abs(original - image) print("%d source_acc %.5f, target_acc %.5f, sum: %.5f" % (i, source_acc, target_acc, np.sum(diff))) i += 1 print('Adversarial example generated.') # Show the example fig = plt.figure(figsize=(30, 10)) plt.subplot(131) plt.imshow(original.reshape(28, 28), cmap='gray') plt.axis('off') plt.title('Original. source: (%f), target: (%f)' % tuple(start_acc)) plt.subplot(132) plt.imshow(diff.reshape(28, 28), cmap='gray') plt.title('Delta (%f)' % np.sum(diff)) plt.axis('off') plt.subplot(133) plt.imshow(image.reshape(28, 28), cmap='gray') plt.axis('off') plt.title('Adversarial source: (%f), target: (%f)' % (source_acc, target_acc)) plt.show() # ask to save while True: prompt = raw_input('Do you want to save this example? [y\N] ') if prompt == 'y': fname = raw_input( 'Enter name of npy file without extension > ') np.savez(path.join(exampledir, fname), source=original, delta=diff, target=image, source_acc=source_acc, target_acc=target_acc) break elif prompt == 'N': break covnet_model.train_sess.close()
model_prefix = args.dataset + '_'

# classifier to extract features (for fid score computation)
try:
    classifier = torch.load('classifier.pt', map_location='cpu')
    classifier.eval()
    print('Classifier loaded!')
except FileNotFoundError:
    classifier = Classifier()
    sys.exit("Need to train a classifier!")  # TODO: train classifier

# directories for generated samples
dir_results = model_prefix + 'results'
dir_samples = model_prefix + 'samples'
create_directories(dir_results, dir_samples)

########## TEST MODE ##########
if args.epochs is None:
    # load generator
    G = torch.load(model_prefix + 'generator.pt').to(device)
    print('Generator loaded!')
    # generate samples
    generate_samples(G, dir_samples, args.batchsize, num_samples=4096)
    sampleloader = get_sample_loader(dir_samples, args.batchsize, image_size)
    print('Samples generated!')
    # compute fid score with test set
    fid_score = get_fid_score(classifier, sampleloader, testloader)
    sys.exit("FID score from test set: " + str(fid_score))
def test(): model_type = FLAGS.model_type run_desc = FLAGS.run_desc data_dir = Path(FLAGS.data_dir) checkpoints_dir = Path(FLAGS.checkpoints_dir) / model_type / run_desc models_dir = Path(FLAGS.models_dir) / model_type / run_desc results_dir = Path(FLAGS.results_dir) / model_type / run_desc learning_rate = LEARNING_RATE epoch_no = FLAGS.epoch sent_hidden_dim = FLAGS.sent_hidden_dim doc_hidden_dim = FLAGS.doc_hidden_dim if not data_dir.exists(): raise ValueError('Data directory does not exist') # create other directories if they do not exist create_directories(checkpoints_dir, models_dir, results_dir) # load the data print('Loading the data...') # get the glove and elmo embedding glove_dim = 0 elmo_dim = 0 GloVe_vectors = None ELMo = None if 'glove' in model_type: GloVe_vectors = GloVe() glove_dim = WORD_EMBED_DIM print('Uploaded GloVe embeddings.') if 'elmo' in model_type: ELMo = Elmo(options_file=ELMO_OPTIONS_FILE, weight_file=ELMO_WEIGHT_FILE, num_output_representations=1, requires_grad=False, dropout=0).to(DEVICE) elmo_dim = ELMO_EMBED_DIM print('Uploaded Elmo embeddings.') input_dim = glove_dim + elmo_dim # get the fnn and snli data FNN_small_test = FNNDataset(data_dir / ('FNN_small_test.pkl'), GloVe_vectors, ELMo) FNN_DL_small_test = data.DataLoader(dataset=FNN_small_test, batch_size=BATCH_SIZE_FN, num_workers=0, shuffle=True, drop_last=True, collate_fn=PadSortBatchFNN()) print('Uploaded FNN data.') print('Initializing the model...', end=' ') model = initialize_han(input_dim, sent_hidden_dim, doc_hidden_dim, NUM_CLASSES_FN, DEVICE) print('Working on: ', end='') print(DEVICE) print('Done!') print_model_parameters(model) print() optimizer = optim.Adam(params=model.parameters(), lr=LEARNING_RATE) print('Loading model weights.') #model.load_state_dict(torch.load(CHECKPOINTS_DIR_DEFAULT / 'HierarchicalAttentionNet_model.pt')) if epoch_no == '0': model_path = models_dir / Path('HierarchicalAttentionNet_model.pt') model = load_model(model_path, model, checkpoint=False) #_, _, _ = load_latest_checkpoint(model_path, model, optimizer) else: checkpoint_path = checkpoints_dir / Path( 'HierarchicalAttentionNet_Adam_checkpoint_' + str(epoch_no) + '_.pt') model = load_model(checkpoint_path, model, checkpoint=True) #_, _, _ = load_checkpoint(checkpoint_path, model, optimizer) model.eval() loss_func_fn = nn.CrossEntropyLoss() y_pred = [] y_true = [] for step, batch in enumerate(FNN_DL_small_test): articles, article_dims, labels = batch out = model(batch=articles, batch_dims=article_dims) y_pred.append(out.argmax(dim=1).to(DEVICE).item()) y_true.append(labels.to(DEVICE).item()) if step % 100 == 0 and step != 0: print( sum(1 for x, y in zip(y_pred, y_true) if x == y) / len(y_pred)) #print(sklearn.metrics.precision_recall_fscore_support(y_true, y_pred, average=None)) print( sklearn.metrics.precision_recall_fscore_support(y_true, y_pred, average='micro')) print( sklearn.metrics.precision_recall_fscore_support(y_true, y_pred, average='macro')) print( sklearn.metrics.precision_recall_fscore_support(y_true, y_pred, average=None))
def train_llp(file_name, model_name, useGPU2, constit_input, track_input, MSeg_input, jet_input, plt_model=False, frac=1.0, batch_size=5000, reg_value=0.001, dropout_value=0.1, epochs=50, learning_rate=0.002, hidden_fraction=1, kfold=None): """ Takes in arguments to change architecture of network, does training, then runs evaluate_training :param file_name: Name of the .pkl file containing all the data :param model_name: Name of the model :param useGPU2: True to use GPU2 :param constit_input: ModelInput object for constituents :param track_input: ModelInput object for tracks :param MSeg_input: ModelInput object for muon segments :param jet_input: ModelInput object for jets :param plt_model: True to save model architecture to disk :param frac: Fraction of events to use in file_name :param batch_size: Number of training examples in one forward/backward pass :param reg_value: Value of regularizer term for LSTM :param dropout_value: Fraction of the input units to drop :param epochs: Number of epochs to train the model :param learning_rate: Learning rate :param hidden_fraction: Fraction by which to multiple the dense layers :param kfold: KFold object to do KFold cross validation """ # Setup directories print("\nSetting up directories...\n") dir_name = create_directories( model_name, os.path.split(os.path.splitext(file_name)[0])[1]) # Write a file with some details of architecture, will append final stats at end of training print("\nWriting to file training details...\n") f = open("plots/" + dir_name + "/training_details.txt", "w+") f.write("File name\n") f.write(file_name + "\n") f.write("\nModel name\n") f.write(model_name + "\n") f.write("\nModelInput objects\n") f.write(str(vars(constit_input)) + "\n") f.write(str(vars(track_input)) + "\n") f.write(str(vars(MSeg_input)) + "\n") f.write(str(vars(jet_input)) + "\n") f.write("\nOther hyperparameters\n") f.write( "frac = %s\nbatch_size = %s\nreg_value = %s\ndropout_value = %s\nepochs = %s\nlearning_rate = %s\n" "hidden_fraction = %s\n" % (frac, batch_size, reg_value, dropout_value, epochs, learning_rate, hidden_fraction)) f.close() # Do Keras_setup print("\nSetting up Keras...\n") keras_setup() # Choose GPU if useGPU2: os.environ["CUDA_VISIBLE_DEVICES"] = "1" # Load dataset print("\nLoading up dataset " + file_name + "...\n") df = load_dataset(file_name) # Extract labels Y = df['label'] # Use pt flattened weights from pre-processing for weights weights = df['flatWeight'] # Keep mcWeights mcWeights = df['mcEventWeight'] # Hard code start and end of names of variables X = df.loc[:, 'clus_pt_0':'nn_MSeg_t0_29'] X = df.loc[:, 'jet_pt':'jet_phi'].join(X) # Label Z as parametrized variables Z = df.loc[:, 'llp_mH':'llp_mS'] # Save memory del df # Handle case if no KFold if kfold is None: # Split data into train/test datasets X_train, X_test, y_train, y_test, weights_train, weights_test, mcWeights_train, mcWeights_test, Z_train, Z_test = \ train_test_split(X, Y, weights, mcWeights, Z, test_size=0.2) # Delete variables to save memory del X del Y del Z # Call method that prepares data, builds model architecture, trains model, and evaluates model roc_auc, test_acc = build_train_evaluate_model( constit_input, track_input, MSeg_input, jet_input, X_train, X_test, y_train, y_test, mcWeights_train, mcWeights_test, weights_train, weights_test, Z_test, Z_train, reg_value, frac, dropout_value, hidden_fraction, plt_model, batch_size, dir_name, learning_rate, epochs) return roc_auc, test_acc, dir_name else: # initialize lists to store metrics roc_scores, acc_scores 
= list(), list() # initialize counter for current fold iteration n_folds = 0 # do KFold Cross Validation for train_ix, test_ix in kfold.split(X, Y): n_folds += 1 print("\nDoing KFold iteration # %.0f...\n" % n_folds) # select samples X_train, y_train, weights_train, mcWeights_train, Z_train = \ X.iloc[train_ix], Y.iloc[train_ix], weights.iloc[train_ix], mcWeights.iloc[train_ix], Z.iloc[train_ix] X_test, y_test, weights_test, mcWeights_test, Z_test = \ X.iloc[test_ix], Y.iloc[test_ix], weights.iloc[test_ix], mcWeights.iloc[test_ix], Z.iloc[test_ix] # Call method that prepares data, builds model architecture, trains model, and evaluates model roc_auc, test_acc = build_train_evaluate_model( constit_input, track_input, MSeg_input, jet_input, X_train, X_test, y_train, y_test, mcWeights_train, mcWeights_test, weights_train, weights_test, Z_test, Z_train, reg_value, frac, dropout_value, hidden_fraction, plt_model, batch_size, dir_name, learning_rate, epochs, kfold, n_folds) roc_scores.append(roc_auc) acc_scores.append(test_acc) return roc_scores, acc_scores, dir_name
def train(): model_type = FLAGS.model_type run_desc = FLAGS.run_desc run_desc_tl = FLAGS.run_desc_tl data_dir = Path(FLAGS.data_dir) checkpoints_dir = Path(FLAGS.checkpoints_dir) / model_type / run_desc models_dir = Path(FLAGS.models_dir) / model_type / run_desc results_dir = Path(FLAGS.results_dir) / model_type / run_desc checkpoints_dir_tl = Path(FLAGS.checkpoints_dir) / model_type / run_desc_tl models_dir_tl = Path(FLAGS.models_dir) / model_type / run_desc_tl results_dir_tl = Path(FLAGS.results_dir) / model_type / run_desc_tl learning_rate = FLAGS.learning_rate batch_size_fn = FLAGS.batch_size epoch_no = FLAGS.epoch sent_hidden_dim = FLAGS.sent_hidden_dim doc_hidden_dim = FLAGS.doc_hidden_dim if not data_dir.exists(): raise ValueError('Data directory does not exist') # create other directories if they do not exist create_directories(checkpoints_dir_tl, models_dir_tl, results_dir_tl) # load the data print('Loading the data...') # get the glove and elmo embedding glove_dim = 0 elmo_dim = 0 GloVe_vectors = None ELMo = None if 'glove' in model_type: GloVe_vectors = GloVe() glove_dim = WORD_EMBED_DIM print('Uploaded GloVe embeddings.') if 'elmo' in model_type: ELMo = Elmo(options_file=ELMO_OPTIONS_FILE, weight_file=ELMO_WEIGHT_FILE, num_output_representations=1, requires_grad=False, dropout=0).to(DEVICE) elmo_dim = ELMO_EMBED_DIM print('Uploaded Elmo embeddings.') input_dim = glove_dim + elmo_dim # get the fnn and snli data keys = ['train', 'test', 'val'] FNN_DL_small = {} for i in keys: FNN_temp = FNNDataset(data_dir / ('FNN_small_' + i + '.pkl'), GloVe_vectors, ELMo) FNN_DL_temp = data.DataLoader(dataset=FNN_temp, batch_size=batch_size_fn, num_workers=0, shuffle=True, drop_last=True, collate_fn=PadSortBatchFNN()) FNN_DL_small[i] = FNN_DL_temp print('Uploaded FNN data.') # initialize the model, according to the model type print('Initializing the model for transfer learning...', end=' ') model = HierarchicalAttentionNet(input_dim=input_dim, sent_hidden_dim=sent_hidden_dim, doc_hidden_dim=doc_hidden_dim, num_classes=NUM_CLASSES_FN, dropout=0).to(DEVICE) print('Done!') print_model_parameters(model) print() print('Working on: ', end='') print(DEVICE) # set the criterion and optimizer # we weigh the loss: class [0] is real, class [1] is fake # loss_func_fn = nn.CrossEntropyLoss() optimizer = optim.Adam(params=model.parameters(), lr=learning_rate) # load the last checkpoint (if it exists) results = { 'epoch': [], 'train_loss': [], 'train_accuracy': [], 'val_loss': [], 'val_accuracy': [] } if epoch_no == '0': model_path = models_dir / Path('HierarchicalAttentionNet_model.pt') _, _, _ = load_latest_checkpoint(model_path, model, optimizer) else: checkpoint_path = checkpoints_dir / Path( 'HierarchicalAttentionNet_Adam_checkpoint_' + str(epoch_no) + '_.pt') _, _, _ = load_checkpoint(checkpoint_path, model, optimizer) print(f'Starting transfer learning on the model extracted from {epoch_no}') epoch = 0 for i in range(epoch, MAX_EPOCHS): print(f'Epoch {i+1:0{len(str(MAX_EPOCHS))}}/{MAX_EPOCHS}:') model.train() # one epoch of training train_loss_fn, train_acc_fn = train_epoch_fn(FNN_DL_small['train'], model, optimizer, loss_func_fn) # one epoch of eval model.eval() val_loss_fn, val_acc_fn = eval_epoch_fn(FNN_DL_small['val'], model, loss_func_fn) results['epoch'].append(i) results['train_loss'].append(train_loss_fn) results['train_accuracy'].append(train_acc_fn) results['val_loss'].append(val_loss_fn) results['val_accuracy'].append(val_acc_fn) #print(results) best_accuracy = 
torch.tensor(val_acc_fn).max().item() create_checkpoint(checkpoints_dir_tl, i, model, optimizer, results, best_accuracy) # save and plot the results save_results(results_dir_tl, results, model) save_model(models_dir_tl, model)
from profile import RunPipeline
import argparse

parser = argparse.ArgumentParser(description="Run the profiling pipeline")
parser.add_argument("--config", help="Config file")
args = parser.parse_args()

pipeline, profile_config = load_pipeline(config_file=args.config)
run_pipeline = RunPipeline(pipeline=pipeline, profile_config=profile_config)

for batch in profile_config:
    print(f"Now processing... batch: {batch}")
    for plate in profile_config[batch]:
        create_directories(batch=batch, plate=plate, pipeline=pipeline)
        if "aggregate" in pipeline:
            if pipeline["aggregate"]["perform"]:
                print(f"Now aggregating... plate: {plate}")
                run_pipeline.pipeline_aggregate(batch=batch, plate=plate)
        if "annotate" in pipeline:
            if pipeline["annotate"]["perform"]:
                print(f"Now annotating... plate: {plate}")
                run_pipeline.pipeline_annotate(batch=batch, plate=plate)
        if "normalize" in pipeline:
            if pipeline["normalize"]["perform"]:
                print(f"Now normalizing... plate: {plate}")
                run_pipeline.pipeline_normalize(batch=batch, plate=plate)
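# For illustration only: the loop above implies roughly this shape for the two
# objects returned by load_pipeline. The batch and plate names are invented and
# the real config schema may differ.
pipeline = {
    "aggregate": {"perform": True},
    "annotate": {"perform": True},
    "normalize": {"perform": False},
}
profile_config = {
    "batch_01": ["plate_A", "plate_B"],  # each batch maps to a list of plates
}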
from detect import detect_objects
import utils
import meraki

if __name__ == '__main__':
    """
    Steps:
    1) Create necessary directories;
    2) Connect to Meraki;
    3) Get a list of Meraki Cameras;
    4) For each camera:
       4.1) Downloads a snapshot of the current field of view of the camera;
       4.2) Runs the YOLOv3 model trained on the COCO dataset and stores the image locally.
    """
    utils.create_directories()
    api_key, organization_id, network_id, target_cameras, rtsp = utils.load_config_variables()
    if not api_key or not network_id:
        raise Exception('Meraki API Key and Meraki Network Id are mandatory params. You can hard code them above, '
                        'use a config.ini file or set them as environment variables. Camera serials should be a '
                        'string separated by ;. Camera serials are optional')
    dashboard = utils.establish_meraki_connection(api_key)
    cams = utils.get_cameras(dashboard, network_id, target_cameras)
    print(f'Will process snapshots of {len(cams)} MV cameras')
    if not cams:
        raise Exception(f'The network ({network_id}) used does not contain cameras or the cameras you selected are '
                        'not on the selected network.')
    else:
        for cam in cams:
            serial_number = cam['serial']
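# The camera loop above is truncated in this snippet. A hedged sketch of the
# remaining steps (4.1 and 4.2 from the docstring); get_camera_snapshot and
# save_image are hypothetical helper names, not from the original code.
#             snapshot_url = utils.get_camera_snapshot(dashboard, serial_number)  # hypothetical
#             image_path = utils.save_image(snapshot_url, serial_number)          # hypothetical
#             detect_objects(image_path)  # run YOLOv3 (COCO) and store the annotated image locally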
def homogenize(technique, ner_folder, data='server'): # Check if remote server is working, otherwise use local files remote_working = _check_status() if not remote_working or data == 'local': print "Starting to load data from local files" _load_from_local(technique) print "Data loaded" if technique == 'all': techniques = ['spotlight', 'tagme', 'babelfy'] else: techniques = [technique] for technique in techniques: output_folder = ner_folder + '/' + technique + "_h/" + "/" folder = ner_folder + '/' + technique utils.create_directories(output_folder) filenames = sorted(list(glob.glob(folder + "/*.json"))) for file in filenames: name = file[file.rfind("/") + 1:] sentences = json.load(codecs.open(file, "r", "utf-8")) for sentence in sentences: entities = [] for entity in sentence['entities']: add = True if technique.lower() == "spotlight": uri = entity['uri'] ret_categories = _get_categories( entity['uri'], use_remote=remote_working) if ret_categories: entity['categories'] = ret_categories if entity['types'] == "": ret_types = _get_types(uri, use_remote=remote_working) if ret_types: entity['types'] = ",".join(ret_types) elif technique.lower() == "tagme": entity['types'] = "" ret_id_dbpedia = _get_id_dbpedia( entity['id'], use_remote=remote_working) if ret_id_dbpedia: entity['uri'] = ret_id_dbpedia elif 'uri' in entity: entity[ 'uri'] = "http://dbpedia.org/resource/" + entity[ 'uri'].replace(" ", "_") else: entity['uri'] = "NONE" add = False uri = entity['uri'] ret_types = _get_types(uri, use_remote=remote_working) if ret_types: entity['types'] = ",".join(ret_types) formated_categories = [] if 'categories' in entity: for category in entity['categories']: formated_categories.append( category.replace(" ", "_")) entity['categories'] = formated_categories elif technique.lower() == "babelfy": entity['types'] = "" if "dbpedia" in entity['uri']: entity['uri'] = entity['uri'].replace( "\\u0026", "&").replace("\\u0027", "'") ret_categories = _get_categories( entity['uri'], use_remote=remote_working) if ret_categories: entity['categories'] = ret_categories ret_types = _get_types(entity['uri'], use_remote=remote_working) if ret_types: entity['types'] = ",".join(ret_types) entity['endChar'] += 1 ret_redirection = _get_redirections( entity['uri'], use_remote=remote_working) if ret_redirection: entity['uri'] = ret_redirection if add: entities.append(entity) sentence['entities'] = entities print output_folder json.dump(sentences, codecs.open(output_folder + name, "w", "utf-8")) print name
def train(): data_dir = Path(FLAGS.data_dir) checkpoints_dir = Path(FLAGS.checkpoints_dir) models_dir = Path(FLAGS.models_dir) results_dir = Path(FLAGS.results_dir) if not data_dir.exists(): raise ValueError('Data directory does not exist') # create other directories if they do not exist create_directories(checkpoints_dir, models_dir, results_dir) # load the data print('Loading the data...') adj_file = data_dir / 'adj_matrix.npz' features_file = data_dir / 'features_matrix.pkl' labels_file = data_dir / 'labels_matrix.pkl' splits_file = data_dir / 'splits_dict.pkl' adj, features, labels, splits_dict = load_data(adj_file, features_file, labels_file, splits_file) train_idxs = splits_dict['train'] val_idxs = splits_dict['val'] test_idxs = splits_dict['test'] # initialize the model, according to the model type print('Initializing the model...') model = GraphConvolutionalNetwork( input_dim=features.shape[1], hidden_dim=HIDDEN_DIM, num_classes=labels.max().item() + 1, dropout=DROPOUT ).to(DEVICE) # print_model_parameters(model) # set the criterion and optimizer print('Initializing the criterion and optimizer') criterion = nn.NLLLoss() optimizer = optim.Adam( params=model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY ) # initialize the results dict results = { 'epoch': [], 'train_loss': [], 'train_acc': [], 'val_loss': [], 'val_acc': [] } print(f'Starting training at epoch 1...') for i in range(0, MAX_EPOCHS): st = time() # train model.train() optimizer.zero_grad() # forward pass output = model(features, adj) # compute the training loss and accuracy train_targets = labels[train_idxs].max(dim=1).indices train_loss = criterion(output[train_idxs], train_targets) train_acc = accuracy(output[train_idxs], train_targets) # backpropogate the loss train_loss.backward() optimizer.step() # evaluate model.eval() output = model(features, adj) val_targets = labels[val_idxs].max(dim=1).indices val_loss = criterion(output[val_idxs], val_targets) val_acc = accuracy(output[val_idxs], val_targets) # record results results['epoch'].append(i) results['train_loss'].append(train_loss.item()) results['train_acc'].append(train_acc.item()) results['val_loss'].append(val_loss.item()) results['val_acc'].append(val_acc.item()) # print update print(f'Epoch: {i+1:02d} Train loss: {train_loss.item():0.4f} Train acc: {train_acc:0.4f} Val loss: {val_loss.item():0.4f} Val acc: {val_acc:0.4f} done in {time() - st} s') # create a checkpoint create_checkpoint(checkpoints_dir, i, model, optimizer, results) # test model.eval() output = model(features, adj) test_targets = labels[test_idxs].max(dim=1).indices test_loss = criterion(output[test_idxs], test_targets) test_acc = accuracy(output[test_idxs], test_targets) # record results results['test_loss'] = test_loss.item() results['test_acc'] = test_acc.item() # save the model and results save_model(models_dir, model) save_results(results_dir, results, model)
def train(): model_type = FLAGS.model_type run_desc = FLAGS.run_desc data_dir = Path(FLAGS.data_dir) checkpoints_dir = Path(FLAGS.checkpoints_dir) / model_type / run_desc models_dir = Path(FLAGS.models_dir) / model_type / run_desc results_dir = Path(FLAGS.results_dir) / model_type / run_desc #data_percentage = FLAGS.data_percentage if model_type == 'STL': only_fn = True else: only_fn = False # check if data directory exists if not data_dir.exists(): raise ValueError('Data directory does not exist') # create other directories if they do not exist create_directories(checkpoints_dir, models_dir, results_dir) # load the data print('Loading the data...') # get the glove and elmo embeddings GloVe_vectors = GloVe() print('Uploaded GloVe embeddings.') # ELMo = Elmo( # options_file=ELMO_OPTIONS_FILE, # weight_file=ELMO_WEIGHT_FILE, # num_output_representations=1, # requires_grad=False, # dropout=0).to(DEVICE) # print('Uploaded Elmo embeddings.') # get the fnn and snli data FNN = {} FNN_DL = {} for path in ['train', 'val', 'test']: FNN[path] = FNNDataset(data_dir / ('FNN_' + path + '.pkl'), GloVe_vectors) FNN_DL[path] = data.DataLoader(dataset=FNN[path], batch_size=BATCH_SIZE_FN, num_workers=0, shuffle=True, drop_last=True, collate_fn=PadSortBatch()) print('Uploaded FNN data.') if not only_fn: SNLI = {} SNLI_DL = {} for path in ['train', 'val', 'test']: SNLI[path] = SNLIDataset(data_dir / ('SNLI_' + path + '.pkl'), GloVe_vectors) SNLI_DL[path] = data.DataLoader(dataset=SNLI[path], batch_size=BATCH_SIZE_NLI, num_workers=0, shuffle=True, drop_last=True, collate_fn=PadSortBatchSNLI()) print('Uploaded SNLI data.') snli_train_sent_no = len(SNLI['train']) * 2 snli_train_len = len(SNLI['train']) fnn_train_sent_no = get_number_sentences(data_dir / 'FNN_train.pkl') fnn_train_len = len(FNN['train']) # initialize the model, according to the model type print('Initializing the model...', end=' ') if model_type == 'MTL': NUM_CLASSES_NLI = 3 print("Loading an MTL HAN model.") elif model_type == 'STL': NUM_CLASSES_NLI = None print("Loading an STL HAN model.") elif model_type == 'Transfer': print("Nothing for now.") if ELMO_EMBED_DIM is not None: # input_dim = WORD_EMBED_DIM + ELMO_EMBED_DIM input_dim = WORD_EMBED_DIM else: input_dim = WORD_EMBED_DIM model = HierarchicalAttentionNet(input_dim=input_dim, hidden_dim=WORD_HIDDEN_DIM, num_classes_task_fn=NUM_CLASSES_FN, embedding=None, num_classes_task_nli=NUM_CLASSES_NLI, dropout=0).to(DEVICE) print('Working on: ', end='') print(DEVICE) print('Done!') print_model_parameters(model) print() # set the criterion and optimizer # we weigh the loss: class [0] is real, class [1] is fake # real_ratio, fake_ratio = get_class_balance(data_dir / 'FNN_train.pkl') weights = [(1.0 - real_ratio), (1.0 - fake_ratio)] print(weights) class_weights = torch.FloatTensor(weights).to(DEVICE) loss_func_fn = nn.CrossEntropyLoss(weight=class_weights) if not only_fn: loss_func_nli = nn.CrossEntropyLoss() temperature = 2 optimizer = optim.Adam(params=model.parameters(), lr=LEARNING_RATE) # load the last checkpoint (if it exists) epoch, results, best_accuracy = load_latest_checkpoint( checkpoints_dir, model, optimizer) results_fn = { 'epoch': [], 'train_loss': [], 'train_accuracy': [], 'val_loss': [], 'val_accuracy': [] } results_nli = { 'epoch': [], 'train_loss': [], 'train_accuracy': [], 'val_loss': [], 'val_accuracy': [] } results = {'fn': results_fn, 'nli': results_nli} if epoch == 0: print(f'Starting training at epoch {epoch + 1}...') else: print(f'Resuming training from epoch {epoch + 1}...') 
for i in range(epoch, MAX_EPOCHS): print(f'Epoch {i+1:0{len(str(MAX_EPOCHS))}}/{MAX_EPOCHS}:') model.train() # one epoch of training if only_fn: train_loss_fn, train_acc_fn = train_epoch_fn( FNN_DL['train'], model, optimizer, loss_func_fn) elif model_type == 'MTL': model.train() train_loss_fn = [] train_acc_fn = [] loss_fn_weight_gradnorm = 1 train_loss_nli = [] train_acc_nli = [] loss_nli_weight_gradnorm = 1 #define by sentence number #loss_fn_weight_dataset = 1 - fnn_train_sent_no / (fnn_train_sent_no + snli_train_sent_no) #loss_nli_weight_dataset = 1 - snli_train_sent_no / (fnn_train_sent_no + snli_train_sent_no) loss_fn_weight_dataset = 1 - fnn_train_len / (fnn_train_len + snli_train_len) loss_nli_weight_dataset = 1 - snli_train_len / (fnn_train_len + snli_train_len) chance_fn = 1000 * (fnn_train_len / BATCH_SIZE_FN) / ( (fnn_train_len / BATCH_SIZE_FN) + (snli_train_len / BATCH_SIZE_NLI)) iterator_fnn = enumerate(FNN_DL['train']) iterator_snli = enumerate(SNLI_DL['train']) done_fnn, done_snli = False, False step_fnn = 0 step_snli = 0 print( f'Train set length, FNN: {fnn_train_len}. Train set length, SNLI: {snli_train_len}.' ) print( f'Training set to batch size ratio for Fake News Detection is {fnn_train_len / BATCH_SIZE_FN}.' ) print( f'Training set to batch size ratio for Language Inference is {snli_train_len / BATCH_SIZE_NLI}.' ) while not (done_fnn and done_snli): if len(train_loss_fn) > 1 and len(train_loss_nli) > 1: # computes loss weights based on the loss from the previous iterations loss_fn_ratio = train_loss_fn[len(train_loss_fn) - 1] / train_loss_fn[ len(train_loss_fn) - 2] loss_nli_ratio = train_loss_nli[ len(train_acc_nli) - 1] / train_loss_nli[len(train_loss_nli) - 2] loss_fn_exp = math.exp(loss_fn_ratio / temperature) loss_nli_exp = math.exp(loss_nli_ratio / temperature) loss_fn_weight_gradnorm = loss_fn_exp / (loss_fn_exp + loss_nli_exp) loss_nli_weight_gradnorm = loss_nli_exp / (loss_fn_exp + loss_nli_exp) loss_fn_weight = math.exp( loss_fn_weight_dataset * loss_fn_weight_gradnorm) / ( math.exp(loss_fn_weight_dataset * loss_fn_weight_gradnorm) + math.exp(loss_nli_weight_dataset * loss_nli_weight_gradnorm)) loss_nli_weight = math.exp( loss_nli_weight_dataset * loss_nli_weight_gradnorm) / ( math.exp(loss_fn_weight_dataset * loss_fn_weight_gradnorm) + math.exp(loss_nli_weight_dataset * loss_nli_weight_gradnorm)) else: loss_fn_weight = loss_fn_weight_dataset loss_nli_weight = loss_nli_weight_dataset # define the total loss function #loss_func = loss_func_fn + loss_func_nli # is this needed? if np.random.randint(0, 1000) < chance_fn: try: step_fnn, batch_fnn = next(iterator_fnn) except StopIteration: done_fnn = True else: try: batch_loss_fn, batch_acc_fn = train_batch_fn( batch_fnn, model, optimizer, loss_func_fn, loss_fn_weight) train_loss_fn.append(batch_loss_fn) train_acc_fn.append(batch_acc_fn) except: print('Error in batch') else: try: step_snli, batch_snli = next(iterator_snli) except StopIteration: done_snli = True else: try: batch_loss_nli, batch_acc_nli = train_batch_nli( batch_snli, model, optimizer, loss_func_nli, loss_nli_weight) train_loss_nli.append(batch_loss_nli) train_acc_nli.append(batch_acc_nli) except: print('Error in batch') print(f'FNN batch {step_fnn}') print(f'SNLI batch {step_snli}') if step_fnn % 50 == 0 and step_fnn != 0: print(f'Processed {step_fnn} FNN batches.') print(f'Accuracy: {train_acc_fn[len(train_acc_fn)-1]}.') print( f'Weight for loss for NLI is {loss_nli_weight}, for loss for FN is {loss_fn_weight}.' 
) if step_snli % 50 == 0 and step_snli != 0: print(f'Processed {step_snli} SNLIbatches.') print(f'Accuracy: {train_acc_nli[len(train_acc_nli)-1]}.') print( f'Weight for loss for NLI is {loss_nli_weight}, for loss for FN is {loss_fn_weight}.' ) # one epoch of eval model.eval() val_loss_fn, val_acc_fn = eval_epoch_fn(FNN_DL['val'], model, loss_func_fn) tasks = ['fn'] if model_type == 'MTL': val_loss_nli, val_acc_nli = eval_epoch_nli(SNLI_DL['val'], model, loss_func_nli) tasks.append('nli') for task in tasks: results[task]['epoch'].append(i) if task == 'fn': temp_train_loss = train_loss_fn temp_val_loss = val_loss_fn temp_train_acc = train_acc_fn temp_val_acc = val_acc_fn elif task == 'nli': temp_train_loss = train_loss_nli temp_val_loss = val_loss_nli temp_train_acc = train_acc_nli temp_val_acc = val_acc_nli results[task]['train_loss'].append(temp_train_loss) results[task]['train_accuracy'].append(temp_train_acc) results[task]['val_loss'].append(temp_val_loss) results[task]['val_accuracy'].append(temp_val_acc) print(results) best_accuracy = torch.tensor(temp_val_acc).max().item() create_checkpoint(checkpoints_dir, epoch, model, optimizer, results, best_accuracy) # save and plot the results save_results(results_dir, results, model) save_model(models_dir, model) plot_results(results_dir, results, model)
# <part>: 'part2' if it is part 2; if it is part 1, just '' (two apostrophes)
# Example:
# executions = [
#     ('HOG', '32', '')        <- HOG, 32 neurons, part 1
#     ('LBP', '160', 'part2')  <- LBP, 160 neurons, part 2
# ]
# The number of test tuples is unlimited, so use as many as you need.
# After defining all the cases, run in the terminal: python3 src/run.py
executions = [('HOG', '32', ''), ('LBP', '160', '')]

if __name__ == '__main__':
    start = datetime.now()
    for run_num, e in enumerate(executions):
        directory = 'output/{desc}-N{hn:03}-P{part}-{datetime}/'.format(
            desc=e[0],
            neurons=e[1],
            part=2 if 'part2' in e else 1,
            datetime=start.strftime('%Y-%m-%d-%H-%M'),
            hn=int(e[1]))
        u.create_directories(['output', directory])
        command = 'python3.6 src/cross-validation.py '
        command += '{desc} {neurons:3} {part:5} {directory} > {directory}log.txt &'.format(
            desc=e[0], neurons=e[1], part=e[2], directory=directory)
        os.system(command)
        print('{}. Running: {}'.format(str(run_num + 1).zfill(2), command))
def test(): model_type = FLAGS.model_type run_desc = FLAGS.run_desc data_dir = Path(FLAGS.data_dir) checkpoints_dir = Path(FLAGS.checkpoints_dir) / model_type / run_desc models_dir = Path(FLAGS.models_dir) / model_type / run_desc results_dir = Path(FLAGS.results_dir) / model_type / run_desc learning_rate = LEARNING_RATE epoch_no = FLAGS.epoch if not data_dir.exists(): raise ValueError('Data directory does not exist') # create other directories if they do not exist create_directories(checkpoints_dir, models_dir, results_dir) # load the data print('Loading the data...') # get the glove and elmo embedding glove_dim = 0 elmo_dim = 0 GloVe_vectors = None ELMo = None if 'glove' in model_type: GloVe_vectors = GloVe() glove_dim = WORD_EMBED_DIM print('Uploaded GloVe embeddings.') if 'elmo' in model_type: ELMo = Elmo(options_file=ELMO_OPTIONS_FILE, weight_file=ELMO_WEIGHT_FILE, num_output_representations=1, requires_grad=False, dropout=0).to(DEVICE) elmo_dim = ELMO_EMBED_DIM print('Uploaded Elmo embeddings.') input_dim = glove_dim + elmo_dim # get the fnn and snli data keys = ['train', 'test', 'val'] FNN_DL_small = {} for i in keys: FNN_temp = FNNDataset(data_dir / ('FNN_small_' + i + '.pkl'), GloVe_vectors, ELMo) FNN_DL_temp = data.DataLoader(dataset=FNN_temp, batch_size=BATCH_SIZE_FN, num_workers=0, shuffle=True, drop_last=True, collate_fn=PadSortBatchFNN()) FNN_DL_small[i] = FNN_DL_temp print('Uploaded FNN data.') print('Initializing the model...', end=' ') model = initialize_han(input_dim, WORD_HIDDEN_DIM, NUM_CLASSES_FN, DEVICE) print('Working on: ', end='') print(DEVICE) print('Done!') print_model_parameters(model) print() optimizer = optim.Adam(params=model.parameters(), lr=LEARNING_RATE) print('Loading model weights.') #model.load_state_dict(torch.load(CHECKPOINTS_DIR_DEFAULT / 'HierarchicalAttentionNet_model.pt')) if epoch_no == '0': model_path = models_dir / Path('HierarchicalAttentionNet_model.pt') model = load_model(model_path, model, checkpoint=False) #_, _, _ = load_latest_checkpoint(model_path, model, optimizer) else: checkpoint_path = checkpoints_dir / Path( 'HierarchicalAttentionNet_Adam_checkpoint_' + str(epoch_no) + '_.pt') model = load_model(checkpoint_path, model, checkpoint=True) #_, _, _ = load_checkpoint(checkpoint_path, model, optimizer) #model.eval() loss_func_fn = nn.CrossEntropyLoss() #y_pred = [] #y_true = [] for split in keys: all_embeds = [] for step, batch in enumerate(FNN_DL_small[split]): embeds = get_article_embeddings(model, batch) all_embeds.append(embeds[0]) pkl.dump( all_embeds, open( data_dir / ('FNN_small_embeds_' + model_type + '_' + split + '.pkl'), 'wb'))
def train(): model_type = FLAGS.model_type run_desc = FLAGS.run_desc data_dir = Path(FLAGS.data_dir) checkpoints_dir = Path(FLAGS.checkpoints_dir) / model_type / run_desc models_dir = Path(FLAGS.models_dir) / model_type / run_desc results_dir = Path(FLAGS.results_dir) / model_type / run_desc learning_rate = LEARNING_RATE sent_hidden_dim = FLAGS.sent_hidden_dim doc_hidden_dim = FLAGS.doc_hidden_dim if not data_dir.exists(): raise ValueError('Data directory does not exist') # create other directories if they do not exist create_directories(checkpoints_dir, models_dir, results_dir) # load the data print('Loading the data...') # get the glove and elmo embedding glove_dim = 0 elmo_dim = 0 GloVe_vectors = None ELMo = None if 'glove' in model_type: GloVe_vectors = GloVe() glove_dim = WORD_EMBED_DIM print('Uploaded GloVe embeddings.') if 'elmo' in model_type: ELMo = Elmo(options_file=ELMO_OPTIONS_FILE, weight_file=ELMO_WEIGHT_FILE, num_output_representations=1, requires_grad=False, dropout=0).to(DEVICE) elmo_dim = ELMO_EMBED_DIM print('Uploaded Elmo embeddings.') input_dim = glove_dim + elmo_dim # get the fnn and snli data FNN = {} FNN_DL = {} for path in ['train', 'val', 'test']: FNN[path] = FNNDataset(data_dir / ('FNN_' + path + '.pkl'), GloVe_vectors, ELMo) FNN_DL[path] = data.DataLoader(dataset=FNN[path], batch_size=BATCH_SIZE_FN, num_workers=0, shuffle=True, drop_last=True, collate_fn=PadSortBatchFNN()) print('Uploaded FNN data.') fnn_train_sent_no = get_number_sentences(data_dir / 'FNN_train.pkl') fnn_train_len = len(FNN['train']) # initialize the model, according to the model type print('Initializing the model...', end=' ') model = HierarchicalAttentionNet(input_dim=input_dim, sent_hidden_dim=sent_hidden_dim, doc_hidden_dim=doc_hidden_dim, num_classes=NUM_CLASSES_FN, dropout=0).to(DEVICE) print('Working on: ', end='') print(DEVICE) print('Done!') print_model_parameters(model) print() # set the criterion and optimizer # we weigh the loss: class [0] is real, class [1] is fake # real_ratio, fake_ratio = get_class_balance(data_dir / 'FNN_train.pkl') weights = [(1.0 - real_ratio), (1.0 - fake_ratio)] print(weights) class_weights = torch.FloatTensor(weights).to(DEVICE) loss_func_fn = nn.CrossEntropyLoss(weight=class_weights) optimizer = optim.Adam(params=model.parameters(), lr=LEARNING_RATE) # load the last checkpoint (if it exists) results = { 'epoch': [], 'train_loss': [], 'train_accuracy': [], 'val_loss': [], 'val_accuracy': [] } epoch, results, best_accuracy = load_latest_checkpoint( checkpoints_dir, model, optimizer) if epoch == 0: print(f'Starting training at epoch {epoch + 1}...') else: print(f'Resuming training from epoch {epoch + 1}...') for i in range(epoch, MAX_EPOCHS): print(f'Epoch {i+1:0{len(str(MAX_EPOCHS))}}/{MAX_EPOCHS}:') model.train() # one epoch of training train_loss_fn, train_acc_fn = train_epoch_fn(FNN_DL['train'], model, optimizer, loss_func_fn) # one epoch of eval model.eval() val_loss_fn, val_acc_fn = eval_epoch_fn(FNN_DL['val'], model, loss_func_fn) results['epoch'].append(i) results['train_loss'].append(train_loss_fn) results['train_accuracy'].append(train_acc_fn) results['val_loss'].append(val_loss_fn) results['val_accuracy'].append(val_acc_fn) #print(results) best_accuracy = torch.tensor(val_acc_fn).max().item() create_checkpoint(checkpoints_dir, i, model, optimizer, results, best_accuracy) if (i + 1) % 4 == 0 and i != 0: learning_rate = learning_rate / 2 optimizer = optim.Adam(params=model.parameters(), lr=learning_rate) # save and plot the results 
save_results(results_dir, results, model) save_model(models_dir, model)
        filename = name + "/" + "{}_{}_{}.jpg".format(name, datetime_format, count).lower()
        filename_list.append(filename)
        utils.save_cv2_image(IMAGE_ORIGINAL_DIR, filename, frame)
        count += 1
        frame_no += 1

    if OCI_STORAGE_SYNC:
        pool = multiprocessing.pool.ThreadPool(processes=3)
        pool.apply_async(oci_utils.upload_to_object_storage,
                         args=[config, IMAGE_ORIGINAL_DIR, OCI_STORAGE_BUCKET_NAME, filename_list])
        pool.close()

    return json.dumps({'result': 'success', 'message': 'File uploaded!'})


print("facerec_service loaded!")

utils.create_directories([IMAGE_UPLOAD_DIR, IMAGE_ORIGINAL_DIR, IMAGE_PREPARED_DIR])

config = oci.config.from_file(OCI_CONFIG_PATH, "DEFAULT")

if OCI_STORAGE_SYNC:
    oci_utils.syncronize_with_object_storage(config, IMAGE_ORIGINAL_DIR, OCI_STORAGE_BUCKET_NAME)

face_recognition = face.Recognition()
# train()
    print(score)

    if save_model:
        print("Saving model")
        model.save(final_model)

    if predictions:
        print("Running predictions on test data")
        test_data = np.load(f"{config['Paths']['ising data']}/test_data.npz")
        test_data = test_data["data"]
        predictions = model.predict(test_data)
        prediction_name = (config['Paths']['predictions'] + "/predictions_"
                           + str(model_iteration) + "_" + str(model_type) + ".npy")
        np.save(prediction_name, predictions)


print("Training Network")
create_directories()
predictions = cnn_regressor(model_iteration=argv[2],
                            model_type=argv[3],
                            activation=argv[4],
                            optimizer=argv[5],
                            dropout=argv[6],
                            batchnorm=argv[7],
                            batchnorm_order=argv[8],
                            batch_size=argv[9],
                            learning_rate=argv[10],
                            training_data_length=argv[11],
                            shallow_network=argv[12])
AUTOTUNE = tf.data.experimental.AUTOTUNE

# download the flower_photos dataset / if you already have the directory, you can comment this line
datadir = keras.utils.get_file(
    origin='https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz',
    fname='flower_photos',
    untar=True)
datadir = pathlib.Path(datadir)

# count flower images / if you already have the directory, you can comment this line
image_count = len(list(datadir.glob("*/*.jpg")))
print("Number of images: {}".format(image_count))

# get class names / if you already have the directory, you can comment this line
CLASS_NAME = np.array([item.name for item in datadir.glob('*') if item.name != 'LICENSE.txt'])
print(CLASS_NAME)

# create directories by flower class / if you already have the directory, you can comment this line
utl.create_directories()

# create train and test folders / if you already have the directories (train, test), you can comment this line
utl.create_test_train_folder(datadir, CLASS_NAME)

# define model
model = iam.define_model()

# run model
iam.run(model)

# get rose images for a prediction example
roses = list(datadir.glob('roses/*'))

# run predict
pred.run_example(roses[2])
                training_this_round, testing_this_round, fold_i))
        thread_list.append(t)

    # Starts threads
    for thread in thread_list:
        thread.start()
    for thread in thread_list:
        thread.join()


# start of execution
if __name__ == "__main__":
    # execution start time, descriptor, parameters, directories, class list and dataset list
    start_algorithm = datetime.now()
    arguments = get_arguments()
    parameters = p.get_parameters(arguments['descriptor'], 'part2' in sys.argv,
                                  arguments['neurons'], arguments['output'])
    print(parameters)
    u.create_directories(['data', 'src', 'output'])
    dataset = u.get_dataset_list(u.get_classes_list(parameters['workpath']),
                                 parameters['workpath'])
    k_fold(dataset, len(dataset), parameters, start_algorithm)
    print("Main Start Time: \t\t\t\t\t\t{}".format(
        start_algorithm.strftime("%Y-%m-%d %H:%M:%S")))
    print("Main End Time: \t\t\t\t\t\t{}".format(
        datetime.now().strftime("%Y-%m-%d %H:%M:%S")))
    print("Total time running: \t\t\t\t\t\t{}\n".format(datetime.now() - start_algorithm))
    features, act_labels, verbose=True)

train_data, act_train_labels = train_loader.time_series_to_section(
    train_ts.copy(), num_act_labels,
    sliding_window_size=200,
    step_size_of_sliding_window=10)

test_data, act_test_labels = train_loader.time_series_to_section(
    test_ts.copy(), num_act_labels,
    sliding_window_size=200,
    step_size_of_sliding_window=10)

print("---Data is successfully loaded")

handler = DataHandler(train_data, test_data)
norm_train = handler.normalise("train")
norm_test = handler.normalise("test")

print("--- Shape of Training Data:", train_data.shape)
print("--- Shape of Test Data:", test_data.shape)

expt_name = "thurs_Script_jog2"
create_directories(expt_name)

gan_ = GAN(norm_train.shape)
trainer_ = Trainer(gan_, expt_name)
trainer_.train_gan(epochs=200, batch_size=128, sample_interval=10, train_data=norm_train)


    """
    print(params['feature'])
    train_x, train_y = data_loader.load_combined_data(params['feature'], 'train')
    valid_x, valid_y = data_loader.load_combined_data(params['feature'], 'valid')
    params['name'] = utils.create_name(params)
    model = train_model(feature, params, train_x, train_y, valid_x, valid_y)
    test_x, test_y = data_loader.load_combined_data(params['feature'], 'test')
    print('Calculating performance on test set')
    print('AUC', test_model(model, test_x, test_y, params))


if __name__ == "__main__":
    for feature in [
            'hl',
            'et_and_ht',
            'et',
            'ht',
            'et_and_ht_and_hl',
            'mass',
            'hl_and_mass',
    ]:
        utils.create_directories(feature)
        params = utils.get_optimal_params(feature)
        main(feature, params)