def train_model(model, train, val):
    """Compile ``model`` and train it on generator-fed data with checkpointing.

    Args:
        model: Keras model to train. If ``None``, a fresh model is built
            via ``create_model()``.
        train: Training dataset exposing ``labels_list()`` and ``size()``.
        val: Validation dataset with the same interface.
    """
    # Bug fix: the original unconditionally replaced the caller-supplied
    # model with create_model(), silently discarding the `model` argument.
    # Only build a new model when none was provided.
    if model is None:
        model = create_model()
    model.compile(optimizer='adam',
                  loss=LOSSES,
                  loss_weights=LOSS_WEIGHTS,
                  metrics=['accuracy'])
    train_labels = train.labels_list()
    val_labels = val.labels_list()
    training_generator = data_generator.DataGenerator(
        train, range(train.size()), train_labels, **PARAMS)
    validation_generator = data_generator.DataGenerator(
        val, range(val.size()), val_labels, **PARAMS)
    # Checkpoint only the best (lowest validation loss) weights each epoch.
    filepath = "completemodel_weights-improvement-{epoch:02d}-{val_loss:.2f}.hdf5"
    checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1,
                                 save_best_only=True, mode='min')
    # TODO fit generator (ensure generator returns two outputs)
    model.fit_generator(generator=training_generator,
                        validation_data=validation_generator,
                        use_multiprocessing=True,
                        callbacks=[checkpoint],
                        epochs=15)
def test_if_images_after_preprocessing_are_fine():
    """Smoke-test the data pipeline: sample batches per environment and dump them.

    Builds train/test DataGenerators, then for four environments samples one
    batch from each split and saves the images/labels for visual inspection.
    """
    graph = tf.Graph()
    # Bug fix: the original assigned the ConfigProto to the name `config`,
    # shadowing the project's `config` module that is read just below
    # (config.BATCH_SIZE, config.TF_NUM_CLASSES, ...). A ConfigProto has no
    # such attributes, so every subsequent access would raise AttributeError.
    sess_config = tf.ConfigProto(allow_soft_placement=True,
                                 log_device_placement=False)
    sess = tf.InteractiveSession(graph=graph, config=sess_config)
    dataset_filenames, dataset_sizes = \
        dataset_name_factory.new_get_noncol_train_data_sorted_by_direction_noncol_test_data()
    train_data_gen = data_generator.DataGenerator(
        config.BATCH_SIZE, config.TF_NUM_CLASSES, dataset_sizes['train_dataset'],
        config.TF_INPUT_SIZE, sess, dataset_filenames['train_dataset'],
        config.TF_INPUT_AFTER_RESIZE, False)
    test_data_gen = data_generator.DataGenerator(
        config.BATCH_SIZE, config.TF_NUM_CLASSES, dataset_sizes['test_dataset'],
        config.TF_INPUT_SIZE, sess, dataset_filenames['test_dataset'],
        config.TF_INPUT_AFTER_RESIZE, True)
    # Build the TF augmentation ops for both splits (ids, images, labels).
    tf_train_img_ids, tf_train_images, tf_train_labels = \
        train_data_gen.tf_augment_data_with()
    tf_test_img_ids, tf_test_images, tf_test_labels = \
        test_data_gen.tf_augment_data_with()
    for env_idx in range(4):
        tr_img_id, tr_images, tr_labels = train_data_gen.sample_a_batch_from_data(
            env_idx, shuffle=True)
        ts_img_id, ts_images, ts_labels = test_data_gen.sample_a_batch_from_data(
            env_idx, shuffle=False)
        save_batch_of_data('train_env_%d' % env_idx, tr_images, tr_labels)
        save_batch_of_data('test_env_%d' % env_idx, ts_images, ts_labels)
def load_mnist_data():
    """Build train/val MNIST data generators and return their batch iterators.

    Returns:
        Tuple of (train batch iterator, val batch iterator,
        number of training samples, number of validation samples).
    """
    images_dir = '/data/data/mnist_train_data/images'
    split_json = {
        'train': '/data/data/mnist_train_data/labels/train.json',
        'val': '/data/data/mnist_train_data/labels/val.json',
    }

    def _make_generator(json_path):
        # One generator per split; all share the image directory and params.
        gen = data_generator.DataGenerator(img_dirpath=images_dir,
                                           json_path=json_path,
                                           img_w=params.img_w,
                                           img_h=params.img_h,
                                           batch_size=params.batch_size)
        gen.build_data()
        return gen

    train_gen = _make_generator(split_json['train'])
    val_gen = _make_generator(split_json['val'])
    return (train_gen.next_batch(),
            val_gen.next_batch(),
            train_gen.n,
            val_gen.n)
def main(model_path=None, data_path=None):
    """Train a ResidualCNN, optionally resuming weights / loading cached data.

    Args:
        model_path: Optional path to pre-trained weights to load.
        data_path: Optional path to an HDF5 file with datasets 'X' and 'y';
            when absent, training data is generated by simple agents.
    """
    model = ResidualCNN()
    # Idiom fix: compare against None with `is not None`, not `!= None`.
    if model_path is not None:
        model.load(model_path)

    if data_path is not None:
        # Fix: use a context manager so the HDF5 handle is closed even if
        # the copy fails (the original leaked the open file), and avoid
        # shadowing the builtin `file`.
        with h5py.File(data_path, 'r') as data_file:
            training_data_X = np.copy(data_file['X'])
            training_data_y = np.copy(data_file['y'])
    else:
        generator = data_generator.DataGenerator(
            num_workers=constants.NUM_WORKERS)
        training_data_X, training_data_y = generator.generate_simple_agent_data()

    # Save a versioned checkpoint after every epoch.
    version = 0
    for _ in range(constants.EPOCHS):
        model.model.fit(x=training_data_X,
                        y=training_data_y,
                        batch_size=constants.BATCH_SIZE,
                        epochs=1,
                        shuffle=True,
                        validation_split=0.2)
        model.save(constants.SAVE_MODELS_DIR, constants.MODEL_PREFIX, version)
        version += 1
def refresh(self):
    """Regenerate the dataset and retrain the KNN model from the current config."""
    data_cfg = configurations.config["data"]
    self.data_gen = data_generator.DataGenerator(*data_cfg.values())
    self.features, self.labels = self.data_gen.get_data()
    # Rebuild the classifier: tune hyper-parameters first, then fit.
    self.knn_model = knn.Knn(self.features, self.labels)
    self.knn_model.best_params()
    self.knn_model.train()
def __init__(self, mes, trainable=True):
    """Build a model + data generator pair for the configured model type.

    Python 2 code (print statements). Creates the TF graph/session, picks a
    data generator and model class based on ``mes.model_type`` ('LSTM' or
    'NOLSTM'), and either initializes or restores model variables.

    Args:
        mes: configuration/settings object; provides model paths, names,
            and a ``config`` dict of training hyper-parameters.
        trainable: when True, set up training state and restore-or-init
            variables; when False, restore weights only.
    """
    self.mes = mes
    self.name = mes.model_name
    self.model_path = mes.model_path
    self.model_save_path = mes.model_save_path
    self.model_log_path = mes.model_log_path
    self.model_type = mes.model_type
    self.col_name = mes.train_col
    self.graph = tf.Graph()
    self.trainable = trainable
    # Select generator/model implementation by configured model type.
    if self.model_type == 'LSTM':
        self.data_generator = data_generator_LSTM.DataGeneratorLSTM(mes, trainable)
        self.model = model_cnnlstmpl.LSTMModel(self.mes, self.graph)
    # elif self.model_type == 'ABSA_LSTM':
    #     self.data_generator = data_generator_ABSA.DataGeneratorABSALSTM(self.mes, trainable)
    #     self.model = models.ABSALSTMModel(self.mes, self.graph)
    elif self.model_type == 'NOLSTM':
        print("NOLSTM, prepare 4 data_generator")
        self.data_generator = data_generator.DataGenerator(self.mes, trainable, True)
        self.model = model_cnnpl.NOLSTMModel(self.mes, self.graph)
        print("NOLSTM, load model")
    # elif self.model_type == 'ABSA_NOLSTM':
    #     self.data_generator = data_generator_ABSA.DataGeneratorABSANOLSTM(mes, trainable)
    #     self.model = models.ABSANOLSTMModel(self.mes, self.graph)
    print("start session")
    self.session = tf.Session(graph=self.graph)
    print("started session")
    if trainable:
        self.docs = utils.get_docs(self.col_name)
        # Training thresholds / hyper-parameters from the config dict.
        self.good_accuracy = self.mes.config['PRE_GOOD_RATE']
        self.best_accuracy_valid = self.good_accuracy
        self.best_accuracy_test = -1.0
        self.dropout_keep_prob_rate = self.mes.config['PRE_DROPOUT_KEEP_PROB']
        self.step_num = self.mes.config['PRE_STEP_NUM']
        self.valid_time = self.mes.config['PRE_VALID_TIME']
        # NOTE(review): Python 2 integer division here — fractional batches
        # are truncated; confirm that is intended.
        self.validate_times = self.data_generator.valid_sz / self.data_generator.test_batch_sz
        self.test_times = self.data_generator.test_sz / self.data_generator.test_batch_sz
        with self.model.graph.as_default():
            # Restore from MODEL_RESTORE_PATH if configured and present,
            # otherwise start from freshly initialized variables.
            if self.mes.config.get('MODEL_RESTORE_PATH', None) is not None and \
                    os.path.exists(self.mes.config.get('MODEL_RESTORE_PATH', None)):
                self.model.saver.restore(self.session, self.mes.config['MODEL_RESTORE_PATH'])
                print 'Restored from', self.mes.config['MODEL_RESTORE_PATH']
            else:
                init = tf.global_variables_initializer()
                self.session.run(init)
    else:
        with self.model.graph.as_default():
            # Inference-only: restore from the configured path, falling back
            # to the default save path.
            if self.mes.config['MODEL_RESTORE_PATH'] is not None:
                self.model.saver.restore(self.session, self.mes.config['MODEL_RESTORE_PATH'])
                print 'Restored from', self.mes.config['MODEL_RESTORE_PATH']
            else:
                self.model.saver.restore(self.session, self.model_save_path)
    self.writer = tf.summary.FileWriter(self.model_log_path, self.session.graph)
def setUp(self):
    """Build training/validation DataGenerators over the OK/NOK dataset."""
    # Generator hyper-parameters: mono 1,363,200-sample windows, 6 classes.
    params = {'dim': (1363200, 1),
              'batch_size': 64,
              'n_classes': 6,
              'n_channels': 1,
              'shuffle': True}
    # Directories holding the positive (OK) and negative (NOK) samples.
    ok_directory = 'C:/Users/Tony/Downloads/Dataset2/OK/'
    nok_directory = 'C:/Users/Tony/Downloads/Dataset2/NOK/'
    # Labels derived from directory contents; 30% of samples go to validation.
    labels = data_generator.DataGenerator.build_label_list(
        ok_directory=ok_directory, nok_directory=nok_directory)
    partition = data_generator.DataGenerator.build_partition(
        validation_amount=0.3, labels=labels)
    # Generators
    self.training_generator = data_generator.DataGenerator(
        partition['train'], labels, **params)
    self.validation_generator = data_generator.DataGenerator(
        partition['validation'], labels, **params)
    # Removed dead local `r = 1` (leftover debugger-breakpoint anchor).
def train_classifier(model, train_set, val_set):
    """Compile and fit ``model`` on generator-fed data, then persist it.

    Side effects: writes per-epoch checkpoint files for improving
    validation loss and the final model to "resnetsaveus.h5".
    """
    model.compile(loss='categorical_crossentropy', optimizer='adam')
    print(model.summary())

    train_gen = data_generator.DataGenerator(
        train_set, range(train_set.size()), train_set.labels_list(), **PARAMS)
    val_gen = data_generator.DataGenerator(
        val_set, range(val_set.size()), val_set.labels_list(), **PARAMS)

    # Keep only the best weights as measured by validation loss.
    checkpoint = ModelCheckpoint(
        "weights-improvement-{epoch:02d}-{val_loss:.2f}.hdf5",
        monitor='val_loss', verbose=1, save_best_only=True, mode='min')

    model.fit_generator(generator=train_gen,
                        validation_data=val_gen,
                        use_multiprocessing=True,
                        epochs=20,
                        callbacks=[checkpoint])
    model.save("resnetsaveus.h5")
def __init__(self, model_sub_dir, epoch=None, model_base_dir=DEFAULT_MODEL_BASE_DIR):
    """Load a model and build evaluation generators over the validation set."""
    self.update_model(model_sub_dir, epoch=epoch, model_base_dir=model_base_dir)

    # Small fixed subset used for qualitative checks, served as one batch.
    rep_df = pd.read_pickle(
        os.path.join(DEFAULT_PICKLE_PATH, 'representative_set.pkl'))
    self.representative_set_gen = data_generator.DataGenerator(
        df=rep_df,
        base_dir=DEFAULT_VAL_IMG_PATH,
        input_dim=INPUT_DIM,
        output_dim=OUTPUT_DIM,
        num_hg_blocks=1,  # does not matter
        shuffle=False,
        batch_size=len(rep_df),  # single batch
        online_fetch=False,
        is_eval=True)

    # Full validation split, batched normally.
    hg_net = hourglass.HourglassNet(NUM_COCO_KEYPOINTS, DEFAULT_NUM_HG,
                                    INPUT_CHANNELS, INPUT_DIM, OUTPUT_DIM)
    _, val_df = hg_net.load_and_filter_annotations(
        DEFAULT_TRAIN_ANNOT_PATH, DEFAULT_VAL_ANNOT_PATH, 1.0)
    self.val_gen = data_generator.DataGenerator(
        df=val_df,
        base_dir=DEFAULT_VAL_IMG_PATH,
        input_dim=INPUT_DIM,
        output_dim=OUTPUT_DIM,
        num_hg_blocks=1,  # does not matter
        shuffle=False,
        batch_size=DEFAULT_BATCH_SIZE,
        online_fetch=False,
        is_eval=True)

    self.cocoGt = COCO(DEFAULT_VAL_ANNOT_PATH)
    print("Initialized Evaluation Wrapper!")
def global_store():
    """Load the baseline tables and compute the cases shared by every group."""
    database = data_generator.DataGenerator('baseline')
    # Extract the tables of interest from the baseline database.
    dfpl = database.table_extract('Plan')
    dfcs = database.table_extract('Case')
    dfmn = database.table_extract('Main')
    dfnl3 = database.table_extract('Nltrd3')
    key_dict = database.key_dict

    # Map every GROUP to the CASE values of its rows in the Plan table.
    groups2case = dfpl.groupby('GROUP').groups
    groups = sorted(list(groups2case.keys()))
    case_in_group = {
        group: list(dfpl.loc[list(groups2case[group])]['CASE'])
        for group in groups
    }

    # Intersect the per-group case sets: keep cases present in every group.
    case = set()
    for pos, group in enumerate(groups):
        group_cases = set(case_in_group[group])
        case = group_cases if pos == 0 else case & group_cases
def evaluate_patch_based_network(eval_params, imdb):
    """Evaluate a patch-based network over every image in ``imdb``.

    Two-step pipeline per image: (1) augment the full image, cut it into
    patches, and predict per-patch labels with the network (optionally an
    ensemble of models); (2) convert predictions to boundary probability
    maps and run the second-step (graph-search) evaluation. Progress is
    checkpointed to disk so a previously started image can be resumed at
    the correct step.

    Args:
        eval_params: evaluation configuration (model(s), batch size, save
            options, augmentation function, verbosity, ...).
        imdb: image database providing images, patch labels, names, segs.

    Returns:
        A list of EvaluationOutput objects when
        ``eval_params.save_params.output_var`` is True, otherwise None.
        NOTE(review): nothing visibly appends to ``eval_outputs`` in this
        block — confirm whether per-image outputs are collected elsewhere.
    """
    # patches need to be constructed and passed to the generator for one image at a time
    if eval_params.save_params.output_var is True:
        eval_outputs = []
    else:
        eval_outputs = None
    for ind in imdb.image_range:
        if eval_params.save_params.output_var is True:
            eval_output = eoutput.EvaluationOutput()
        else:
            eval_output = None
        cur_full_image = imdb.get_image(ind)
        cur_patch_labels = imdb.get_patch_label(ind)
        cur_image_name = imdb.get_image_name(ind)
        cur_seg = imdb.get_seg(ind)
        if eval_params.save_params.output_var is True:
            # Record the raw (pre-augmentation) inputs on the output object.
            eval_output.raw_image = cur_full_image
            eval_output.raw_label = cur_patch_labels
            eval_output.image_name = cur_image_name
            eval_output.raw_seg = cur_seg
        if eval_params.verbosity >= 2:
            print("Evaluating image number: " + str(ind + 1) + " (" +
                  cur_image_name + ")...")
        if eval_params.save_params.disable is False:
            if eval_helper.check_exists(eval_params.save_foldername, cur_image_name):
                # if the file for this image exists then we have already begun this at some point
                print("File already exists")
            else:
                eval_helper.save_initial_attributes(eval_params, cur_image_name)
            # Resume point: 'none' = nothing done yet, 'predict' = step 1 done.
            status = eval_helper.get_complete_status(
                eval_params.save_foldername, cur_image_name,
                boundaries=eval_params.boundaries)
        else:
            status = 'none'
        if status == 'none' and (eval_params.eval_mode == 'both' or
                                 eval_params.eval_mode == 'network'):
            # PERFORM STEP 1: evaluate/predict patches with network
            if eval_params.verbosity >= 2:
                print("Augmenting data using augmentation: " +
                      eval_params.aug_desc + "...")
            aug_fn = eval_params.aug_fn_arg[0]
            aug_arg = eval_params.aug_fn_arg[1]
            # augment raw full sized image and label
            augment_image, augment_patch_labels, augment_seg, _, augment_time = \
                aug_fn(cur_full_image, cur_patch_labels, cur_seg, aug_arg)
            if eval_params.save_params.output_var is True:
                eval_output.aug_image = augment_image
                eval_output.aug_label = augment_patch_labels
                eval_output.aug_seg = augment_seg
            if eval_params.verbosity >= 2:
                print("Constructing patches...")
            # construct patches
            input_patches, input_labels, patch_time = \
                datacon.construct_patches_whole_image(augment_image,
                                                      augment_patch_labels,
                                                      eval_params.patch_size)
            patch_imdb = image_db.ImageDatabase(images=input_patches,
                                                labels=input_labels)
            if eval_params.verbosity >= 2:
                print("Running network predictions...")
            # use a generator to supply data to model (predict_generator)
            # we have already previously augmented to image so need to augment the individual patches
            start_predict_time = time.time()
            import keras

            # Resets the generator's internal counters before each predict
            # pass so repeated predictions start from a clean state.
            class CustomCallback(keras.callbacks.Callback):
                def __init__(self, gen):
                    keras.callbacks.Callback.__init__(self)
                    self.gen = gen

                def on_predict_begin(self, logs=None):
                    self.gen.batch_gen.batch_counter = 0
                    self.gen.batch_gen.full_counter = 0
                    self.gen.batch_gen.aug_counter = 0

            if not eval_params.ensemble:
                start_gen_time = time.time()
                gen = data_generator.DataGenerator(
                    patch_imdb, eval_params.batch_size, aug_fn_args=[],
                    aug_mode='none', aug_probs=[], aug_fly=False,
                    shuffle=False, normalise=eval_params.normalise_input,
                    transpose=eval_params.transpose)
                end_gen_time = time.time()
                gen_time = end_gen_time - start_gen_time
                cust_callback = CustomCallback(gen)
                predicted_labels = eval_params.loaded_model.predict_generator(
                    gen, verbose=eval_params.predict_verbosity,
                    callbacks=[cust_callback])
                print(predicted_labels.shape)
            else:
                # Ensemble: run every loaded model over its own generator
                # and collect per-model prediction arrays.
                predicted_labels = []
                for i in range(len(eval_params.loaded_models)):
                    start_gen_time = time.time()
                    gen = data_generator.DataGenerator(
                        patch_imdb, eval_params.batch_size, aug_fn_args=[],
                        aug_mode='none', aug_probs=[], aug_fly=False,
                        shuffle=False, normalise=eval_params.normalise_input,
                        transpose=eval_params.transpose)
                    end_gen_time = time.time()
                    gen_time = end_gen_time - start_gen_time
                    predicted_labels.append(
                        eval_params.loaded_models[i].predict_generator(
                            gen, verbose=eval_params.predict_verbosity))
            end_predict_time = time.time()
            predict_time = end_predict_time - start_predict_time
            if eval_params.verbosity >= 2:
                print("Converting predictions to boundary maps...")
            # convert predictions to usable probability maps
            start_convert_time = time.time()
            if eval_params.boundaries is True and \
                    eval_params.save_params.boundary_maps is True:
                if not eval_params.ensemble:
                    prob_maps = convert_predictions_to_maps_patch_based(
                        predicted_labels, imdb.image_width, imdb.image_height)
                else:
                    prob_maps = []
                    for i in range(len(predicted_labels)):
                        prob_maps.append(
                            np.expand_dims(
                                convert_predictions_to_maps_patch_based(
                                    predicted_labels[i], imdb.image_width,
                                    imdb.image_height),
                                axis=0))
                    prob_maps = eval_helper.perform_ensemble_patch(prob_maps)
            else:
                prob_maps = None
            if eval_params.save_params.output_var is True:
                eval_output.boundary_maps = prob_maps
            end_convert_time = time.time()
            convert_time = end_convert_time - start_convert_time
            # save data to file
            if eval_params.save_params.disable is False:
                eval_helper.intermediate_save_patch_based(
                    eval_params, imdb, cur_image_name, prob_maps, predict_time,
                    augment_time, gen_time, convert_time, patch_time,
                    augment_image, augment_patch_labels, augment_seg,
                    cur_full_image, cur_patch_labels, cur_seg)
        # Re-read status so step 2 runs in the same pass once step 1 is done.
        if eval_params.save_params.disable is False:
            status = eval_helper.get_complete_status(
                eval_params.save_foldername, cur_image_name,
                boundaries=eval_params.boundaries)
        else:
            status = 'predict'
        if status == 'predict' and eval_params.boundaries is True and \
                (eval_params.eval_mode == 'both' or eval_params.eval_mode == 'gs'):
            aug_fn = eval_params.aug_fn_arg[0]
            aug_arg = eval_params.aug_fn_arg[1]
            # augment raw full sized image and label
            augment_image, augment_patch_labels, augment_seg, _, augment_time = \
                aug_fn(cur_full_image, cur_patch_labels, cur_seg, aug_arg)
            # load probability maps from previous step
            if eval_params.save_params.disable is False and \
                    eval_params.save_params.boundary_maps is True:
                prob_maps = eval_helper.load_dataset_extra(
                    eval_params, cur_image_name, "boundary_maps")
            # PERFORM STEP 2: segment probability maps using graph search
            boundary_maps = get_boundary_maps_only(imdb, prob_maps)
            eval_helper.eval_second_step(eval_params, boundary_maps,
                                         augment_seg, cur_image_name,
                                         augment_image, augment_patch_labels,
                                         imdb, dices=None,
                                         eval_output=eval_output)
        elif eval_params.boundaries is False:
            if eval_params.save_params.disable is False and \
                    eval_params.save_params.attributes is True:
                eval_helper.save_final_attributes(eval_params, cur_image_name,
                                                  graph_time=None)
        if eval_params.save_params.disable is False and \
                eval_params.save_params.temp_extra is True:
            eval_helper.delete_loadsaveextra_file(eval_params, cur_image_name)
        if eval_params.verbosity >= 2:
            print("DONE image number: " + str(ind + 1) + " (" +
                  cur_image_name + ")...")
            print("______________________________")
    return eval_outputs
def train(generated_data_path=None, epochs=1, examples_cap=None):
    """Train the LSTM contrastive-VAE model.

    Args:
        generated_data_path: If not None, load pre-generated data from this
            path instead of generating it anew.
        epochs: Epochs to train the model.
        examples_cap: If not None, restrict to only this many training
            examples.

    Returns:
        Nothing.
    """
    # Prepare ModelConfig
    base_dir = os.getcwd().replace("/contrastive_vae", "")
    glove_dir = base_dir.replace("/code", "/data/glove.twitter.27B/")
    embedding_path = os.path.join(glove_dir, 'glove.twitter.27B.200d.txt')
    short_jokes_path = os.path.join(
        base_dir.replace("/code", '/data/short-jokes-dataset/'),
        "shortjokes.csv")
    hacker_news_path = os.path.join(
        base_dir.replace("/code", '/data/hacker-news-dataset/'),
        "hacker_news_subset_10_to_200.csv")
    model_dir = (base_dir +
                 "/model_checkpoints/contrastive_vae/{:%Y%m%d_%H%M%S}".format(
                     datetime.now()))
    model_config = contrastive_vae_model.ModelConfig(
        positive_data_path=short_jokes_path,
        contrastive_data_path=hacker_news_path,
        embedding_path=embedding_path,
        model_dir=model_dir,
        embedding_dim=200,
        batch_size=32,
        max_nb_words=100000,
        max_nb_examples=None,
        max_sequence_length=50,
        encoder_lstm_dims=[256, 128],
        decoder_lstm_dims=[128, 256],
        latent_dim=64,
        kl_weight=1.,
        optimizer="RMSprop")

    # Set up logging.
    # Bug fix: the original assigned `logger` AFTER using it in the except
    # clause below; since `logger` is local to this function, a failed
    # makedirs raised UnboundLocalError instead of logging. Initialize the
    # logger first, and catch only OSError instead of a bare except.
    logger = logging.getLogger("contrastive_vae")
    logger.setLevel(logging.INFO)
    try:
        os.makedirs(model_config.model_dir)
    except OSError:
        logger.info("Did not successfully make new model dir")
    logging.basicConfig(filename=(model_config.model_dir + '/model_log.log'),
                        level=logging.INFO)

    # Load or generate data
    logger.info("Loading or generating data...")
    if generated_data_path:
        # Fix: pickle data must be read in binary mode ("rb", the original
        # used "r"), and the handle should be closed deterministically.
        with open(generated_data_path, "rb") as data_file:
            x_train, s_train, x_val, s_val, tokenizer, _, _ = \
                pickle.load(data_file)
    else:
        data_gen = data_generator.DataGenerator(
            positive_data_path=short_jokes_path,
            contrastive_data_path=hacker_news_path)
        x_train, s_train, x_val, s_val, tokenizer, _, _ = data_gen.generate()
    logger.info("Done loading or generating data.")

    # Build and fit model
    contra_vae = contrastive_vae_model.ContraVAE(model_config, tokenizer)
    hist = contra_vae.fit(x_train=x_train,
                          s_train=s_train,
                          x_val=x_val,
                          s_val=s_val,
                          epochs=epochs,
                          examples_cap=examples_cap)
    logger.info(hist.history)
    print("Done.")
# ## Premier réseau : Nom des notes #%% if __name__ == "__main__": #nb_labels = 23 # 23 symboles pour les notes nb_labels = 15 # 15 symboles pour les octaves nb_epochs = 50 ids = dict() ids['train'] = os.listdir(os.path.abspath("../data/train_out_x/")) ids['valid'] = os.listdir(os.path.abspath("../data/validation_out_x/")) batch_size_eval = 16 train_generator = datas.DataGenerator(ids['train'], "train", batch_size=12, aug_rate=0.25) valid_generator = datas.DataGenerator(ids['valid'], "validation", batch_size=batch_size_eval, aug_rate=0.25) nb_train = len(ids['train']) nb_eval = len(ids['valid']) x_valid = valid_generator[0] y_valid = np.zeros(len(x_valid[1][0][2])) nb_features = int(x_valid[1][0][0].shape[2]) #Hauteur des images padding_value = 255 #%% network = create_network(nb_features, padding_value, lr=0.0001) #%% checkout_path = "../models/checkout/test2"
from sklearn.cluster import KMeans from sklearn.decomposition import PCA import data_generator as d import pandas as pd import numpy as np import matplotlib.pyplot as plt from pprint import pprint import accuracy as ac i = 1 gen = d.DataGenerator() plt.figure(figsize=(12, 12)) gen.clearDatabase() gen.generateDatabase(1 * 5 / 100, 20) infected_list = np.array(gen.getInfectedList()) km = KMeans(n_clusters=5, n_init=1000, algorithm="full", tol=1e-8) kmp = km.fit_predict(infected_list) li = np.asarray(infected_list) pca = PCA(n_components=2) #2-dim ensional PCA transformed = pd.DataFrame(pca.fit_transform(infected_list)) cent_reduc = pd.DataFrame(pca.fit_transform(km.cluster_centers_)) plt.subplot(320 + i) plt.scatter(transformed[:][0], transformed[:][1], s=20, c=kmp) plt.scatter(cent_reduc[:][0], cent_reduc[:][1],
def rAtk(pred, target, k):
    """Recall@k: fraction of ``target`` found in the top-k of ``pred``.

    Args:
        pred: Ranked sequence of predicted items (best first).
        target: Collection of ground-truth items.
        k: Number of top predictions to consider.

    Returns:
        Count of top-k predictions present in ``target``, divided by the
        size of ``target`` (float).
    """
    #sorted_pred = np.argsort(pred)[::-1]
    hits = sum(1 for item in pred[:k] if item in target)
    return hits / float(len(target))


gen = SentenceGeneration()
gen.readModel('keyword_f')
data_gen = data_generator.DataGenerator(
    "../data/code_f_keyword_indexed.txt",
    "../data/comment_f_keyword_indexed.txt",
    0.20, 600, 20)
codes, keywords, raw_comment = data_gen.getTestData()
# Shuffle all three lists with the same seed so they stay aligned.
np.random.seed(30)
np.random.shuffle(codes)
np.random.seed(30)
np.random.shuffle(raw_comment)
np.random.seed(30)
np.random.shuffle(keywords)
sens = []
co = []
comm = []
r = 0
def train_test(ds_name, K, mode, ds_path='Datasets/', W=None, max_epochs=100,
               test_percent=0.20, val_percent=0.10, batch_size=20,
               savefig=False, showfig=True):
    """Train and evaluate the chosen model on a chosen dataset.

    Args:
        ds_name: Dataset name, matching the dataset's directory.
        K: Number of receptive fields input to the model, each of size W.
        mode: Type of features fed to the classifier; one of
            ['vertex', 'edge', 'comb', 'vertex_channels'].
        ds_path: Path containing the dataset directory.
        W: Size of the receptive field — the number of relative graph
            vertices fed into one CNN kernel. Use None for the recommended
            values, an integer for a custom value, or a tuple for 'comb'
            mode.
        max_epochs: Maximum number of training epochs.
        test_percent: Test-set fraction of the whole dataset.
        val_percent: Validation fraction taken out of the train set.
        batch_size: Batch size.
        savefig: Whether to save the result graphs to PDF files.
        showfig: Whether to show the graphs automatically after the run.

    Returns:
        The trained model.
    """
    data, labels = prepare_paths(Datasets_dict[ds_name], overwrite=True)
    num_of_classes = len(set(labels.values()))
    rands1 = np.random.random(len(data))
    # Resolve receptive-field widths for vertices (wv) and edges (we).
    # NOTE(review): `or` binds looser than `and`, so this reads as
    # "int W, OR 1-element-tuple W"; in the 1-tuple case wv/we are set to
    # the tuple itself rather than its element — confirm this is intended.
    if type(W) == int or type(W) == tuple and len(W) == 1:
        wv = W
        we = W
    elif type(W) == tuple:
        wv = W[0]
        we = W[1]
    else:
        rec_width = get_recommended_width(ds_name, ds_path)
        wv = rec_width['V']
        we = rec_width['E']
        print(
            'Chosen Recommended width values are {} for verteces and {} for edges'
            .format(wv, we))
    # Build the model matching the requested feature mode; W is normalized
    # to a tuple for use in the log-directory name below.
    if mode == 'comb':
        m = create_1DdoubleCnn2(K, wv, we, num_of_classes)
        W = (wv, we)
    elif mode == 'vertex':
        m = create_1Dcnn(K, wv, num_of_classes, n_channels=1)
        W = (wv, )
    elif mode == 'edge':
        m = create_1Dcnn(K, we, num_of_classes)
        W = (we, )
    elif mode == 'vertex_channels':
        m = create_1Dcnn(K, wv, num_of_classes, n_channels=4)
        W = (wv, )
    else:
        raise Exception(
            "'mode' parameter should be in ['vertex','edge','comb','vertex_channels'] "
        )
    # Random train/test split, then a validation split out of the train set.
    X_train_ids = data[rands1 > test_percent]
    X_test_ids = data[rands1 <= test_percent]
    rands2 = np.random.random(len(X_train_ids))
    X_val_ids = X_train_ids[rands2 <= val_percent]
    X_train_ids = X_train_ids[rands2 > val_percent]
    dg_train = data_generator.DataGenerator(X_train_ids, labels,
                                            Datasets_dict[ds_name]['path'],
                                            len(set(labels.values())),
                                            W=W, k=K, mode=mode,
                                            batch_size=batch_size)
    dg_test = data_generator.DataGenerator(X_test_ids, labels,
                                           Datasets_dict[ds_name]['path'],
                                           len(set(labels.values())),
                                           W=W, k=K, mode=mode)
    dg_val = data_generator.DataGenerator(X_val_ids, labels,
                                          Datasets_dict[ds_name]['path'],
                                          len(set(labels.values())),
                                          W=W, k=K, mode=mode)
    dirname = 'TB_Dataset-{}__Mode-{}__K-{}__Width-{}'.format(
        ds_name, mode, K, '_'.join([str(w) for w in W]))
    # Train with TensorBoard logging and early stopping on validation accuracy.
    h = m.fit_generator(dg_train,
                        epochs=max_epochs,
                        verbose=2,
                        callbacks=[
                            TensorBoard(dirname),
                            EarlyStopping(patience=10, monitor='val_acc')
                        ],
                        validation_data=dg_val.getallitems(),
                        workers=1)
    X_test, y_test = dg_test.getallitems()
    ev = m.evaluate(X_test, y_test)
    # Persist the training history next to the TensorBoard logs.
    with open(dirname + '/history.json', 'w') as file:
        file.write(json.dumps(h.history))
    plot_graph(dirname, ds_name, 'val_acc', 'acc', 'Accuracy', h, K, mode,
               len(h.epoch), savefig, showfig, W, ev[1])
    plot_graph(dirname, ds_name, 'val_loss', 'loss', 'Loss', h, K, mode,
               len(h.epoch), savefig, showfig, W, ev[0])
    return m
log_embedding = True if epoch % 20 == 0 else False write_log(iter_i, log_embedding, dg) if (epoch % SAVE_CKP_EVERY) == 0: checkpoint = { 'epoch': epoch, 'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict() } net.save_ckp(checkpoint, './models', epoch) if epoch % UPDATE_LR_EVERY == 0: scheduler.step() for param_group in optimizer.param_groups: lr = param_group['lr'] print(f'Learning rate updated to: {lr}') timeElapsed = datetime.now() - start_time print('Finished Training! Time elapsed (hh:mm:ss.ms) {}'.format(timeElapsed)) print("\nHistory:") print(running_loss_history) print(running_acc_history) write_log(iter_i - 1, log_embedding=True, dg=dg) writer.close() if __name__ == '__main__': batch_size = 32 dg = data_generator.DataGenerator(root='./dataset', batch_size=batch_size) run(dg, batch_size=batch_size, num_epochs=50, lr=5e-4)
def main():
    """Entry point: train a (weakly supervised) VAE with Chainer.

    Phase 1 trains on labelled data only. When ``--mode weakly`` (the
    default), a second phase warm-starts from the phase-1 weights, retrains
    with unlabelled data included, and then produces evaluation plots and
    animated GIFs. All outputs are written under ``--out``.

    NOTE(review): this source arrived with all line structure stripped; the
    indentation below is reconstructed. In particular, everything from the
    ``if args.mode == "weakly":`` test to the end is read as belonging to
    that branch, since it uses ``no_std``/``epochs_so_far`` defined there —
    confirm against the original file.
    """
    # ---- command-line interface ----
    parser = argparse.ArgumentParser(description='Chainer example: VAE')
    parser.add_argument('--gpu', default=0, type=int,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result/',
                        help='Directory to output the result')
    parser.add_argument(
        '--epoch_labelled', '-e', default=100, type=int,
        help='Number of epochs to learn only with labelled data')
    parser.add_argument(
        '--epoch_unlabelled', '-u', default=100, type=int,
        help='Number of epochs to learn with labelled and unlabelled data')
    parser.add_argument('--dimz', '-z', default=2, type=int,
                        help='Dimention of encoded vector')
    parser.add_argument('--batchsize', '-batch', type=int, default=128,
                        help='Learning minibatch size')
    parser.add_argument('--data', '-d', default='sprites',
                        help='Name of the dataset to be used for experiments')
    parser.add_argument('--model', '-m', default='conv',
                        help='Convolutional or linear model')
    # NOTE(review): --beta/--gamma/--freq declare no type=, so they arrive as
    # strings whenever actually passed on the command line — confirm the
    # downstream code tolerates that.
    parser.add_argument('--beta', '-b', default=100,
                        help='Beta coefficient for the KL loss')
    parser.add_argument('--gamma', '-g', default=100000,
                        help='Gamma coefficient for the classification loss')
    parser.add_argument(
        '--labels', '-l', default="composite",
        help='Determined how to treat the labels for the different images')
    parser.add_argument(
        '--freq', '-f', default=10,
        help='Frequency at which snapshots of the model are saved.')
    parser.add_argument(
        '--mode', default="weakly",
        help='Mode of training - weakly supervised or unsupervised')
    args = parser.parse_args()

    # ---- echo the configuration ----
    print('\n###############################################')
    print('# GPU: \t\t\t{}'.format(args.gpu))
    print('# dim z: \t\t{}'.format(args.dimz))
    print('# Minibatch-size: \t{}'.format(args.batchsize))
    print('# Epochs Labelled: \t{}'.format(args.epoch_labelled))
    print('# Epochs Unabelled: \t{}'.format(args.epoch_unlabelled))
    print('# Dataset: \t\t{}'.format(args.data))
    print('# Model Architecture: \t{}'.format(args.model))
    print('# Beta: \t\t{}'.format(args.beta))
    print('# Gamma: \t\t{}'.format(args.gamma))
    print('# Frequency: \t\t{}'.format(args.freq))
    print('# Trainign model: \t{}'.format(args.model))
    print('# Out Folder: \t\t{}'.format(args.out))
    print('###############################################\n')

    # Per-metric histories; filled in by training_loop() below.
    stats = {'train_loss': [], 'train_accs': [], 'valid_loss': [],
             'valid_rec_loss': [], 'valid_label_loss': [],
             'valid_label_acc': [], 'valid_kl': []}

    # Output directory layout under args.out.
    models_folder = os.path.join(args.out, "models")
    manifold_gif = os.path.join(args.out, "gifs/manifold_gif")
    scatter_gif = os.path.join(args.out, "gifs/scatter_gif")
    scatter_folder = os.path.join(args.out, "scatter")
    eval_folder = os.path.join(args.out, "eval")
    # Wipe models from a previous run (raises if the folder does not exist).
    shutil.rmtree(os.path.join(args.out, "models"))
    os.mkdir(os.path.join(args.out, "models"))

    # Phase 1 sees labelled data only, unless running fully unsupervised.
    if args.mode == "unsupervised":
        ignore = []
    else:
        ignore = ["unlabelled"]
    generator = data_generator.DataGenerator()
    train, train_labels, train_concat, train_vectors, test, test_labels, \
        test_concat, test_vectors, unseen, unseen_labels, unseen_concat, \
        unseen_vectors, groups = generator.generate_dataset(ignore=ignore,
                                                            args=args)
    data_dimensions = train.shape
    print('\n###############################################')
    print("DATA_LOADED")
    print("# Training: \t\t{0}".format(train.shape))
    print("# Training labels: \t{0}".format(set(train_labels)))
    print("# Training labels: \t{0}".format(train_labels.shape))
    print("# Training vectors: \t{0}".format(train_vectors.shape))
    print("# Testing: \t\t{0}".format(test.shape))
    print("# Testing labels: \t{0}".format(set(test_labels)))
    print("# Testing labels: \t{0}".format(test_labels.shape))
    print("# Testing vectors: \t{0}".format(test_vectors.shape))
    print("# Unseen: \t\t{0}".format(unseen.shape))
    print("# Unseen labels: \t{0}".format(set(unseen_labels)))
    print('###############################################\n')

    train_iter = chainer.iterators.SerialIterator(train_concat, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test_concat, args.batchsize,
                                                 repeat=False, shuffle=False)

    # Prepare VAE model, defined in net.py
    if args.model == "conv":
        if args.data == "sprites":
            model = net.Conv_VAE(train.shape[1], n_latent=args.dimz,
                                 groups=groups, beta=args.beta,
                                 gamma=args.gamma)
        else:
            model = net.Conv_VAE_MNIST(train.shape[1], args.dimz,
                                       beta=args.beta)
    else:
        model = net.VAE(train.shape[1], args.dimz, 500)
    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # Setup an optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)
    lf = model.get_loss_func()

    # ---- phase 1: supervised training on labelled data ----
    stats, model, optimizer, epochs_so_far = training_loop(
        model=model, optimizer=optimizer, stats=stats,
        epochs=args.epoch_labelled, train_iter=train_iter,
        test_iter=test_iter, lf=lf, models_folder=models_folder, args=args)

    print("Save Model\n")
    serializers.save_npz(os.path.join(models_folder, 'final.model'), model)
    print("Save Optimizer\n")
    serializers.save_npz(os.path.join(models_folder, 'final.state'), optimizer)
    print("Clear Images from Last experiment\n")
    clear_last_results(folder_name=args.out)

    if args.mode == "weakly":
        model.to_cpu()
        config_parser = ConfigParser("config/config.json")
        groups = config_parser.parse_groups()

        # calculate manifold boundaries
        latent = model.get_latent(test).data
        mean = np.mean(latent, axis=0)
        cov = np.cov(latent.T)
        no_std = 2
        # boundaries are [[min_x, min_y],[max_x, max_y]]
        boundaries = np.array(
            [mean - no_std * cov.diagonal(), mean + no_std * cov.diagonal()])

        # assign colors to each label for plotting purposes
        all_labels = np.append(test_labels, unseen_labels, axis=0)
        colors = attach_colors(labels=all_labels)

        # visualise the learnt data manifold in the latent space
        print("Plot Reconstructed images sampeld from a standart Normal\n")
        data = np.repeat(np.append(test, unseen, axis=0), 2, axis=0)
        figure_title = "Manifold Visualisation"
        plot_sampled_images(model=model, data=data, boundaries=boundaries,
                            image_size=data_dimensions[-1],
                            image_channels=data_dimensions[1],
                            filename=os.path.join(args.out, "manifold"),
                            figure_title=figure_title)

        print("Performing Reconstructions\n")
        perform_reconstructions(model=model, train=train, test=test,
                                unseen=unseen, no_images=25,
                                name_suffix="supervised", args=args)

        os.mkdir(os.path.join(scatter_folder, "supervised"))
        print("Plot Latent Testing Distribution for Singular Labels\n")
        data = np.repeat(test, 2, axis=0)
        plot_labels = test_labels
        plot_separate_distributions(data=data, labels=plot_labels,
                                    groups=groups, boundaries=boundaries,
                                    colors=colors["singular"], model=model,
                                    filename=os.path.join(
                                        scatter_folder, "supervised",
                                        "singular_separate"))
        plot_overall_distribution(data=data, labels=plot_labels,
                                  boundaries=boundaries,
                                  colors=colors["singular"], model=model,
                                  filename=os.path.join(
                                      scatter_folder, "supervised",
                                      "singular_together"))

        # Reload the dataset, this time including the unlabelled examples
        # (no ignore= argument).
        generator = data_generator.DataGenerator()
        train, train_labels, train_concat, train_vectors, test, test_labels, \
            test_concat, test_vectors, unseen, unseen_labels, unseen_concat, \
            unseen_vectors, groups = generator.generate_dataset(args=args)
        data_dimensions = train.shape
        print('\n###############################################')
        print("DATA_LOADED")
        print("# Training: \t\t{0}".format(train.shape))
        print("# Training labels: \t{0}".format(set(train_labels)))
        print("# Training labels: \t{0}".format(train_labels.shape))
        print("# Training vectors: \t{0}".format(train_vectors.shape))
        print("# Testing: \t\t{0}".format(test.shape))
        print("# Testing labels: \t{0}".format(set(test_labels)))
        print("# Testing labels: \t{0}".format(test_labels.shape))
        print("# Testing vectors: \t{0}".format(test_vectors.shape))
        print("# Unseen: \t\t{0}".format(unseen.shape))
        print("# Unseen labels: \t{0}".format(set(unseen_labels)))
        print('###############################################\n')

        train_iter = chainer.iterators.SerialIterator(train_concat,
                                                      args.batchsize)
        test_iter = chainer.iterators.SerialIterator(test_concat,
                                                     args.batchsize,
                                                     repeat=False,
                                                     shuffle=False)

        # Fresh model warm-started from the phase-1 snapshot.
        # NOTE(review): the load path hard-codes "result/models" and ignores
        # args.out — breaks whenever a non-default --out is used.
        model = net.Conv_VAE(train.shape[1], n_latent=args.dimz,
                             groups=groups, beta=args.beta, gamma=args.gamma)
        serializers.load_npz("result/models/final.model", model)
        model.gamma = 10000  # override the classification weight for phase 2
        # model.beta = 1
        if args.gpu >= 0:
            # Make a specified GPU current
            chainer.cuda.get_device_from_id(args.gpu).use()
            model.to_gpu()
        optimizer = chainer.optimizers.Adam()
        optimizer.setup(model)
        lf = model.get_loss_func()

        # ---- phase 2: continue training with unlabelled data included ----
        stats, model, optimizer, _ = training_loop(
            model=model, optimizer=optimizer, stats=stats,
            epochs=args.epoch_unlabelled, train_iter=train_iter,
            test_iter=test_iter, lf=lf, models_folder=models_folder,
            epochs_so_far=epochs_so_far, args=args)

        ########################################
        ########### RESULTS ANALYSIS ###########
        ########################################
        model.to_cpu()
        config_parser = ConfigParser("config/config.json")
        groups = config_parser.parse_groups()

        # calculate manifold boundaries
        latent = model.get_latent(test).data
        mean = np.mean(latent, axis=0)
        cov = np.cov(latent.T)
        # boundaries are [[min_x, min_y],[max_x, max_y]]
        boundaries = np.array(
            [mean - no_std * cov.diagonal(), mean + no_std * cov.diagonal()])

        # assign colors to each label for plotting purposes
        all_labels = np.append(test_labels, unseen_labels, axis=0)
        colors = attach_colors(labels=all_labels)

        # visualise the learnt data manifold in the latent space
        print("Plot Reconstructed images sampeld from a standart Normal\n")
        data = np.repeat(np.append(test, unseen, axis=0), 2, axis=0)
        figure_title = "Manifold Visualisation"
        plot_sampled_images(model=model, data=data, boundaries=boundaries,
                            image_size=data_dimensions[-1],
                            image_channels=data_dimensions[1],
                            filename=os.path.join(args.out, "manifold_1"),
                            figure_title=figure_title)

        print("Test time Classification\n")
        tmp_labels = test_time_classification(
            data_test=np.repeat(test, 2, axis=0),
            data_all=np.append(test, unseen, axis=0),
            labels=test_labels, unseen_labels=unseen_labels, groups=groups,
            boundaries=boundaries, model=model, colors=colors,
            folder_name=eval_folder)

        print("Label Analisys\n")
        true_labels = np.append(test_labels, unseen_labels, axis=0)
        label_analysis(labels=true_labels, predictions=tmp_labels,
                       groups=groups, model=model, folder_name=eval_folder)

        print("Saving the loss plots\n")
        plot_loss_curves(stats=stats, args=args)

        print("Evaluate Axes Alignment\n")
        data = np.repeat(np.append(test, unseen, axis=0), 2, axis=0)
        plot_labels = np.append(test_labels, unseen_labels, axis=0)
        axes_alignment(data=data, labels=plot_labels, model=model,
                       folder_name=eval_folder)

        print("Performing Reconstructions\n")
        perform_reconstructions(model=model, train=train, test=test,
                                unseen=unseen, no_images=25,
                                name_suffix="weakly_supervised", args=args)

        print("Plot Latent Testing Distribution for Singular Labels\n")
        data = np.repeat(test, 2, axis=0)
        plot_labels = test_labels
        plot_separate_distributions(data=data, labels=plot_labels,
                                    groups=groups, boundaries=boundaries,
                                    colors=colors["singular"], model=model,
                                    filename=os.path.join(
                                        scatter_folder, "singular_separate"))
        plot_overall_distribution(data=data, labels=plot_labels,
                                  boundaries=boundaries,
                                  colors=colors["singular"], model=model,
                                  filename=os.path.join(
                                      scatter_folder, "singular_together"))

        print(
            "Plot Latent Testing Distribution for Singular Labels + Unseen Distribution\n"
        )
        data = np.repeat(np.append(test, unseen, axis=0), 2, axis=0)
        plot_labels = np.append(test_labels, unseen_labels, axis=0)
        plot_separate_distributions(data=data, labels=plot_labels,
                                    boundaries=boundaries,
                                    colors=colors["singular"], model=model,
                                    filename=os.path.join(
                                        scatter_folder,
                                        "singular_separate_unseen"))
        plot_overall_distribution(data=data, labels=plot_labels,
                                  boundaries=boundaries,
                                  colors=colors["singular"], model=model,
                                  filename=os.path.join(
                                      scatter_folder,
                                      "singular_together_unseen"))

        if args.labels == "composite":
            print("Plot Latent Testing Distribution for Composite Labels\n")
            # compose the composite labels
            data = test
            # NOTE(review): len(...) / 2 yields a float on Python 3 and makes
            # reshape raise; should be integer division (//). Works on
            # Python 2 only. Same issue twice more below.
            test_labels_tmp = test_labels.reshape(len(test_labels) / 2, 2)
            plot_labels = np.array(["_".join(x) for x in test_labels_tmp])
            plot_separate_distributions(data=data, labels=plot_labels,
                                        boundaries=boundaries,
                                        colors=colors["composite"],
                                        model=model,
                                        filename=os.path.join(
                                            scatter_folder,
                                            "composite_separate"))
            plot_overall_distribution(data=data, labels=plot_labels,
                                      boundaries=boundaries,
                                      colors=colors["composite"], model=model,
                                      filename=os.path.join(
                                          scatter_folder,
                                          "composite_together"))

            print(
                "Plot Latent Testing Distribution for Composite Labels + Unseen Distribution\n"
            )
            data = np.append(test, unseen, axis=0)
            test_labels_tmp = np.append(test_labels, unseen_labels, axis=0)
            test_labels_tmp = test_labels_tmp.reshape(
                len(test_labels_tmp) / 2, 2)
            plot_labels = np.array(["_".join(x) for x in test_labels_tmp])
            plot_separate_distributions(data=data, labels=plot_labels,
                                        boundaries=boundaries,
                                        colors=colors["composite"],
                                        model=model,
                                        filename=os.path.join(
                                            scatter_folder,
                                            "composite_separate_unseen"))
            plot_overall_distribution(data=data, labels=plot_labels,
                                      boundaries=boundaries,
                                      colors=colors["composite"], model=model,
                                      filename=os.path.join(
                                          scatter_folder,
                                          "composite_together_unseen"))

        print("Generating data for retrospective model evaluation\n")
        # Re-plot every saved snapshot except the final model ("final" is
        # filtered out by name).
        for model_name in list(
                filter(lambda name: "final" not in name,
                       os.listdir(models_folder))):
            serializers.load_npz(os.path.join(models_folder, model_name),
                                 model)
            filename = model_name.replace(".model", "")
            figure_title = "Manifold Visualisation for epoch {0}".format(
                filename)
            data = np.repeat(np.append(test, unseen, axis=0), 2, axis=0)
            plot_sampled_images(model=model, data=data, boundaries=boundaries,
                                image_size=data_dimensions[-1],
                                image_channels=data_dimensions[1],
                                filename=os.path.join(manifold_gif, filename),
                                figure_title=figure_title)
            data = np.repeat(np.append(test, unseen, axis=0), 2, axis=0)
            plot_labels = np.append(test_labels, unseen_labels, axis=0)
            for key in groups:
                if not os.path.exists(os.path.join(scatter_gif, key)):
                    os.mkdir(os.path.join(scatter_gif, key))
                plot_group_distribution(data=data, labels=plot_labels,
                                        boundaries=boundaries,
                                        colors=colors["singular"],
                                        model=model, group_id=key,
                                        filename=os.path.join(
                                            scatter_gif, key, filename))

        print("Making the Latent Manifold GIF\n")
        # assumes frame files are named "<epoch>_<rest>" — TODO confirm.
        # NOTE(review): samples is sorted numerically but rests keeps the
        # original os.listdir order, so zip() below can pair prefixes with
        # the wrong suffixes whenever listdir order is not already sorted.
        samples = [x.split('_')[0] for x in os.listdir(manifold_gif)]
        rests = ['_'.join(x.split('_')[1:]) for x in os.listdir(manifold_gif)]
        samples.sort(key=int)
        samples = [
            os.path.join(manifold_gif, x + "_" + rest)
            for (x, rest) in zip(samples, rests)
        ]
        result_name = os.path.join(manifold_gif, "samples_animation.gif")
        # ImageMagick "convert" stitches the frames into an animated GIF.
        subprocess.call(["convert", "-loop", "5", "-delay", "50"] + samples +
                        [result_name])

        for key in groups:
            print("Making the Composite Label Distribution GIF for group" +
                  key + "\n")
            folder_name = os.path.join(scatter_gif, key)
            distr = [x.replace(".png", "") for x in os.listdir(folder_name)]
            distr.sort(key=int)
            distr = [os.path.join(folder_name, x + ".png") for x in distr]
            result_name = os.path.join(folder_name, "distr_animation.gif")
            subprocess.call(["convert", "-loop", "5", "-delay", "50"] +
                            distr + [result_name])
] list_with_train_labels_paths = [ DATA_FOLDER + 'train/' + 'labels/' + x for x in sorted(os.listdir(DATA_FOLDER + 'train/' + 'labels/')) ] list_with_val_imgs_paths = [ DATA_FOLDER + 'test/' + 'data/' + x for x in sorted(os.listdir(DATA_FOLDER + 'test/' + 'data/')) ] list_with_labels_paths = [ DATA_FOLDER + 'test/' + 'labels/' + x for x in sorted(os.listdir(DATA_FOLDER + 'test/' + 'labels/')) ] train_set = data_generator.DataGenerator(list_with_train_imgs_paths, list_with_train_labels_paths, cache=True, augmentation=True) train_set = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True) test_set = data_generator.DataGenerator(list_with_val_imgs_paths, list_with_labels_paths, cache=True, augmentation=False) test_set = DataLoader(test_set, batch_size=BATCH_SIZE) def sliding_window(top, step=10, window_size=(20, 20)): """ Slide a window_shape window across the image with a stride of step """ for x in range(0, top.shape[0], step): if x + window_size[0] > top.shape[0]: x = top.shape[0] - window_size[0]
dataset_info['train_size'] = 50000 if datatype == 'cifar-100': image_size = 24 n_iterations = 400 num_labels = 100 dataset_info['dataset_name'] = 'cifar-100' dataset_info['n_channels'] = 3 dataset_info['resize_to'] = 0 dataset_info['n_slices'] = 1 dataset_info['train_size'] = 50000 batch_size = dataset_info['train_size'] // 10 train_dataset, train_labels = read_data_file(datatype) data_gen = data_generator.DataGenerator( batch_size, num_labels, dataset_info['train_size'], dataset_info['n_slices'], image_size, dataset_info['n_channels'], dataset_info['resize_to'], dataset_info['dataset_name'], session) if datatype != 'imagenet-250': tf_train_images = tf.placeholder(tf.float32, shape=(batch_size, image_size, image_size, dataset_info['n_channels']), name='TrainDataset') else: train_train_images = tf.placeholder( tf.float32, shape=(batch_size, dataset_info['resize_to'], dataset_info['resize_to'], dataset_info['n_channels']), name='TrainDataset')
epochs=args.epochs, loss_weight=args.loss_weight, checkpoint_dir=args.checkpoint_dir, logs=args.tensorboard_dir, ) if (args.prev_checkpoint != None): AnoVAEGAN1.load_model_checkpoint(args.prev_checkpoint) train_path_list = os.listdir(args.dataset + '/train') test_path_list = os.listdir(args.dataset + '/test') train_generator = data_generator.DataGenerator( list_IDs=train_path_list, directory=args.dataset + '/train', batch_size=args.batch_size, image_size=(args.image_size, args.image_size), n_channels=args.n_channels, shuffle=args.shuffle_data) test_generator = data_generator.DataGenerator(list_IDs=test_path_list, directory=args.dataset + '/test', batch_size=len(test_path_list)) # generated_images = AnoVAEGAN1.generator(train_generator.__getitem__(0), training = True) # # print(generated_images.shape) #Adding data to JSON file. #metadata = {} #metadata['dataset'] = args.dataset #metadata['batch_size'] = batch_size
# Generate a large synthetic dataset with data_generator and shard it into
# JSON files of 10,000 records each under testfiles/. Each shard is a JSON
# array named testdata_<index-of-its-first-record>.
import data_generator
import json
import sys

gen = data_generator.DataGenerator('fields.json')
suffix = str(0)   # name suffix for the next shard to be written
total = 5000000   # total number of records to generate
data = []         # records accumulated since the last flush
for i in range(total + 1):
    # Flush every 10,000 records; "i > 2" only skips the empty flush at
    # i == 0 (any other multiple of 10,000 passes it anyway).
    if i % 10000 == 0 and i > 2:
        # 'a' behaves like 'w' here because each suffix is used exactly once.
        with open('testfiles/testdata_' + suffix, 'a') as fh:
            json.dump(data, fh, indent=1)
        data = []
        suffix = str(i)
        # NOTE(review): indentation reconstructed — the progress print is
        # assumed to run once per flush, not once per record; confirm.
        print('{}%'.format(i / total * 100), end='\r')
    # The extra range(total + 1) iteration exists only to trigger the final
    # flush; break before generating a record past `total`.
    if i == total:
        break
    data.append(gen.generate())
flog.write('leaky:{}, dropout:{}, rnnlen: {}, segment_size:{}\n'.format( leaky, dropout, rnn_len, segment_size)) flog.write('version:{}, existing model:{}\n'.format(version, existing_model)) flog.write('train data: {}, {}\n'.format(file_train_data, file_train_label)) flog.flush() ########## Loading data params = { 'dim': (segment_size, nbr_feature), 'batch_size': batch_size, 'n_channels': 1, 'rnn_len': rnn_len } train_generator = data_generator.DataGenerator(file_train_data, file_train_label, shuffle=True, **params) val_generator = data_generator.DataGenerator( file_val_data, file_val_label, shuffle=False, **params) #set shuffle=False to calculate AUC test_generator1 = data_generator.DataGenerator( file_test_data1, file_test_label1, shuffle=False, use_reverse=False, **params) #set shuffle=False to calculate AUC test_generator2 = data_generator.DataGenerator( file_test_data2, file_test_label2, shuffle=False, use_reverse=False,
import matplotlib.pyplot as plt img = scipy.io.loadmat(temp[0])['wrap'] mask_x = unwrap(img, wrap_around_axis_0=False, wrap_around_axis_1=False, wrap_around_axis_2=False) plt.figure(figsize=(10, 10)) plt.subplot(121) plt.imshow(dg.normalize_angle(img), cmap='jet') plt.subplot(122) plt.imshow(dg.normalize_angle(mask_x), cmap='jet') plt.show() class_map = 1 test_generator = dg.DataGenerator(test_pair, class_map, batch_size=20, dim=(256, 256, 1), shuffle=True) test_steps = test_generator.__len__() test_steps class eval_denoising: def __init__( self, I1, I2, # I1 and I2 are the two images to compare I3=None, # Image bruitée PSNR_peak=255): # default value for PSNR self.I1 = I1 # result self.I2 = I2 # objective
import imp import evaluation import data_generator imp.reload(evaluation) imp.reload(data_generator) from constants import * representative_set_df = pd.read_pickle(os.path.join(DEFAULT_PICKLE_PATH, 'representative_set.pkl')) subdir = '2021-03-31-08h-54m_batchsize_16_hg_4_loss_weighted_mse_aug_light_sigma4_learningrate_5.0e-03_opt_rmsProp_gt-4kp_activ_sigmoid_subset_0.50_wmse-1-5' generator = data_generator.DataGenerator( df=representative_set_df, base_dir=DEFAULT_VAL_IMG_PATH, input_dim=INPUT_DIM, output_dim=OUTPUT_DIM, num_hg_blocks=1, # doesn't matter for evaluation b/c we take one stack for GT shuffle=False, batch_size=len(representative_set_df), online_fetch=False) # %% Run visualization on epoch range and save images to disk epochs_to_visualize = [27, 28] #range(34,45) print("\n\nEval start: {}\n".format(time.ctime())) for epoch in epochs_to_visualize: eval = evaluation.Evaluation( model_sub_dir=subdir, epoch=epoch) X_batch, y_stacked = generator[0] # There is only one batch in the generator y_batch = y_stacked[0] # take first hourglass section
def run(dg, batch_size=128, num_epochs=5, lr=0.001):
    """Train the module-level ``model`` on batches drawn from ``dg``.

    Parameters:
        dg: data generator exposing ``train_dataset``, ``train_loader`` and
            ``make_batch``; created from ./dataset when ``None``.
        batch_size: batch size, used for the total-iteration estimate (and
            for the fallback DataGenerator).
        num_epochs: number of passes over the training set.
        lr: initial Adam learning rate.

    Relies on module-level state: ``model``, ``device``, ``writer``,
    ``total_loss``, ``write_log``, ``print_loss_acc``, ``net``,
    ``SAVE_CKP_EVERY``, ``UPDATE_LR_EVERY``, ``running_loss_history``,
    ``running_acc_history``.
    """
    if dg is None:
        dg = data_generator.DataGenerator(root='./dataset',
                                          batch_size=batch_size)
    # NOTE(review): betas=(0, 0) disables both of Adam's moment averages —
    # confirm this is intentional and not a typo for the (0.9, 0.999) default.
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, betas=(0, 0))
    # Multiply lr by 0.95 on every scheduler.step(); stepped manually below
    # once every UPDATE_LR_EVERY epochs.
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1, gamma=0.95,
                                                last_epoch=-1)
    # -(-a // b) is ceiling division: batches per epoch, times epochs.
    total_iters = -(-len(dg.train_dataset) // batch_size) * num_epochs
    print("NUM_EPOCHS = {}, BATCH_SIZE = {}, len(train_set) = {} "
          "--> #Iterations = {}\n".format(num_epochs, batch_size,
                                          len(dg.train_dataset), total_iters))
    start_time = datetime.now()
    iter_i = 1
    for epoch in range(1, num_epochs + 1):
        print(f'Epoch {epoch}')
        running_loss = 0.0
        for batch_i, anchor in enumerate(dg.train_loader):
            inputs = dg.make_batch(anchor)
            inputs = inputs.to(device)
            # forward + backward + optimize
            outputs = model(inputs)
            loss = total_loss(outputs)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            iter_i += 1
            if iter_i % 10 == 0:
                writer.add_scalar("Loss", loss.item(), iter_i)
            if iter_i % 1000 == 0:
                write_log(iter_i, log_embedding=True, dg=dg)
        else:
            # NOTE(review): indentation reconstructed — this is read as a
            # for/else, i.e. it runs once per epoch after the batch loop
            # finishes (matching "after every epoch" in the note below).
            # Note: set DEBUG=True to see classification acc. after every epoch but comes with the cost of
            # computing the histogram, which takes up some time and therefore leading to a longer training time
            DEBUG = True
            print_loss_acc(DEBUG, running_loss, dg)
        if total_iters < 1000 and epoch % 5 == 0:
            # For short runs, log every 5th epoch; embeddings only every 20th.
            log_embedding = True if epoch % 20 == 0 else False
            write_log(iter_i, log_embedding, dg)
        if (epoch % SAVE_CKP_EVERY) == 0:
            checkpoint = {
                'epoch': epoch,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict()
            }
            net.save_ckp(checkpoint, './models', epoch)
        if epoch % UPDATE_LR_EVERY == 0:
            scheduler.step()
            for param_group in optimizer.param_groups:
                lr = param_group['lr']  # rebinds the lr parameter for logging
            print(f'Learning rate updated to: {lr}')
    timeElapsed = datetime.now() - start_time
    print('Finished Training! Time elapsed (hh:mm:ss.ms) {}'.format(
        timeElapsed))
    print("\nHistory:")
    print(running_loss_history)
    print(running_acc_history)
    # Final log with embeddings; iter_i - 1 is the last completed iteration.
    write_log(iter_i - 1, log_embedding=True, dg=dg)
    writer.close()
# %% representative_set_df = pd.read_pickle( os.path.join(DEFAULT_PICKLE_PATH, 'representative_set.pkl')) subdir = '2021-04-01-21h-59m_batchsize_16_hg_4_loss_weighted_mse_aug_light_sigma4_learningrate_5.0e-03_opt_rmsProp_gt-4kp_activ_sigmoid_subset_0.50_lrfix' eval = evaluation.Evaluation(model_sub_dir=subdir, epoch=26) # %% Save stacked evaluation heatmaps import data_generator imp.reload(data_generator) import time generator = data_generator.DataGenerator(df=representative_set_df, base_dir=DEFAULT_VAL_IMG_PATH, input_dim=INPUT_DIM, output_dim=OUTPUT_DIM, num_hg_blocks=eval.num_hg_blocks, shuffle=False, batch_size=len(representative_set_df), online_fetch=False) # Select image to predict heatmaps X_batch, y_stacked = generator[0] # There is only one batch in the generator # X_batch, y_stacked = evaluation.load_and_preprocess_img('data/skier.jpg', eval.num_hg_blocks) y_batch = y_stacked[0] # take first hourglass section # Save stacked heatmap images to disk m_batch = representative_set_df.to_dict( 'records' ) # TODO: eventually this will be passed from data generator as metadata print("\n\nEval start: {}\n".format(time.ctime())) eval.visualize_batch(X_batch, y_batch, m_batch) print("\n\nEval end: {}\n".format(time.ctime()))
# Load the indexed code/comment corpus and build padded train and test splits
# for the caption-generation model below.
import data_generator
from keras.preprocessing import sequence
import keras
from keras.models import Model
from bleu import computeMaps, bleuFromMaps
from text_generator import SentenceGeneration

max_caption_len = 26  # max comment length in tokens (padding target)
maxlen = 500          # max code-snippet length in tokens (padding target)
mem_size = 30

# 0.20 is presumably the held-out split fraction — TODO confirm against
# DataGenerator's signature.
data_gen = data_generator.DataGenerator(
    "../qnaData/code_f_keyword_indexed.txt",
    "../qnaData/comment_f_indexed.txt", 0.20, maxlen, max_caption_len)

# Training split.
# NOTE(review): padding of the training `codes` is commented out while the
# test-split `codesT` below IS padded — confirm this asymmetry is intended.
codes, partial_captions, next_words = data_gen.MakeDataset3(train=True)
# codes = sequence.pad_sequences(codes, maxlen=maxlen)
partial_captions = sequence.pad_sequences(partial_captions,
                                          maxlen=max_caption_len)

# Test split, padded to the fixed lengths above.
codesT, partial_captionsT, next_wordsT = data_gen.MakeDataset3(train=False)
codesT = sequence.pad_sequences(codesT, maxlen=maxlen)
partial_captionsT = sequence.pad_sequences(partial_captionsT,
                                           maxlen=max_caption_len)

vocab_size = 5000
import tensorflow as tf from tensorflow import keras import numpy as np import data_generator as dg data_generator = dg.DataGenerator() training_samples = data_generator.training_data(1000) num_test_samples = 10 testing_samples = data_generator.testing_data(num_test_samples) input_dim = len(training_samples[0]['input']) num_classes = 4 # optimizer = 'rmsprop' optimizer = keras.optimizers.SGD(0.1, 0.9, nesterov=True) model = keras.Sequential() # model.add(keras.layers.Dense(16)) # model.add(keras.layers.Activation('relu')) model.add(keras.layers.Dense(4, activation='relu', input_dim=input_dim)) # model.add(keras.layers.Dropout(0.5)) # model.add(keras.layers.Dense(8, activation='relu')) # model.add(keras.layers.Dropout(0.5)) model.add(keras.layers.Dense(num_classes, activation='sigmoid')) model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy']) training_data, training_labels = data_generator.samples_to_keras(
accuracy_logger.addHandler(accuracyFH) accuracy_logger.info('#Train EnvID, Epoch, Test EnvID, Non-collision Accuracy,Non-collision Accuracy(Soft),Non-collision loss,' + 'Preci-NC-L,Preci-NC-S,Preci-NC-R,,Rec-NC-L,Rec-NC-S,Rec-NC-R') accuracy_loggers.append(accuracy_logger) graph = tf.Graph() configp = tf.ConfigProto(allow_soft_placement=True,log_device_placement=False) sess = tf.InteractiveSession(graph=graph,config=configp) with sess.as_default() and graph.as_default(): cnn_variable_initializer.set_from_main(sess) cnn_variable_initializer.build_tensorflw_variables_detached() models_utils.set_from_main(sess,logger) train_data_gen = data_generator.DataGenerator( config.BATCH_SIZE, config.TF_NUM_CLASSES, dataset_sizes['train_dataset'], config.TF_INPUT_SIZE, sess, dataset_filenames['train_dataset'], config.TF_INPUT_AFTER_RESIZE,False ) test_data_gen = data_generator.DataGenerator( config.BATCH_SIZE, config.TF_NUM_CLASSES, dataset_sizes['test_dataset'], config.TF_INPUT_SIZE, sess, dataset_filenames['test_dataset'], config.TF_INPUT_AFTER_RESIZE, True ) tf_train_img_ids, tf_train_images, tf_train_labels = train_data_gen.tf_augment_data_with() tf_test_img_ids, tf_test_images, tf_test_labels = test_data_gen.tf_augment_data_with() define_tf_ops(tf_train_images, tf_train_labels, tf_test_images, tf_test_labels) tf.global_variables_initializer().run(session=sess) for main_ep in range(3):