def setup_gens(data, confs):
    """Build the training and validation generators.

    Args:
        data: collection of records; each record is indexed with ``'train'``
            to decide which split it belongs to. It is traversed twice.
        confs: pair ``(train_config, validation_config)``.

    Returns:
        Tuple ``(train_gen, val_gen)`` of ``DataGenerator`` instances.
    """
    train_conf, val_conf = confs

    def _flag_equals(expected):
        # Explicit equality (not truthiness) so that records whose flag is
        # neither True nor False end up in neither split.
        return lambda record: record['train'] == expected

    train_rows = list(filter(_flag_equals(True), data))
    val_rows = list(filter(_flag_equals(False), data))

    return DataGenerator(train_rows, train_conf), DataGenerator(val_rows, val_conf)
def preprocess_chipseq(num_jobs, bin_size):
    """Launch ChIP-seq feature extraction, ``num_jobs`` processes at a time.

    For every (cell type, transcription factor) pair that has a fold-change
    signal file and no existing output file, one ``Process`` running
    ``parralelChIPSeqSignalProcessor`` is queued. The queued processes are then
    started and joined in consecutive batches of ``num_jobs``.

    Args:
        num_jobs: number of processes started together in one batch.
        bin_size: forwarded to the worker and used in the output file name.
    """
    datagen = DataGenerator()
    processes = []

    celltypes = datagen.get_celltypes()
    transcription_factors = datagen.get_trans_fs()

    for part in ['train']:
        with open('../data/annotations/%s_regions.blacklistfiltered.merged.bed' % part) as fin:
            lines = fin.read()

        for celltype in celltypes:
            for transcription_factor in transcription_factors:
                # Skip pairs for which no signal track is available.
                if not os.path.exists('../data/chipseq_fold_change_signal/ChIPseq.%s.%s.fc.signal.train.bw'
                                      % (celltype, transcription_factor)):
                    continue
                fout_path = '../data/preprocess/CHIPSEQ_FEATURES/%s_%s_%d.gz' % (
                    celltype, transcription_factor, bin_size)
                # Only queue work whose output has not been produced yet.
                if not os.path.exists(fout_path):
                    processes.append(
                        Process(target=parralelChIPSeqSignalProcessor,
                                args=(lines, fout_path, celltype,
                                      transcription_factor, bin_size)))

    # Explicit loops instead of map(): on Python 3 map() is lazy, so
    # `map(lambda x: x.start(), ...)` would never start or join anything.
    for i in range(0, len(processes), num_jobs):
        batch = processes[i:i + num_jobs]
        for process in batch:
            process.start()
        for process in batch:
            process.join()
def simple_test(expt):
    """Report the error rate of a classifier whose means match the generator.

    Builds a generator and a classifier that are perfectly matched with
    respect to means, to see what error rate results for the variance values
    configured in ``expt``. The summary string is printed.

    Args:
        expt: experiment settings; reads ``num_phonemes``, ``num_features``,
            ``var_diag_interval``, ``var_offdiag_interval`` and
            ``num_test_frames``.
    """
    gen = DataGenerator(expt.num_phonemes, expt.num_features,
                        expt.var_diag_interval, expt.var_offdiag_interval)
    test_data = gen.generate_simulated_data(expt.num_test_frames)

    # Make perfect "training data" in the form of two points for each class
    # whose mean is exactly the mean for that class. Training on this gives a
    # correct mean for the model, but with some non-zero variance.
    labels = gen.get_labels()
    means = [array(target) for target in gen._targets]

    # Two points per label, placed symmetrically around the class mean.
    delta = [0.1] * expt.num_features
    assert len(labels) == len(means)
    # Built as real lists: on Python 3 zip() returns an iterator, which cannot
    # be concatenated with `+`.
    data = ([(label, m + delta) for label, m in zip(labels, means)] +
            [(label, m - delta) for label, m in zip(labels, means)])

    c = SimpleClassifier(labels, gen.num_features)
    c.train_all(data)

    (rate, results) = measureAccuracy(c, test_data)
    summary = make_summary_string("Simple test", rate, results, c, test_data, gen)
    # Single-argument print() behaves the same on Python 2 and Python 3.
    print(summary)
def main(save=True):
    """Train the RNN and optionally persist it.

    Args:
        save (bool): when true, write the weights and the architecture to the
            files named by ``constructFilename`` (default: True).

    Returns:
        The ``RNN`` wrapper whose ``model`` attribute was compiled and fitted.
    """
    startTime = time()
    trainingSet, validationSet, scaler = setup()

    # Both generators share every setting except the dataset they read.
    generatorOptions = dict(windowSize=WINDOW_SIZE,
                            lookback=LOOKBACK,
                            sampleRate=SAMPLERATE,
                            prediction=PREDICTION)
    trainGen = DataGenerator(trainingSet, scaler, **generatorOptions).generator()
    validGen = DataGenerator(validationSet, scaler, **generatorOptions).generator()

    rnn = RNN(HIDDEN_NODES, LOOKBACK, WINDOW_SIZE, SAMPLERATE, PREDICTION)
    optimizer = rnn.pickOptimizer(OPTIMIZER, lr=LEARNING_RATE)
    rnn.model.compile(loss=LOSS_FUNC, optimizer=optimizer)
    rnn.model.fit_generator(trainGen,
                            steps_per_epoch=STEPS_PER_EPOCH,
                            epochs=EPOCHS,
                            validation_data=validGen,
                            validation_steps=VALIDATION_STEP_PER_EPOCH,
                            verbose=2,
                            shuffle=False)

    endTime = time()
    elapsed = strftime('%H:%M:%S', gmtime(endTime - startTime))
    print(f"\nTRAINING DONE. Total time elapsed: {elapsed}")

    if not save:
        return rnn

    # The two output files differ only in their extension.
    nameParts = (BASE_PATH, HIDDEN_NODES, LOOKBACK, WINDOW_SIZE, SAMPLERATE,
                 PREDICTION)
    weightsFile = constructFilename(*nameParts, WEIGHT_EXT)
    architectureFile = constructFilename(*nameParts, ARCHITECT_EXT)
    rnn.saveWeights(weightsFile)
    rnn.saveArchitecture(architectureFile)
    return rnn
def __init__(self, config_file='config.cfg', model='hg_refined_tiny_200'):
    """Set up the predictor and export predictions for four test sets.

    Args:
        config_file: ``*.cfg`` file with the model's parameters.
        model: name of the ``*.index`` file (weights to load).
    """
    started = time()
    params = process_config(config_file)

    # One generator per numbered test set ("...test1" through "...test4"),
    # created and indexed in that order.
    test_sets = []
    for suffix in ('1', '2', '3', '4'):
        generator = DataGenerator(joints_name=params['joint_list'],
                                  img_dir_test=params['img_directory_test' + suffix],
                                  test_data_file=params['test_txt_file' + suffix],
                                  remove_joints=params['remove_joints'])
        generator._create_test_table()
        test_sets.append(generator)
    datatest1, datatest2, datatest3, datatest4 = test_sets

    predictor = PredictProcessor(params)
    self.predict = predictor
    predictor.color_palette()
    predictor.LINKS_JOINTS()
    predictor.model_init()
    predictor.load_model(load=model)
    predictor._create_prediction_tensor()
    predictor.save_multioutput_as_mat(datagen1=datatest1,
                                      datagen2=datatest2,
                                      datagen3=datatest3,
                                      datagen4=datatest4,
                                      idlh=9,
                                      idrs=2)
    print('Done: ', time() - started, ' sec.')
def train(self):
    """Fit the model, pickle the training history and save the weights.

    Writes the history dictionary to ``Data/model_results`` and the weights to
    ``Data/model/model_weights.h5``.
    """
    fit_result = self.model.fit(
        DataGenerator(batch_size=5),
        verbose=1,
        epochs=20,
        callbacks=[self.callback, self.checkpoint],
    )
    recorded_history = fit_result.history

    with open('Data/model_results', 'wb') as results_file:
        pickle.dump(recorded_history, results_file)

    self.model.save_weights('Data/model/model_weights.h5')
def get_train_data(self):
    """Load or generate the training episodes and split them into arrays.

    Episodes are generated through ``self.datagen`` when
    ``self.generate_new_episodes`` is set, otherwise loaded from ``self.path``.
    Positions 0-3 of every episode are stored as ``train_images``,
    ``train_actions``, ``train_reward_labs`` and ``train_qval_labs``.
    """
    self.datagen = DataGenerator(self.mdp)

    if not self.generate_new_episodes:
        self.train_episodes = np.load(self.path)
    else:
        self.train_episodes = self.datagen.gen_episodes(self.num_train_episodes,
                                                        self.path)

    def _column(position):
        # Collect one field of every episode into an array.
        return np.array([episode[position] for episode in self.train_episodes])

    self.train_images = _column(0)
    self.train_actions = _column(1)
    self.train_reward_labs = _column(2)
    self.train_qval_labs = _column(3)
    self.train_label_to_im_dict = self.datagen.label_to_im_dict
def do_baseline_runs(expt):
    """Run the baseline experiment ``expt.num_runs`` times and print a summary.

    Each run trains a fresh ``SimpleClassifier`` on simulated data containing
    a controlled share of label errors, measures its accuracy on newly
    generated test data and records ``(name, rate)``. The combined summary of
    all runs is printed at the end.

    Args:
        expt: experiment settings; reads ``num_phonemes``, ``num_features``,
            ``var_diag_interval``, ``var_offdiag_interval``, ``num_runs``,
            ``num_test_frames``, ``num_training_frames``,
            ``training_error_rate``, ``num_practice_frames`` and
            ``practice_error_rate``.
    """
    gen = DataGenerator(expt.num_phonemes, expt.num_features,
                        expt.var_diag_interval, expt.var_offdiag_interval)

    all_results = []
    for run_idx in range(expt.num_runs):
        test_data = gen.generate_simulated_data(expt.num_test_frames)

        # There's a problem here if there's only one data point, since then we
        # end up with a variance of 0. We currently hack around this problem
        # by guaranteeing more than one point. We could change the models to
        # allow zero variance but this will mean not being able to make
        # samples from the models without some extra work. Note that we don't
        # care at all about order of training data in these experiments, so we
        # just build our training data in two parts and cat them together. If
        # you hit either of these asserts, you're asking for an error rate
        # that's too high and/or a training data size that's too low. We need
        # two correct samples per phoneme.
        num_secondary_frames = expt.num_training_frames - expt.num_phonemes * 2
        num_errorful_frames = expt.num_training_frames * expt.training_error_rate
        assert expt.num_training_frames >= expt.num_phonemes * 2
        assert num_secondary_frames > num_errorful_frames

        errorless_training_data = gen.generate_simulated_data_per_phoneme(2)
        secondary_training_data = gen.generate_simulated_data(
            num_secondary_frames)

        # Slight trickiness to get a correct error rate for this subset of the
        # data: all errors are placed in the secondary frames, so the rate is
        # rescaled to that subset's size.
        subset_error_rate = float(num_errorful_frames) / num_secondary_frames
        errorful_training_data, num_errors = gen.add_errors_to_data(
            secondary_training_data, subset_error_rate)

        practice_data = gen.generate_simulated_data(expt.num_practice_frames)
        errorful_practice_data, num_errors = gen.add_errors_to_data(
            practice_data, expt.practice_error_rate)

        training_data = (errorless_training_data + errorful_training_data +
                         errorful_practice_data)

        c = SimpleClassifier(gen.get_labels(), gen.num_features)
        c.train_all(training_data)

        (rate, results) = measureAccuracy(c, test_data)
        name = "Baseline 0.%d" % (run_idx, )
        # The per-run summary is still built (as before) but only the
        # aggregate summary below is printed.
        summary = make_summary_string(name, rate, results, c, test_data, gen)
        all_results.append((name, rate))

    # Single-argument print() calls behave the same on Python 2 and Python 3;
    # the former print statements did not parse on Python 3.
    print("\n--------------------------Summary-----------------------")
    print(make_all_runs_summary_string(expt, all_results))
def do_simple_allele_test(expt):
    """Leave-one-out allele test on ten sampled training frames per run.

    For every run, a classifier is trained on errorful training data. Then,
    for each of ten selected frames, a ``SimpleAllele`` is trained on the
    training data without that frame and the primary/variant accuracy on the
    shared test data is printed.

    Args:
        expt: experiment settings; reads ``num_phonemes``, ``num_features``,
            ``var_diag_interval``, ``var_offdiag_interval``,
            ``num_test_frames``, ``num_runs`` and ``training_error_rate``
            (plus whatever ``make_training_data`` needs).
    """
    gen = DataGenerator(expt.num_phonemes, expt.num_features,
                        expt.var_diag_interval, expt.var_offdiag_interval)
    test_data = gen.generate_simulated_data(expt.num_test_frames)

    for run_idx in range(0, expt.num_runs):
        training_data, num_errors = make_training_data(gen, expt)

        # Select the training frames to be tested: a subset of the training
        # data with some errorful frames and some correct frames, in the hope
        # of identifying the incorrect ones. For now, use the first 5 frames
        # and the last 5. The former will have errors and the latter will be
        # correct.
        n = len(training_data)
        assert (n * expt.training_error_rate > 5)  # number of errorful points
        assert (n * (1 - expt.training_error_rate) > 5)  # number of correct points
        # list(...) is required on Python 3, where range objects cannot be
        # concatenated with `+`.
        sample_training_frame_indices = list(range(0, 5)) + list(range(n - 5, n))

        c = SimpleClassifier(gen.get_labels(), gen.num_features)
        c.train_all(training_data)

        all_results = []
        for i in sample_training_frame_indices:
            label = training_data[i][0]
            a = SimpleAllele(c, [label])

            # Remove (label, frame) number i from the training data.
            alt_data = training_data[:i] + training_data[i + 1:]

            # Train the alternate model in the allele on the reduced data.
            a.train_variants(alt_data)

            results = measurePrimaryAndVariantAccuracy(a, test_data)
            # Single-argument print() behaves the same on Python 2 and 3.
            print(results)
            all_results.append(results)
        print('End run %d \n' % (run_idx, ))
def preprocess_dnase(num_jobs, bin_size):
    """Launch DNase feature extraction, ``num_jobs`` processes at a time.

    For each of the 'train', 'ladder' and 'test' region files and each cell
    type, one ``Process`` running ``parralelDNAseSignalProcessor`` is queued
    unless the corresponding feature file already exists. The queued processes
    are then started and joined in consecutive batches of ``num_jobs``.

    Args:
        num_jobs: number of processes started together in one batch.
        bin_size: forwarded to the worker and used in the output file name.
    """
    datagen = DataGenerator()
    processes = []

    celltypes = datagen.get_celltypes()

    for part in ['train', 'ladder', 'test']:
        with open('../data/annotations/%s_regions.blacklistfiltered.merged.bed' % part) as fin:
            lines = fin.read()

        for celltype in celltypes:
            # NOTE(review): the existence check looks for a ".txt" file while
            # the worker is handed a ".gz" output path — confirm which of the
            # two the worker actually leaves on disk.
            if not os.path.exists('../data/preprocess/DNASE_FEATURES/%s_%s_%d.txt'
                                  % (celltype, part, bin_size)):
                fout_path = '../data/preprocess/DNASE_FEATURES/%s_%s_%d.gz' % (
                    celltype, part, bin_size)
                processes.append(
                    Process(target=parralelDNAseSignalProcessor,
                            args=(lines, fout_path, celltype, bin_size)))

    # Explicit loops instead of map(): on Python 3 map() is lazy, so
    # `map(lambda x: x.start(), ...)` would never start or join anything.
    num_processes = num_jobs
    for i in range(0, len(processes), num_processes):
        batch = processes[i:i + num_processes]
        for process in batch:
            process.start()
        for process in batch:
            process.join()
import os
import shutil

import tensorflow as tf

tf.app.flags.DEFINE_string("configfile", "config/config_mpii.cfg",
                           "config file name")
tf.app.flags.DEFINE_string("loadmodel", None,
                           "model name used to continue training")
FLAGS = tf.app.flags.FLAGS

if __name__ == '__main__':
    print('--Parsing Config File')
    params = process_config(FLAGS.configfile)

    # Create the saver directory and keep a copy of the config file in it.
    # Done with os/shutil rather than shell strings so that paths containing
    # spaces or shell metacharacters work and failures raise instead of being
    # silently ignored.
    saver_directory = params['saver_directory']
    if not os.path.isdir(saver_directory):
        os.makedirs(saver_directory)
    shutil.copy(FLAGS.configfile, saver_directory)

    print('--Creating Dataset')
    dataset = DataGenerator(params['joint_list'], params['img_directory'],
                            params['training_txt_file'], params['img_size'])
    dataset._create_train_table()
    dataset._randomize()
    dataset._create_sets()

    model = HourglassModel(params=params, dataset=dataset, training=True)
    model.create_model()
    # The dataset was handed to the constructor above; do_train is called with
    # dataset=None and, when --loadmodel is given, that model name in `load`.
    model.do_train(nEpochs=params['nepochs'],
                   epochSize=params['epoch_size'],
                   saveStep=params['saver_step'],
                   dataset=None,
                   load=FLAGS.loadmodel)
"--model", required=True, help="path to output model (.h5)") args = vars(ap.parse_args()) data_train = pd.read_csv(args['csv']) data_train["file_path"] = data_train["Id"].apply( lambda x: os.path.join(args['dataset'], str(x) + ".npy")) X_train, X_val, y_train, y_val = train_test_split(data_train["file_path"], data_train["Label"], stratify=data_train["Label"], test_size=0.2, random_state=42) training_generator = DataGenerator(X_train, batch_size=50, shuffle=True) validation_generator = DataGenerator(X_val) model = MalwareModel() model = model.create() print(model.summary()) model.fit_generator(generator=training_generator, validation_data=validation_generator, epochs=15) if not os.path.isdir(os.path.dirname(args['model'])): os.makedirs(os.path.dirname(args['model'])) model.save(args['model'])
def predict(modelpath, UNTRAINED_MODEL=False):
    """Run the model over the training, validation and test sets.

    Args:
        modelpath: passed to ``loadTrainedModel`` unless ``UNTRAINED_MODEL``.
        UNTRAINED_MODEL: when true, a fresh ``RNN`` is built from the module
            constants instead of loading a trained one.

    Returns:
        Tuple ``(trainPred, validatePred, testPred, groundTruth)``. The
        predictions are passed through ``scaler.inverse_transform``;
        ``groundTruth`` is the trimmed, flattened training, validation and
        test data joined with ``np.block``.
    """
    if UNTRAINED_MODEL:
        rnn = RNN(HIDDEN_NODES, LOOKBACK, WINDOW_SIZE, SAMPLERATE, 1)
    else:
        rnn = loadTrainedModel(modelpath)

    trainingSet, validationSet, scaler = setup()
    testSet = readDataset(TEST_SET)

    # The generators sample with the window size when the model's sample rate
    # is smaller than the window, and with the model's sample rate otherwise.
    # The same value is used as the batch length (the original author left
    # the question "or sampleRate * windowSize?" open for the latter case).
    if rnn.sampleRate < rnn.windowSize:
        batchLength = rnn.windowSize
    else:
        batchLength = rnn.sampleRate

    def buildGenerator(dataset):
        return DataGenerator(dataset,
                             scaler,
                             windowSize=rnn.windowSize,
                             lookback=rnn.lookBack,
                             sampleRate=batchLength)

    trainGen = buildGenerator(trainingSet)
    validateGen = buildGenerator(validationSet)
    testGen = buildGenerator(testSet)

    def stepCount(totalSize, gen):
        # Number of whole batches in the part of the dataset that remains
        # after removing the generator's leading and trailing margins.
        trueSize = totalSize - gen.maxStepIndex - gen.minIndex
        return int(trueSize / batchLength)

    trainStep = stepCount(TRAINING_DATASIZE, trainGen)
    validateStep = stepCount(VALIDATION_DATASIZE, validateGen)
    testStep = stepCount(TEST_DATASIZE, testGen)
    if DEBUG:
        print(
            f"trainStep: {trainStep}, validationStep: {validateStep}, testStep: {testStep}"
        )

    def timedPredict(gen, steps, label, dataset):
        # Run one prediction pass and, in debug mode, report how long it took.
        start = time.time()
        prediction = rnn.model.predict_generator(
            gen.generator(returnLabel=False), steps)
        end = time.time()
        if DEBUG:
            print(
                f"Time to make {prediction.shape} {label} predictions: {end - start:.3f}, {label} dataset shape {dataset.shape}"
            )
        return prediction

    # Model predictions
    trainPred = timedPredict(trainGen, trainStep, "training", trainingSet)
    validatePred = timedPredict(validateGen, validateStep, "validation",
                                validationSet)
    testPred = timedPredict(testGen, testStep, "test", testSet)

    # Undo the standardization on the predictions
    trainPred = scaler.inverse_transform(trainPred)
    validatePred = scaler.inverse_transform(validatePred)
    testPred = scaler.inverse_transform(testPred)

    def trimmedTruth(dataset, gen):
        # Sampling like this
        # | - minIndex - |               | - maxStepIndex - |
        # [ .......... {   TRUE SIZE   } .............. ]
        return dataset[gen.minIndex:-gen.maxStepIndex].ravel()

    trainingTruth = trimmedTruth(trainingSet, trainGen)
    validationTruth = trimmedTruth(validationSet, validateGen)
    testTruth = trimmedTruth(testSet, testGen)
    if DEBUG:
        print(
            f"trainingTruth shape: {trainingTruth.shape}, validationTruth shape: {validationTruth.shape}, testTruth shape: {testTruth.shape}"
        )

    groundTruth = np.block([trainingTruth, validationTruth, testTruth])
    return trainPred, validatePred, testPred, groundTruth
def train(resnet_model, is_training, F, H, F_curr, H_curr, input_images_blur, input_images_boundary, next_boundary_gt, labels, data_dir, data_dir_valid, img_list, img_list_valid, dropout_ratio):
    """Build the training graph pieces and run the end-to-end training loop.

    The function defines the combined loss, the optimizer and the summaries,
    opens a session, optionally resumes weights, creates the data generators
    and then iterates ``FLAGS.max_steps + 1`` times. Every 300 steps (after
    step 1) it prints progress, saves a checkpoint, runs one validation batch
    and writes two visualization images.

    Args:
        resnet_model: model object; this function reads ``logits``,
            ``next_frame``, ``video_deblur_output``, ``l2_loss_``,
            ``MOVING_AVERAGE_DECAY`` and ``UPDATE_OPS_COLLECTION`` from it.
        is_training: not used inside this function.
        F, H: feed targets for the recurrent state; fed with zeros at the
            start of a sequence and with the previous run's outputs otherwise.
        F_curr, H_curr: tensors fetched on every run and fed back through
            ``F`` / ``H`` on the following step.
        input_images_blur, input_images_boundary, next_boundary_gt, labels,
            dropout_ratio: feed targets filled from the generator output.
        data_dir, img_list: passed to ``DataGenerator``.
        data_dir_valid, img_list_valid: passed to ``DataGenerator`` only when
            ``FLAGS.training_period == 'pretrain'``.

    Raises:
        NameError: if ``FLAGS.training_period`` is neither ``'pretrain'`` nor
            ``'train'``.

    Module-level names used but not defined here include ``FLAGS``,
    ``num_input_imgs``, ``POINTS_NUM``, ``IMAGE_SIZE``,
    ``structure_predictor_net_channel``, ``resume``, ``ensure_dir`` and
    ``points_to_heatmap_rectangle_68pt``.
    """
    global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False)
    val_step = tf.get_variable('val_step', [], initializer=tf.constant_initializer(0), trainable=False)

    # define the losses.
    # Total loss = landmark loss + next-boundary loss + deblur loss
    #              + lambda_ * sum of squared regularization losses.
    lambda_ = 1e-5
    loss_1 = resnet_model.l2_loss_(resnet_model.logits, labels)
    loss_2 = resnet_model.l2_loss_(resnet_model.next_frame,next_boundary_gt)
    # Compares the last three channels of the blurred input with the deblur output.
    loss_3 = resnet_model.l2_loss_(input_images_blur[:,:,:,-3:],resnet_model.video_deblur_output)
    loss_ = loss_1+loss_2+loss_3+tf.reduce_sum(tf.square(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)))*lambda_

    # Moving average of the loss for the training summary; its update op is
    # added to the model's update collection and therefore runs with train_op.
    ema = tf.train.ExponentialMovingAverage(resnet_model.MOVING_AVERAGE_DECAY, global_step)
    tf.add_to_collection(resnet_model.UPDATE_OPS_COLLECTION, ema.apply([loss_]))
    tf.summary.scalar('loss_avg', ema.average(loss_))

    # Second moving average keyed on val_step.
    # NOTE: val_op is created here but never run anywhere in this function.
    ema = tf.train.ExponentialMovingAverage(0.9, val_step)
    val_op = tf.group(val_step.assign_add(1), ema.apply([loss_]))
    tf.summary.scalar('loss_valid', ema.average(loss_))

    tf.summary.scalar('learning_rate', FLAGS.learning_rate)

    # define the optimizer and back propagate.
    opt = tf.train.AdamOptimizer(FLAGS.learning_rate)
    grads = opt.compute_gradients(loss_)
    for grad, var in grads:
        if grad is not None and not FLAGS.minimal_summaries:
            tf.summary.histogram(var.op.name + '/gradients', grad)
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    # One training step = gradient update + everything in the update collection.
    batchnorm_updates = tf.get_collection(resnet_model.UPDATE_OPS_COLLECTION)
    batchnorm_updates_op = tf.group(*batchnorm_updates)
    train_op = tf.group(apply_gradient_op, batchnorm_updates_op)

    saver_all = tf.train.Saver(tf.all_variables())
    summary_op = tf.summary.merge_all()

    # initialize all variables
    init = tf.initialize_all_variables()
    sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
    sess.run(init)
    summary_writer = tf.summary.FileWriter(FLAGS.end_2_end_train_dir, sess.graph)
    val_summary_writer = tf.summary.FileWriter(FLAGS.end_2_end_valid_dir)
    val_save_root = os.path.join(FLAGS.end_2_end_valid_dir,'visualization')
    compare_save_root = os.path.join(FLAGS.end_2_end_valid_dir,'deblur_compare')

    # resume weights
    # Called after sess.run(init); presumably each call restores the variables
    # with the given name prefix when its flag is set — confirm in resume().
    resume(sess, FLAGS.resume_structure_predictor, FLAGS.structure_predictor_train_dir, 'voxel_flow_model_')
    resume(sess, FLAGS.resume_video_deblur, FLAGS.video_deblur_train_dir, 'video_deblur_model_')
    resume(sess, FLAGS.resume_resnet, FLAGS.resnet_train_dir, 'resnet_model_')
    resume(sess, FLAGS.resume_all, FLAGS.end_2_end_train_dir, '')

    # create data generator
    if FLAGS.training_period == 'pretrain':
        dataset = DataGenerator(data_dir, img_list, data_dir_valid, img_list_valid)
        dataset._create_train_sets_for_300W()
        dataset._create_valid_sets_for_300W()
    elif FLAGS.training_period == 'train':
        dataset = DataGenerator(data_dir,img_list)
        dataset._create_train_table()
        dataset._create_sets_for_300VW()
    else:
        raise NameError("No such training_period!")
    train_gen = dataset._aux_generator(batch_size = FLAGS.batch_size, num_input_imgs = num_input_imgs, NUM_CLASSES = POINTS_NUM*2, sample_set='train')
    valid_gen = dataset._aux_generator(batch_size = FLAGS.batch_size, num_input_imgs = num_input_imgs, NUM_CLASSES = POINTS_NUM*2, sample_set='valid')

    # main training process.
    for x in xrange(FLAGS.max_steps + 1):
        start_time = time.time()
        step = sess.run(global_step)

        # Fetch list. The last three entries are always logits, F_curr and
        # H_curr, which is why the results are read back as o[-3], o[-2], o[-1].
        i = [train_op, loss_]
        write_summary = step > 1 and not (step % 100)
        if write_summary:
            # When present, the summary sits at index 2 of the results.
            i.append(summary_op)
        i.append(resnet_model.logits)
        i.append(F_curr)
        i.append(H_curr)

        train_line_num, frame_name, input_boundaries, boundary_gt_train, input_images_blur_generated, landmark_gt_train = next(train_gen)
        if (frame_name == '2.jpg'):
            # Frame named '2.jpg' (presumably the first usable frame of a
            # sequence — confirm against the generator): take the boundaries
            # from the generator and start from zero recurrent state.
            input_images_boundary_init = copy.deepcopy(input_boundaries)
            F_init = np.zeros([FLAGS.batch_size, IMAGE_SIZE//2, IMAGE_SIZE//2, structure_predictor_net_channel//2], dtype=np.float32)
            H_init = np.zeros([1, FLAGS.batch_size, IMAGE_SIZE//2, IMAGE_SIZE//2, structure_predictor_net_channel], dtype=np.float32)
            feed_dict={
                input_images_boundary:input_images_boundary_init,
                input_images_blur:input_images_blur_generated,
                F:F_init,
                H:H_init,
                labels:landmark_gt_train,
                next_boundary_gt:boundary_gt_train,
                dropout_ratio:0.5
            }
        else:
            # Any other frame: turn the previous step's predicted points into
            # a boundary map, append it to channel 1 of the previous boundary
            # input, and feed back the previous F/H outputs.
            # NOTE: relies on `o` and `input_images_boundary_init` from an
            # earlier iteration; if the very first frame is not '2.jpg', `o`
            # is not defined yet.
            output_points = o[-3]
            # The reshape to (POINTS_NUM, 2) only works for a single sample,
            # so this path presumably assumes batch size 1 — TODO confirm.
            output_points = np.reshape(output_points,(POINTS_NUM,2))
            boundary_from_points = points_to_heatmap_rectangle_68pt(output_points)
            boundary_from_points = np.expand_dims(boundary_from_points,axis=0)
            boundary_from_points = np.expand_dims(boundary_from_points,axis=3)
            input_images_boundary_init = np.concatenate([input_images_boundary_init[:,:,:,1:2], boundary_from_points], axis=3)
            feed_dict={
                input_images_boundary:input_images_boundary_init,
                input_images_blur:input_images_blur_generated,
                F:o[-2],
                H:o[-1],
                labels:landmark_gt_train,
                next_boundary_gt:boundary_gt_train,
                dropout_ratio:0.5
            }

        o = sess.run(i,feed_dict=feed_dict)
        loss_value = o[1]
        duration = time.time() - start_time
        assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

        if step > 1 and step % 300 == 0:
            examples_per_sec = FLAGS.batch_size / float(duration)
            format_str = ('step %d, loss = %.2f (%.1f examples/sec; %.3f '
                          'sec/batch)')
            print(format_str % (step, loss_value, examples_per_sec, duration))

        if write_summary:
            summary_str = o[2]
            summary_writer.add_summary(summary_str, step)

        if step > 1 and step % 300 == 0:
            checkpoint_path = os.path.join(FLAGS.end_2_end_train_dir, 'model.ckpt')
            ensure_dir(checkpoint_path)
            saver_all.save(sess, checkpoint_path, global_step=global_step)

        # Run validation periodically
        if step > 1 and step % 300 == 0:
            # NOTE: this rebinds frame_name, input_boundaries,
            # input_images_blur_generated and (below) input_images_boundary_init,
            # which the next training iteration's else-branch also reads.
            valid_line_num, frame_name, input_boundaries, boundary_gt_valid, input_images_blur_generated, landmark_gt_valid = next(valid_gen)
            if (frame_name == '2.jpg') or valid_line_num <= 3:
                # Start of a validation sequence: zero recurrent state.
                input_images_boundary_init = copy.deepcopy(input_boundaries)
                F_init = np.zeros([FLAGS.batch_size, IMAGE_SIZE//2, IMAGE_SIZE//2, structure_predictor_net_channel//2], dtype=np.float32)
                H_init = np.zeros([1, FLAGS.batch_size, IMAGE_SIZE//2, IMAGE_SIZE//2, structure_predictor_net_channel], dtype=np.float32)
                feed_dict={input_images_boundary:input_images_boundary_init,
                           input_images_blur:input_images_blur_generated,
                           F:F_init,
                           H:H_init,
                           labels:landmark_gt_valid,
                           next_boundary_gt:boundary_gt_valid,
                           dropout_ratio:1.0
                }
            else:
                # Continue from the previous validation run's outputs.
                output_points = o_valid[-3]
                output_points = np.reshape(output_points,(POINTS_NUM,2))
                boundary_from_points = points_to_heatmap_rectangle_68pt(output_points)
                boundary_from_points = np.expand_dims(boundary_from_points,axis=0)
                boundary_from_points = np.expand_dims(boundary_from_points,axis=3)
                input_images_boundary_init = np.concatenate([input_images_boundary_init[:,:,:,1:2], boundary_from_points], axis=3)
                feed_dict={
                    input_images_boundary:input_images_boundary_init,
                    input_images_blur:input_images_blur_generated,
                    F:o_valid[-2],
                    H:o_valid[-1],
                    labels:landmark_gt_valid,
                    next_boundary_gt:boundary_gt_valid,
                    dropout_ratio:1.0
                }
            # Validation fetches no train_op, so no weights are updated here.
            i_valid = [loss_,resnet_model.logits,F_curr,H_curr]
            o_valid = sess.run(i_valid,feed_dict=feed_dict)
            # The printed value is the combined loss (o_valid[0]).
            print('Validation top1 error %.2f' % o_valid[0])
            if write_summary:
                # NOTE(review): summary_str comes from the training run above
                # (o[2]), not from this validation run — confirm intent.
                val_summary_writer.add_summary(summary_str, step)

            # Save two images for this step: the blurred input with the
            # predicted points drawn on it, and input | deblur output side by side.
            img_video_deblur_output = sess.run(resnet_model.video_deblur_output,feed_dict=feed_dict)[0]*255
            img = input_images_blur_generated[0,:,:,0:3]*255
            compare_img = np.concatenate([img,img_video_deblur_output],axis=1)
            # Predicted points of the first sample, read as interleaved
            # coordinate pairs and scaled by 255.
            points = o_valid[1][0]*255
            for point_num in range(int(points.shape[0]/2)):
                cv2.circle(img,(int(round(points[point_num*2])),int(round(points[point_num*2+1]))),1,(55,225,155),2)
            val_save_path = os.path.join(val_save_root,str(step)+'.jpg')
            compare_save_path = os.path.join(compare_save_root,str(step)+'.jpg')
            ensure_dir(val_save_path)
            ensure_dir(compare_save_path)
            cv2.imwrite(val_save_path,img)
            cv2.imwrite(compare_save_path,compare_img)
def main():
    """Train and evaluate the counting/ranking model with 5-fold cross validation.

    Steps performed:
      1. Load every ``*.jpg`` in ``counting_data_UCF`` with its ``_ann.mat``
         ground truth and build the multi-scale pyramid of the images.
      2. Collect the ``*.jpg`` paths in ``ranking_data``.
      3. Shuffle the counting images and cut them into five splits.
      4. For each fold: build the two-output model on top of VGG16, train it
         on the pyramid crops of the four remaining splits, evaluate MAE/MSE
         on the held-out split, and save the test model and the fold results.
      5. Print and save the results averaged over the folds.

    Module-level names used but not defined here include ``iterations``,
    ``iterations_per_epoch``, ``params``, ``step_decay``, ``s`` (used as the
    start time in the "Took ... seconds" messages), ``mae``, ``mse``,
    ``load_gt_from_mat``, ``multiscale_pyramid`` and the two loss functions.
    """
    # Counting Dataset
    counting_dataset_path = 'counting_data_UCF'
    counting_dataset = list()
    train_labels = {}  # image path -> dmap returned by load_gt_from_mat
    val_labels = {}  # image path -> crowd_number returned by load_gt_from_mat
    for im_path in glob.glob(os.path.join(counting_dataset_path, '*.jpg')):
        counting_dataset.append(im_path)
        img = image.load_img(im_path)
        gt_file = im_path.replace('.jpg', '_ann.mat')
        # NOTE(review): if load_img returns a PIL image, `size` is
        # (width, height), so `h` and `w` may be swapped here — confirm
        # against what load_gt_from_mat expects.
        h, w = img.size
        dmap, crowd_number = load_gt_from_mat(gt_file, (w, h))
        train_labels[im_path] = dmap
        val_labels[im_path] = crowd_number
    counting_dataset_pyramid, train_labels_pyramid = multiscale_pyramid(
        counting_dataset, train_labels)

    # Ranking Dataset
    ranking_dataset_path = 'ranking_data'
    ranking_dataset = list()
    for im_path in glob.glob(os.path.join(ranking_dataset_path, '*.jpg')):
        ranking_dataset.append(im_path)

    # randomize the order of images before splitting
    np.random.shuffle(counting_dataset)
    # Slices have a fixed size, so when the image count is not a multiple of
    # 5 the last slice can be shorter or trailing images can be left out.
    split_size = int(round(len(counting_dataset) / 5))
    splits_list = list()
    for t in range(5):
        splits_list.append(counting_dataset[t * split_size:t * split_size +
                                            split_size])
    split_val_labels = {}
    mae_sum = 0.0
    mse_sum = 0.0

    # create folder to save results
    # Folder name is 'Results-<date>-<hour>.<minute>' of the current time.
    date = str(datetime.datetime.now())
    d = date.split()
    d1 = d[0]
    d2 = d[1].split(':')
    results_folder = 'Results-' + d1 + '-' + d2[0] + '.' + d2[1]
    if not os.path.exists(results_folder):
        os.makedirs(results_folder)

    # 5-fold cross validation
    epochs = int(round(iterations / iterations_per_epoch))
    n_fold = 5
    for f in range(0, n_fold):
        print('\nFold ' + str(f))

        # Model
        # A fresh VGG16 is created per fold and cut at 'block5_conv3'.
        model = VGG16(include_top=False, weights='imagenet')
        transfer_layer = model.get_layer('block5_conv3')
        conv_model = Model(inputs=[model.input],
                           outputs=[transfer_layer.output],
                           name='vgg_partial')
        counting_input = Input(shape=(224, 224, 3),
                               dtype='float32',
                               name='counting_input')
        ranking_input = Input(shape=(224, 224, 3),
                              dtype='float32',
                              name='ranking_input')
        # NOTE(review): conv_model is defined with a single input but is
        # called with a list of two tensors — confirm how Keras treats this.
        x = conv_model([counting_input, ranking_input])
        counting_output = Conv2D(1, (3, 3),
                                 strides=(1, 1),
                                 padding='same',
                                 data_format=None,
                                 dilation_rate=(1, 1),
                                 activation='relu',
                                 use_bias=True,
                                 kernel_initializer='glorot_uniform',
                                 bias_initializer='zeros',
                                 kernel_regularizer=None,
                                 bias_regularizer=None,
                                 activity_regularizer=None,
                                 kernel_constraint=None,
                                 bias_constraint=None,
                                 name='counting_output')(x)

        # The ranking output is computed using SUM pool. Here I use
        # GlobalAveragePooling2D followed by a multiplication by 14^2 to do
        # this.
        ranking_output = Lambda(
            lambda i: 14.0 * 14.0 * i,
            name='ranking_output')(GlobalAveragePooling2D(
                name='global_average_pooling2d')(counting_output))
        train_model = Model(inputs=[counting_input, ranking_input],
                            outputs=[counting_output, ranking_output])
        train_model.summary()

        # l2 weight decay
        # NOTE(review): the regularizer attribute is assigned after the layers
        # were built; whether Keras then adds the penalty to the loss should
        # be confirmed.
        for layer in train_model.layers:
            if hasattr(layer, 'kernel_regularizer'):
                layer.kernel_regularizer = regularizers.l2(5e-4)
            elif layer.name == 'vgg_partial':
                for l in layer.layers:
                    if hasattr(l, 'kernel_regularizer'):
                        l.kernel_regularizer = regularizers.l2(5e-4)

        # lr=0.0 here; the LearningRateScheduler callback below supplies the
        # rate through step_decay.
        optimizer = SGD(lr=0.0, decay=0.0, momentum=0.9, nesterov=False)
        loss = {
            'counting_output': euclideanDistanceCountingLoss,
            'ranking_output': pairwiseRankingHingeLoss
        }
        # The ranking loss has weight 0.0, so only the counting loss
        # contributes to the total loss.
        loss_weights = [1.0, 0.0]
        train_model.compile(optimizer=optimizer,
                            loss=loss,
                            loss_weights=loss_weights)

        splits_list_tmp = splits_list.copy()
        # counting validation split
        split_val = splits_list_tmp[f]
        del splits_list_tmp[f]
        flat = itertools.chain.from_iterable(splits_list_tmp)
        # counting train split
        split_train = list(flat)
        # counting validation split labels
        split_val_labels = {k: val_labels[k] for k in split_val}

        # Flatten entries 0-4 of each training image's pyramid (and of the
        # matching labels) into two parallel lists.
        counting_dataset_pyramid_split = []
        train_labels_pyramid_split = []
        for key in split_train:
            counting_dataset_pyramid_split.append(
                counting_dataset_pyramid[key][0])
            counting_dataset_pyramid_split.append(
                counting_dataset_pyramid[key][1])
            counting_dataset_pyramid_split.append(
                counting_dataset_pyramid[key][2])
            counting_dataset_pyramid_split.append(
                counting_dataset_pyramid[key][3])
            counting_dataset_pyramid_split.append(
                counting_dataset_pyramid[key][4])
            train_labels_pyramid_split.append(train_labels_pyramid[key][0])
            train_labels_pyramid_split.append(train_labels_pyramid[key][1])
            train_labels_pyramid_split.append(train_labels_pyramid[key][2])
            train_labels_pyramid_split.append(train_labels_pyramid[key][3])
            train_labels_pyramid_split.append(train_labels_pyramid[key][4])

        # Shuffle samples and labels with the same permutation so they stay
        # aligned.
        index_shuf = np.arange(len(counting_dataset_pyramid_split))
        np.random.shuffle(index_shuf)
        counting_dataset_pyramid_split_shuf = []
        train_labels_pyramid_split_shuf = []
        for i in index_shuf:
            counting_dataset_pyramid_split_shuf.append(
                counting_dataset_pyramid_split[i])
            train_labels_pyramid_split_shuf.append(
                train_labels_pyramid_split[i])

        train_generator = DataGenerator(counting_dataset_pyramid_split_shuf,
                                        train_labels_pyramid_split_shuf,
                                        ranking_dataset, **params)
        lrate = LearningRateScheduler(step_decay)
        callbacks_list = [lrate]
        train_model.fit_generator(generator=train_generator,
                                  epochs=epochs,
                                  callbacks=callbacks_list)

        #test images
        # Build a test model that reuses the trained 'vgg_partial' and
        # 'counting_output' layers on an input of unconstrained spatial size
        # and sums the counting map over axes 1 and 2.
        tmp_model = train_model.get_layer('vgg_partial')
        test_input = Input(shape=(None, None, 3),
                           dtype='float32',
                           name='test_input')
        new_input = tmp_model(test_input)
        co = train_model.get_layer('counting_output')(new_input)
        test_output = Lambda(lambda i: K.sum(i, axis=(1, 2)),
                             name='test_output')(co)
        test_model = Model(inputs=[test_input], outputs=[test_output])

        predictions = np.empty((len(split_val), 1))
        y_validation = np.empty((len(split_val), 1))
        for i in range(len(split_val)):
            # Validation images are resized to 224x224 before prediction.
            img = image.load_img(split_val[i], target_size=(224, 224))
            img_to_array = image.img_to_array(img)
            img_to_array = preprocess_input(img_to_array)
            img_to_array = np.expand_dims(img_to_array, axis=0)
            pred_test = test_model.predict(img_to_array)
            predictions[i] = pred_test
            y_validation[i] = split_val_labels[split_val[i]]
        mean_abs_err = mae(predictions, y_validation)
        mean_sqr_err = mse(predictions, y_validation)

        # serialize model to JSON
        model_json = test_model.to_json()
        model_json_name = "test_model_" + str(f) + ".json"
        with open(model_json_name, "w") as json_file:
            json_file.write(model_json)
        # serialize weights to HDF5
        model_h5_name = "test_model_" + str(f) + ".h5"
        test_model.save_weights(model_h5_name)
        print("Saved model to disk")

        print('\n######################')
        print('Results on TEST SPLIT:')
        print(' MAE: {}'.format(mean_abs_err))
        print(' MSE: {}'.format(mean_sqr_err))
        print("Took %f seconds" % (time.time() - s))
        path1 = results_folder + '/test_split_results_fold-' + str(f) + '.txt'
        # NOTE: `f` is rebound to the file handle here; the for statement
        # rebinds it to the next fold index at the start of the next iteration.
        with open(path1, 'w') as f:
            f.write('mae: %f,\nmse: %f, \nTook %f seconds' %
                    (mean_abs_err, mean_sqr_err, time.time() - s))
        mae_sum = mae_sum + mean_abs_err
        mse_sum = mse_sum + mean_sqr_err

    print('\n################################')
    print('Average Results on TEST SPLIT:')
    print(' AVE MAE: {}'.format(mae_sum / n_fold))
    print(' AVE MSE: {}'.format(mse_sum / n_fold))
    print("Took %f seconds" % (time.time() - s))
    path2 = results_folder + '/test_split_results_avg.txt'
    with open(path2, 'w') as f:
        f.write('avg_mae: %f, \navg_mse: %f, \nTook %f seconds' %
                (mae_sum / n_fold, mse_sum / n_fold, time.time() - s))
def main():
    """Train the detector and validate it after every epoch.

    Builds the model with ``Yolov1_vgg16bn(pretrained=True)``, optionally
    resumes from the checkpoint at ``model_name``, then runs ``num_epochs``
    epochs of SGD.  After each epoch the model is saved, the mean validation
    loss is computed, one line is appended to ``train_val_loss_log`` in
    ``results_folder``, and the model is saved as ``best.pth`` whenever the
    validation loss improves.

    All configuration (``model_name``, ``use_gpu``, ``img_folder``,
    ``validate_folder``, ``img_size``, ``S``, ``B``, ``C``, ``train_num``,
    ``test_num``, ``n_batch``, ``num_epochs``, ``lambda_coord``,
    ``lambda_noobj``, ``results_folder``) is read from module-level names.
    """
    best_test_loss = np.inf
    model = Yolov1_vgg16bn(pretrained=True)
    print('pre-trained vgg16 model has loaded!')

    # Resume from an earlier checkpoint when one exists on disk.
    previous_model_path = model_name
    if os.path.isfile(previous_model_path):
        print("Starting from previous result...")
        model.load_state_dict(torch.load(previous_model_path))
    else:
        print("Starting with new train")
    print('')

    if use_gpu:
        model.cuda()

    # Data
    print('==> Preparing data..')
    transform = transforms.Compose([
        transforms.ToTensor(),
    ])
    # transforms.Normalize((0.5,0.5,0.5), (0.5,0.5,0.5))
    # DataGenerator signature, for reference:
    #   parent_dir, img_size, S, B, C, transforms, num = 15000):

    train_dataset = DataGenerator(parent_dir=img_folder,
                                  img_size=img_size,
                                  S=S,
                                  B=B,
                                  C=C,
                                  transform=transform,
                                  num=train_num,
                                  train=True)
    train_loader = DataLoader(train_dataset,
                              batch_size=n_batch,
                              shuffle=True,
                              num_workers=8)

    test_dataset = DataGenerator(parent_dir=validate_folder,
                                 img_size=img_size,
                                 S=S,
                                 B=B,
                                 C=C,
                                 transform=transform,
                                 num=test_num,
                                 train=False)
    test_loader = DataLoader(test_dataset,
                             batch_size=n_batch,
                             shuffle=False,
                             num_workers=8)

    loss_fn = YoloLossNew(B, S, C, lambda_coord, lambda_noobj)
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=0.0001,
                                momentum=0.9,
                                weight_decay=0.0005)
    scheduler = GradualWarmupScheduler(optimizer,
                                       multiplier=8,
                                       total_epoch=30)

    log_path = os.path.join(results_folder, 'train_val_loss_log')
    # The context manager guarantees the log is closed even if training
    # is interrupted by an exception.
    with open(log_path, 'w+') as train_val_loss_log:
        for epoch in range(num_epochs):
            # Validation below switches the model to eval mode, so train
            # mode has to be re-entered at the start of every epoch;
            # otherwise every epoch after the first would train with
            # BatchNorm/Dropout frozen in inference behaviour.
            model.train()

            scheduler.step(epoch)
            print(epoch, optimizer.param_groups[0]['lr'])

            for i, (img_name, images, target) in enumerate(train_loader):
                if use_gpu:
                    images, target = images.cuda(), target.cuda()

                optimizer.zero_grad()
                pred = model(images)
                loss = loss_fn(pred, target)
                current_loss = loss.item()

                loss.backward()
                optimizer.step()

                if i % 20 == 0:
                    print(
                        "\r%d/%d batches in %d/%d iteration, current error is %f"
                        % (i, len(train_loader), epoch + 1, num_epochs,
                           current_loss))

            save_model_by_epoch(epoch, model)

            # Validate on the validation set.
            validation_loss = 0.0
            model.eval()
            with torch.no_grad():
                for i, (img_name, images, target) in enumerate(test_loader):
                    if use_gpu:
                        images, target = images.cuda(), target.cuda()

                    pred = model(images)
                    loss = loss_fn(pred, target)
                    validation_loss += loss.item()
            validation_loss /= len(test_loader)

            # Log the training loss and validation loss every epoch.
            # NOTE: train_loss is the loss of the last training batch of
            # the epoch, not an epoch average.
            log_str = 'epoch: {}, train_loss: {}, val_loss: {} \n'.format(
                epoch + 1, current_loss, validation_loss)
            print(log_str)
            train_val_loss_log.write(log_str)
            train_val_loss_log.flush()

            if best_test_loss > validation_loss:
                best_test_loss = validation_loss
                save_torch_model(model, 'best.pth', epoch)
# Load the pickled (gold standard, OCR) text pairs for the training split.
# NOTE(review): pickle.load executes arbitrary code from the file; only use
# with pickles produced by this project.
train_pickle_path = os.path.join(data_dir, 'train.pkl')
with open(train_pickle_path, 'rb') as f:
    gs_selected_train, ocr_selected_train = pickle.load(f)

# Same layout for the validation split.
val_pickle_path = os.path.join(data_dir, 'val.pkl')
with open(val_pickle_path, 'rb') as f:
    gs_selected_val, ocr_selected_val = pickle.load(f)

# Character-to-integer mapping; its size is the vocabulary size.
char_map_path = os.path.join(data_dir, 'ci.pkl')
with open(char_map_path, 'rb') as f:
    ci = pickle.load(f)
n_vocab = len(ci)

# Both generators share every option except the data they iterate over.
shared_generator_options = dict(char_to_int=ci,
                                seq_length=seq_length,
                                padding_char=pc,
                                oov_char=oc,
                                batch_size=batch_size,
                                shuffle=shuffle)

dg_val = DataGenerator(xData=ocr_selected_val,
                       yData=gs_selected_val,
                       **shared_generator_options)
dg_train = DataGenerator(xData=ocr_selected_train,
                         yData=gs_selected_train,
                         **shared_generator_options)

# create the network
model = Sequential()
def train_model(Dataset,CompileModel,TrainModel,name_data,name_model,name_weights,
                num_data_tr,num_data_val,patch_size,num_epochs,lr,params):
    """Optionally generate data, build or load the CNN, then train it or load its weights.

    Args:
        Dataset: when truthy, synthetic training and validation data are
            generated with ``SyntheticSeisGen``; otherwise the dataset
            directories are only checked for existence.
        CompileModel: when truthy, a new model is created with
            ``create_model`` and its architecture is written as JSON;
            otherwise the architecture is loaded from that JSON file and
            compiled with Adam and ``cross_entropy_balanced``.
        TrainModel: when truthy, the model is fitted and its weights and
            history are saved; otherwise both are loaded from disk.
        name_data: dataset folder name under ``<cwd>/dataset``.
        name_model: base name of the architecture JSON under ``<cwd>/model``.
        name_weights: base name of the weights/history files and of the
            callback output folder.
        num_data_tr: number of training samples (also the range of IDs).
        num_data_val: number of validation samples (also the range of IDs).
        patch_size: edge length of the cubic input patch.
        num_epochs: number of training epochs.
        lr: learning rate.
        params: extra keyword arguments forwarded to ``DataGenerator``.

    Returns:
        Tuple ``(model, history_dict)``.
    """
    # =============================================================================
    # Configuration
    # =============================================================================
    # Paths for dataset
    path_home = os.getcwd()
    tr_path = os.path.join(path_home, 'dataset', name_data, 'train')
    vl_path = os.path.join(path_home, 'dataset', name_data, 'validation')
    tdpath = os.path.join(tr_path, 'seis')
    tfpath = os.path.join(tr_path, 'fault')
    vdpath = os.path.join(vl_path, 'seis')
    vfpath = os.path.join(vl_path, 'fault')

    # Paths for model, weights, and metrics
    path_model = os.path.join(path_home, 'model')
    path_model_arch = os.path.join(path_model, name_model + '.json')
    path_weights = os.path.join(path_model, 'weights', name_weights + '.h5')
    path_hists = os.path.join(path_model, 'weights', name_weights + '_hist.txt')
    path_cb = os.path.join(path_model, 'call_back', name_weights)

    t0 = time()

    # =============================================================================
    # Build Training & Validation Dataset
    # =============================================================================
    if Dataset:
        # Generate Synthetic Data
        print('Generating Training Data')
        SyntheticSeisGen(tr_path, num_data_tr, patch_size)
        print('\nGenerating Validation Data')
        SyntheticSeisGen(vl_path, num_data_val, patch_size)
        print('\nSaving Dataset')
    else:
        # Both splits are needed later, so warn when either one is missing
        # (the previous check only fired when both were missing).
        if not (os.path.exists(tr_path) and os.path.exists(vl_path)):
            print("Please Create Dataset First!")

    # =============================================================================
    # Create 3D Convolutional Neural Network Model
    # =============================================================================
    if CompileModel:
        # Compile CNN Model
        print('Creating CNN Model')
        conv_model = create_model((*[int(patch_size)]*3,1), lr)
        model = conv_model.model
        # Save CNN Model
        json_string = model.to_json()
        with open(path_model_arch, 'w') as arch_file:
            arch_file.write(json_string)
    else:
        # Load CNN Model
        print('Loading CNN Model')
        with open(path_model_arch, 'r') as json_file:
            loaded_model_json = json_file.read()
        model = model_from_json(loaded_model_json)
        model.compile(optimizer=optimizers.Adam(lr),
                      loss=cross_entropy_balanced, metrics=['accuracy'])

    # =============================================================================
    # Train CNN Model
    # =============================================================================
    if TrainModel:
        # Callbacks Configuration
        cp_fn = os.path.join(path_cb, 'checkpoint.{epoch:02d}.h5')
        cp_cb = callbacks.ModelCheckpoint(filepath=cp_fn, verbose=1,
                                          save_best_only=False)
        csv_fn = os.path.join(path_cb, 'train_log.csv')
        csv_cb = callbacks.CSVLogger(csv_fn, append=True, separator=';')
        tb_cb = callbacks.TensorBoard(log_dir=path_cb, histogram_freq=0,
                                      batch_size=2, write_graph=True,
                                      write_grads=True, write_images=True)
        cbks = [cp_cb, csv_cb, tb_cb]

        # Train CNN Model
        print('\nModel Fitting')
        tdata_IDs = range(num_data_tr)
        vdata_IDs = range(num_data_val)
        tr_gen = DataGenerator(dpath=tdpath,fpath=tfpath,data_IDs=tdata_IDs,**params)
        val_gen = DataGenerator(dpath=vdpath,fpath=vfpath,data_IDs=vdata_IDs,**params)
        history = model.fit_generator(generator=tr_gen, validation_data=val_gen,
                                      epochs=num_epochs,verbose=1,callbacks=cbks)
        history_dict = history.history

        # Save Weights & Metrics
        model.save_weights(path_weights)
        with open(path_hists, 'w') as hist_file:
            json.dump(history_dict, hist_file)
    else:
        # Load Metrics & Trained Model Weights
        model.load_weights(path_weights)
        with open(path_hists, 'r') as hist_file:
            history_dict = json.load(hist_file)

    print('\nElapsed time: ' + "{:.2f}".format((time()-t0)/60) + ' min')

    return model, history_dict
x_vecs = x_mean + np.dot(v, (rand_vecs * e).T).T y_faces = func([x_vecs, 0])[0] for i in range(y_faces.shape[0]): save_image(y_faces[i], 'rand' + str(i) + '.png') if i < 5 and (iters % 10) == 0: if not os.path.exists('morph' + str(i)): os.makedirs('morph' + str(i)) save_image(y_faces[i], 'morph' + str(i) + '/img' + str(iters) + '.png') make_rand_faces(rand_vecs, 0) print("Training...") datagen = DataGenerator(batch_size=BATCH_SIZE) callbacks = [TensorBoard()] train_loss = [] for iters in trange(NUM_EPOCHS): history = model.fit_generator(datagen, callbacks=callbacks) loss = history.history['loss'][-1] train_loss.append(loss) print("Loss: " + str(loss)) plotScores(train_loss, [], 'EncoderScores.png', True) if iters % 1 == 0: model.save('Encoder.h5')
def main(args):
    """Run a trained model over the test split and write its predictions.

    Reads ``data_prepro.h5`` / ``data_prepro.json`` (and either
    ``data_img.h5`` or the raw images) from ``args.data_path``, builds the
    network selected by ``args.model_type``, loads weights from
    ``args.model_path``, predicts in chunks and hands the result to
    ``write_predictions`` with ``args.dest_path``.

    Args:
        args: namespace providing ``data_path``, ``extracted``,
            ``model_type``, ``model_path``, ``batch_size`` and ``dest_path``.

    Raises:
        ValueError: if ``args.model_type`` is not one of the supported names.
    """
    lstm_dim = 512
    n_answers = 1001
    question_embed_dim = 256

    with open(os.path.join(args.data_path, "data_prepro.json"), "r") as file:
        prepro_data = json.load(file)

    # Materialise the image features once; they are the same for every chunk.
    # NOTE(review): the `with` blocks below assume `h5.File` is a context
    # manager (true for h5py) - confirm against the file's import.
    if args.extracted:
        with h5.File(os.path.join(args.data_path, "data_img.h5"),
                     "r") as img_file:
            img_feat = np.array(img_file['images_test'])
    else:
        print("Loading images")
        img_feat = [
            img_to_array(load_img(os.path.join(args.data_path,
                                               image_filename),
                                  target_size=(224, 224)),
                         dtype='uint8',
                         data_format='channels_first')
            for image_filename in prepro_data['unique_img_test']
        ]
        img_feat = np.array(img_feat, dtype=np.uint8)

    VOCAB_SIZE = len(prepro_data['ix_to_word'])
    SOS = VOCAB_SIZE + 1
    # Add 1 for SOS and 1 for '0' -> padding
    VOCAB_SIZE += 2

    with h5.File(os.path.join(args.data_path, "data_prepro.h5"),
                 "r") as qa_data:
        MAX_QUESTION_LEN = qa_data['ques_test'].shape[1]

        # Add SOS char at the beginning for every question
        questions = np.zeros(
            (qa_data['ques_test'].shape[0], MAX_QUESTION_LEN + 1))
        questions[:, 1:] = qa_data['ques_test']
        questions[:, 0] = SOS

        ques_to_img = np.array(qa_data['img_pos_test'])
        question_ids = np.array(qa_data['question_id_test']).tolist()

    ix_to_ans = prepro_data['ix_to_ans']
    n_test = len(question_ids)

    # Define appropriate model.  Every network takes the same constructor
    # arguments, so only the class differs between model types.
    model_classes = {
        'img_ques_att': ImgQuesAttentionNet,
        'show_n_tell': ShowNTellNet,
        'ques_att': QuesAttentionShowNTellNet,
        'conv_attention': ConvAttentionNet,
        'time_dist_cnn': TimeDistributedCNNNet,
    }
    if args.model_type not in model_classes:
        # Previously an unknown type left `model` unbound and failed later
        # with an unrelated-looking UnboundLocalError.
        raise ValueError("Unknown model_type {!r}; expected one of {}".format(
            args.model_type, sorted(model_classes)))

    model = model_classes[args.model_type](
        lstm_dim=lstm_dim,
        n_answers=n_answers,
        model_path=os.path.basename(args.model_path),
        VOCAB_SIZE=VOCAB_SIZE,
        MAX_QUESTION_LEN=MAX_QUESTION_LEN,
        question_embed_dim=question_embed_dim,
        log_path=None)
    model.load_weights(weights_filename=args.model_path)

    chunk_size = 100000000
    # `np.int` was only an alias of the builtin and no longer exists in
    # current NumPy; `int` yields the same dtype.
    y_pred = np.zeros(n_test, dtype=int)
    n_chunks = len(range(0, n_test, chunk_size))
    for i, batch in enumerate(range(0, n_test, chunk_size)):
        begin = batch
        end = min(n_test, batch + chunk_size)

        # Test data generator
        test_datagen = DataGenerator(img_feat=img_feat,
                                     questions=questions[begin:end],
                                     answers=[],
                                     ques_to_img=ques_to_img[begin:end],
                                     VOCAB_SIZE=VOCAB_SIZE,
                                     n_answers=n_answers,
                                     batch_size=args.batch_size,
                                     shuffle=False,
                                     split='test')
        y_pred_chunk = model.predict(test_data=test_datagen)
        if (i + 1) % 50 == 0:
            print("Completed testing on {}/{} chunks...".format(
                i + 1, n_chunks))
        y_pred[begin:end] = y_pred_chunk

    write_predictions(filepath=args.dest_path,
                      y_pred=y_pred,
                      ix_to_ans=ix_to_ans,
                      question_ids=question_ids)
return params if __name__ == '__main__': # add some parameters from the terminal parser = argparse.ArgumentParser(description='Launch the training of the Hourglass model.', add_help=True, epilog='Just a test for this parameter') parser.add_argument('--version', action='version', version='Version 1.0') parser.add_argument('--cfg', required=False, default = './config.cfg', help='The path for your config file') args = parser.parse_args() print('>>>>> Parsing Config File From %s' %(args.cfg)) params = process_config(args.cfg) print('>>>>> Creating Dataset Now') # dataset.train_set is the table of the training set's names dataset = DataGenerator(joints_name = params['joint_list'],img_dir = params['img_directory'], train_data_file = params['training_txt_file'], camera_extrinsic = params['camera_extrinsic'], camera_intrinsic = params['camera_intrinsic']) dataset._create_train_table() # nfeats:256, nstacks:4 nmodules:1(not used) # nlow:4 (Number of downsampling in one stack) # mcam:false (attention system(not needed)) # name:pretrained model # tiny:false weighted_loss:false os.environ["CUDA_VISIBLE_DEVICES"] = "0" model = HourglassModel(nFeat=params['nfeats'], nStack=params['nstacks'], nModules=params['nmodules'], nLow=params['nlow'], outputDim=params['num_joints'], batch_size=params['batch_size'], training=True, drop_rate= params['dropout_rate'], lear_rate=params['learning_rate'], decay=params['learning_rate_decay'], decay_step=params['decay_step'], dataset=dataset, name=params['name'], w_summary = True, logdir_train=params['log_dir_train'], logdir_test=params['log_dir_test'], tiny= params['tiny'], w_loss=params['weighted_loss'] , joints= params['joint_list'], gpu_frac=params['gpu_frac'], model_save_dir=params['model_save_dir']) print('>>>>> Creating Hourglass Model')
for option in config.options(section): params[option] = eval(config.get(section, option)) if section == 'Saver': for option in config.options(section): params[option] = eval(config.get(section, option)) return params if __name__ == '__main__': print('--Parsing Config File') params = process_config('config.cfg') print('--Creating Dataset') dataset1 = DataGenerator(params['joint_list'], params['img_directory1'], params['training_txt_file1'], remove_joints=params['remove_joints'], img_dir_test=params['img_directory_test1'], test_data_file=params['test_txt_file1'], train_3D_gt=params['train_3d_gt'], test_3D_gt=params['test_3d_gt']) dataset2 = DataGenerator(params['joint_list'], params['img_directory2'], params['training_txt_file2'], remove_joints=params['remove_joints'], img_dir_test=params['img_directory_test2'], test_data_file=params['test_txt_file2']) dataset3 = DataGenerator(params['joint_list'], params['img_directory3'], params['training_txt_file3'], remove_joints=params['remove_joints'], img_dir_test=params['img_directory_test3'], test_data_file=params['test_txt_file3'])
def do_ddt_runs(expt):
    """Run the leave-one-frame-out experiment ``expt.num_runs`` times.

    For every run a classifier is trained on freshly made training data.
    Then, for the first five and the last five training frames, a
    ``SimpleAllele`` is trained on the training data with that one frame
    removed and evaluated on the practice data carrying the frame's label.

    Args:
        expt: experiment description providing ``num_phonemes``,
            ``num_features``, ``var_diag_interval``, ``var_offdiag_interval``,
            ``num_practice_frames``, ``practice_error_rate``,
            ``num_test_frames``, ``num_training_frames``,
            ``training_error_rate`` and ``num_runs``.

    Returns:
        Dict with keys ``'Error'`` and ``'Correct'``; each maps to a list
        with one entry per run, and each entry is the list of per-frame
        results from ``measurePrimaryAndVariantAccuracy``.
    """
    gen = DataGenerator(expt.num_phonemes, expt.num_features,
                        expt.var_diag_interval, expt.var_offdiag_interval)

    perfect_practice_data = gen.generate_simulated_data(
        expt.num_practice_frames)
    practice_data, num_practice_errors = gen.add_errors_to_data(
        perfect_practice_data, expt.practice_error_rate)
    practice_data_dict = partition_data(practice_data)
    # We got some practice data for every point, right?
    # (The comparison used to sit inside len(), which took the length of a
    # boolean instead of comparing the number of keys.)
    assert len(practice_data_dict) == expt.num_phonemes

    # The result is unused here, but the call is kept so the generator is
    # advanced exactly as before.
    test_data = gen.generate_simulated_data(expt.num_test_frames)

    n = expt.num_training_frames
    assert (n * expt.training_error_rate >= 5)  # number of errorful points
    assert (n * (1 - expt.training_error_rate) > 5)  # number of correct points
    # NOTE(review): presumably make_training_data puts the errorful frames
    # first and the correct ones last - confirm against that helper.
    error_training_frame_indices = range(0, 5)
    correct_training_frame_indices = range(n - 5, n)

    all_results = {}
    all_results['Error'] = []
    all_results['Correct'] = []

    for run_idx in range(0, expt.num_runs):
        training_data, num_errors = make_training_data(gen, expt)
        c = SimpleClassifier(gen.get_labels(), gen.num_features)
        c.train_all(training_data)

        def run_some_frames(frame_indices):
            frame_results = []
            for i in frame_indices:
                label = training_data[i][0]
                a = SimpleAllele(c, [label])

                # subtract (label, frame) from training_data for active phoneme
                alt_data = training_data[:i] + training_data[i + 1:]

                # train alternate model in allele on alternate data
                a.train_variants(alt_data)

                # Construct a subset of the practice data with only the points
                # which are labelled with the active label of the allele.
                # Using all the practice data instead would ignore the
                # practice data labels entirely, which is not the original
                # intention.
                data = [(label, point)
                        for point in practice_data_dict[label]]
                results = measurePrimaryAndVariantAccuracy(a, data)
                frame_results.append(results)
            return frame_results

        error_results = run_some_frames(error_training_frame_indices)
        all_results['Error'].append(error_results)
        correct_results = run_some_frames(correct_training_frame_indices)
        all_results['Correct'].append(correct_results)
    return all_results
if __name__ == '__main__': environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" environ["CUDA_VISIBLE_DEVICES"] = "7" print('--Parsing Config File') params = process_config('config.cfg') print('--Creating Dataset') dataset = DataGenerator(params['img_directory'], params['training_txt_file'], params['num_joints'], params['val_directory'], params['val_txt_file'], params['test_directory'], params['test_txt_file'], params['resolutions'], params['headed'], head_train=params['head_train'], head_test=params['head_test'], head_val=params['head_val']) dataset._create_test_table( ) #creates the lists with dicts of the coord. of boxes, joints and the corresp. weights model = HourglassModel(nFeat=params['nfeats'], nStack=params['nstacks'], nModules=params['nmodules'], nLow=params['nlow'], outputDim=params['num_joints'], batch_size=params['batch_size'], attention=params['mcam'],
allow_pickle=True).item() labels_dict = np.load( '/dccstor/cmv/MovieSummaries/embeddings/dummy_labels.npy', allow_pickle=True).item() print('\nDone Loading') train_ids, val_ids, train_labels, val_labels = train_test_split( list(labels_dict.keys()), list(labels_dict.values()), test_size=0.2, random_state=42) train_generator = DataGenerator(mode='train', data_dict=embedding_dict, list_IDs=train_ids, labels_dict=labels_dict, num_classes=NUM_CLASSES, batch_size=TRAIN_BATCH_SIZE, shuffle=True) valid_generator = DataGenerator(mode='val', data_dict=embedding_dict, list_IDs=val_ids, labels_dict=labels_dict, num_classes=NUM_CLASSES, batch_size=VAL_BATCH_SIZE, shuffle=False) with tf.device('/cpu:0'): model = BiLSTM(num_classes=NUM_CLASSES, reg=args.regularization, reg_wt=args.regularization_weight)
from no_batch import MlpSingle from MLP_batch import MlpBatch import platform import numpy as np if __name__ == '__main__': if platform.system() == 'Windows': folder = 'C:/data/train_data' elif platform.system() == 'Linux': folder = '/home/shaoheng/Documents/PythonPractice/handwritedigit' batch = 5 class_num = 10 data_generator = DataGenerator(folder, batch, (16, 16), class_num=class_num) model = MlpBatch(input_nodes=16 * 16, hidden_nodes=(12, class_num), batch_size=batch) right = 0 for i in range(1000000): if (i + 1) % 100 == 0: print('acc=%.2f' % (right / (i * batch) * 100), '%') x, y = data_generator.load_data() out = model.forward_prop(x) model.back_prop(x, y) for b in range(batch):
help="path to testing dataset") ap.add_argument("-c", "--csv", required=True, help="path to testing CSV file") ap.add_argument("-m", "--model", required=True, help="path to model (.h5)") ap.add_argument("-p", "--pred", required=True, help="path to prediction results") args = vars(ap.parse_args()) data_test = pd.read_csv(args['csv']) data_test["file_path"] = data_test["Id"].apply( lambda x: os.path.join(args['dataset'], str(x) + ".npy")) test_model = load_model(args['model']) test_generator = DataGenerator(data_test['file_path'], batch_size=50, test=True, shuffle=False) final_result = test_model.predict_generator(generator=test_generator) compiled_result = pd.DataFrame(final_result) compiled_result = compiled_result.rename(columns={ "Unnamed: 0": "Id", '0': 'Predicted' }) if not os.path.isdir(os.path.dirname(args['pred'])): os.makedirs(os.path.dirname(args['pred'])) compiled_result.to_csv(args['pred'])
return -1 * (x - y) / ((x - 1) * x + eps) if __name__ == '__main__': if platform.system() == 'Windows': folder = 'C:/data/train_data' test_folder = 'C:/data/test_data' elif platform.system() == 'Linux': folder = '/home/shaoheng/Documents/PythonPractice/handwritedigit' batch = 32 class_num = 10 data_generator = DataGenerator(folder, batch, (16, 16), class_num=class_num) valid_data_gen = DataGenerator(test_folder, 160, (16, 16), class_num=class_num) valid_x, valid_y = valid_data_gen.load_data() train_data_gen = DataGenerator(folder, 320, (16, 16), class_num=class_num) train_full_x, train_full_y = train_data_gen.load_data() def forward(x): for deep, now_layer in enumerate(model): if type(now_layer) == Conv2D and deep == 0: x = np.expand_dims(x, -1) elif type(now_layer) == FC:
# Collect the path of every validation image from both class folders.
val_images = []
for folder in ['test\\fake', 'test\\real']:
    val_images.extend(
        os.path.join(folder, image) for image in os.listdir(folder))

# Label each image by the folder component of its path: real -> 0, else -> 1.
val_labels = {}
for image in tqdm(val_images):
    class_folder = image.split('\\')[1]
    val_labels[image] = 0 if class_folder == 'real' else 1

train_gen = DataGenerator(train_images,
                          train_labels,
                          batch_size=bs,
                          dim=dim,
                          type_gen='train')
val_gen = DataGenerator(val_images,
                        val_labels,
                        batch_size=bs,
                        dim=dim,
                        type_gen='test')

# Pull the first training batch and show what it contains.
X, Y = train_gen[0]
print(len(X), X[0].shape, X[1].shape)
print(Y)

fig = plt.figure(figsize=(8, 8))
columns = 4
# Store each sample's angle (read from `angle`) converted from degrees to
# radians, first for the training ids and then for the validation ids.
for split_name in ("train", "validation"):
    for i in partition[split_name]:
        labels[i] = float(angle[i]) * scipy.pi / 180

# Keyword arguments shared by both generators (see datagen.py).
params = dict(dim=(66, 200, 3), batch_size=32, shuffle=True)

# Generators
training_generator = DataGenerator(partition["train"], labels, **params)
validation_generator = DataGenerator(partition["validation"], labels,
                                     **params)

# Build the network and compile it with the Adam optimizer and an MSE loss.
model = defineModel()
model.compile(optimizer='adam', loss="mse")

# Fit for 10 epochs, validating with the validation generator.
model.fit_generator(generator=training_generator,
                    epochs=10,
                    validation_data=validation_generator)

# Persist the trained model.
model.save("model.h5")