Exemple #1
0
def setup_gens(data, confs):
    """
    Create training/validation generators.

    Args:
        data: iterable of dicts, each carrying a boolean 'train' key that
            marks whether the record belongs to the training split.
        confs: pair (train_config, val_config) forwarded to DataGenerator.

    Returns:
        Tuple (train_gen, val_gen) of DataGenerator instances.
    """
    t_config, v_config = confs

    # Partition on the flag's truthiness instead of comparing '== True' /
    # '== False' (the equality form silently dropped records whose flag was
    # neither bool, e.g. None).
    train_data = [d for d in data if d['train']]
    val_data = [d for d in data if not d['train']]

    train_gen = DataGenerator(train_data, t_config)
    val_gen = DataGenerator(val_data, v_config)

    return train_gen, val_gen
Exemple #2
0
def preprocess_chipseq(num_jobs, bin_size):
    """Queue and run ChIP-seq preprocessing workers, num_jobs at a time.

    For every (celltype, transcription factor) pair that has a fold-change
    signal file but no preprocessed feature file yet, a Process running
    parralelChIPSeqSignalProcessor is queued.  The queue is then executed
    in batches of num_jobs, joining each batch before starting the next.

    Args:
        num_jobs: number of worker processes to run concurrently.
        bin_size: bin size, used in the output file name and by the worker.
    """
    datagen = DataGenerator()
    processes = []

    celltypes = datagen.get_celltypes()
    transcription_factors = datagen.get_trans_fs()

    for part in ['train']:
        with open('../data/annotations/%s_regions.blacklistfiltered.merged.bed' % part) as fin:
            lines = fin.read()

        for celltype in celltypes:
            for transcription_factor in transcription_factors:
                if not os.path.exists('../data/chipseq_fold_change_signal/ChIPseq.%s.%s.fc.signal.train.bw'
                                      % (celltype, transcription_factor)):
                    continue
                fout_path = '../data/preprocess/CHIPSEQ_FEATURES/%s_%s_%d.gz' % (
                                    celltype, transcription_factor, bin_size)
                if not os.path.exists(fout_path):
                    processes.append(
                        Process(target=parralelChIPSeqSignalProcessor,
                                args=(lines, fout_path, celltype, transcription_factor, bin_size)))

    # BUG FIX: map() returns a lazy iterator in Python 3, so the original
    # map(lambda x: x.start(), ...) never actually started or joined any
    # process.  Explicit loops work under both Python 2 and 3.
    for i in range(0, len(processes), num_jobs):
        batch = processes[i:i + num_jobs]
        for proc in batch:
            proc.start()
        for proc in batch:
            proc.join()
Exemple #3
0
def simple_test(expt):
    """Sanity-check classifier accuracy with perfectly matched means.

    Build a generator and a classifier that are perfectly matched with
    respect to means and see what sort of error rate we get for various
    variance values in the generator.

    Ported from Python 2: print statement and list-returning zip()
    converted to Python 3 equivalents (the rest of this file already uses
    Python 3 syntax such as f-strings).
    """
    gen = DataGenerator(expt.num_phonemes, expt.num_features,
                        expt.var_diag_interval, expt.var_offdiag_interval)
    test_data = gen.generate_simulated_data(expt.num_test_frames)

    # Make perfect "training data" in the form of two points for each
    # class whose mean is exactly the mean for that class.  Training
    # on this will give a correct mean for the model, but with some
    # non-zero variance

    labels = gen.get_labels()
    means = [array(target) for target in gen._targets]

    # Construct a list of (label, point) pairs with two points for each label
    delta = [0.1] * expt.num_features
    assert len(labels) == len(means)
    # BUG FIX: zip() is lazy in Python 3, so both halves must be
    # materialized as lists before concatenation.
    data = (list(zip(labels, (m + delta for m in means))) +
            list(zip(labels, (m - delta for m in means))))

    c = SimpleClassifier(labels, gen.num_features)
    c.train_all(data)

    (rate, results) = measureAccuracy(c, test_data)
    summary = make_summary_string("Simple test", rate, results, c, test_data,
                                  gen)
    print(summary)
Exemple #4
0
def main(save=True):
    """ Train a model \n

        save {bool} - whether to save the trained model (default: True) \n

        Returns: wrapper RNN class for a Keras model (e.g. keras.models.Sequential) """
    startTime = time()
    trainingSet, validationSet, scaler = setup()
    # Both generators share the same configuration; build it once.
    genConfig = dict(windowSize=WINDOW_SIZE,
                     lookback=LOOKBACK,
                     sampleRate=SAMPLERATE,
                     prediction=PREDICTION)
    trainGen = DataGenerator(trainingSet, scaler, **genConfig).generator()
    validGen = DataGenerator(validationSet, scaler, **genConfig).generator()
    rnn = RNN(HIDDEN_NODES, LOOKBACK, WINDOW_SIZE, SAMPLERATE, PREDICTION)
    rnn.model.compile(loss=LOSS_FUNC,
                      optimizer=rnn.pickOptimizer(OPTIMIZER, lr=LEARNING_RATE))
    rnn.model.fit_generator(trainGen,
                            steps_per_epoch=STEPS_PER_EPOCH,
                            epochs=EPOCHS,
                            validation_data=validGen,
                            validation_steps=VALIDATION_STEP_PER_EPOCH,
                            verbose=2,
                            shuffle=False)
    endTime = time()
    print(
        f"\nTRAINING DONE. Total time elapsed: {strftime('%H:%M:%S', gmtime(endTime - startTime))}"
    )
    if save:
        # Persist both the learned weights and the architecture description.
        rnn.saveWeights(
            constructFilename(BASE_PATH, HIDDEN_NODES, LOOKBACK, WINDOW_SIZE,
                              SAMPLERATE, PREDICTION, WEIGHT_EXT))
        rnn.saveArchitecture(
            constructFilename(BASE_PATH, HIDDEN_NODES, LOOKBACK, WINDOW_SIZE,
                              SAMPLERATE, PREDICTION, ARCHITECT_EXT))
    return rnn
Exemple #5
0
 def __init__(self, config_file='config.cfg', model='hg_refined_tiny_200'):
     """ Initialize the Predictor
     Args:
         config_file 	 	: *.cfg file with model's parameters
         model 	 	 	 	: *.index file's name. (weights to load)
     """
     t = time()
     params = process_config(config_file)
     # The four test sets differ only in the numbered directory/file config
     # keys; build them in a loop instead of four copy-pasted constructions.
     datatests = []
     for idx in range(1, 5):
         datatest = DataGenerator(joints_name=params['joint_list'],
                                  img_dir_test=params['img_directory_test%d' % idx],
                                  test_data_file=params['test_txt_file%d' % idx],
                                  remove_joints=params['remove_joints'])
         datatest._create_test_table()
         datatests.append(datatest)
     self.predict = PredictProcessor(params)
     self.predict.color_palette()
     self.predict.LINKS_JOINTS()
     self.predict.model_init()
     self.predict.load_model(load=model)
     self.predict._create_prediction_tensor()
     self.predict.save_multioutput_as_mat(datagen1=datatests[0],
                                          datagen2=datatests[1],
                                          datagen3=datatests[2],
                                          datagen4=datatests[3],
                                          idlh=9,
                                          idrs=2)
     print('Done: ', time() - t, ' sec.')
Exemple #6
0
    def train(self):
        """Fit the model on a DataGenerator, then persist the training
        history (pickled) and the model weights."""
        history = self.model.fit(DataGenerator(batch_size=5),
                                 epochs=20,
                                 verbose=1,
                                 callbacks=[self.callback, self.checkpoint])

        with open('Data/model_results', 'wb') as file:
            pickle.dump(history.history, file)
            self.model.save_weights('Data/model/model_weights.h5')
Exemple #7
0
 def get_train_data(self):
     """Load (or generate) training episodes and split their fields into
     per-attribute numpy arrays on self."""
     self.datagen = DataGenerator(self.mdp)
     if self.generate_new_episodes:
         episodes = self.datagen.gen_episodes(self.num_train_episodes,
                                              self.path)
     else:
         episodes = np.load(self.path)
     self.train_episodes = episodes
     # Episode layout: (image, action, reward label, Q-value label, ...)
     self.train_images = np.array([episode[0] for episode in episodes])
     self.train_actions = np.array([episode[1] for episode in episodes])
     self.train_reward_labs = np.array([episode[2] for episode in episodes])
     self.train_qval_labs = np.array([episode[3] for episode in episodes])
     self.train_label_to_im_dict = self.datagen.label_to_im_dict
Exemple #8
0
def do_baseline_runs(expt):
    """Run expt.num_runs baseline train/test cycles and print a summary.

    Each run builds training data with a controlled error rate, trains a
    SimpleClassifier on it, and measures accuracy on freshly generated
    test data.  (name, rate) pairs are accumulated and summarized at the
    end.  Ported from Python 2: print statements converted to calls.
    """
    gen = DataGenerator(expt.num_phonemes, expt.num_features,
                        expt.var_diag_interval, expt.var_offdiag_interval)

    all_results = []
    for run_idx in range(expt.num_runs):
        test_data = gen.generate_simulated_data(expt.num_test_frames)

        # There's a problem here if there's only one data point, since
        # then we end up with a variance of 0.  We currently hack
        # around this problem by guaranteeing more than one point.  We
        # could change the models to allow zero variance but this will
        # mean not being able to make samples from the models without
        # some extra work.  Note that we don't care at all about order
        # of training data in these experiments, so we just build our
        # training data in two parts and cat them together.  If you
        # hit either of these asserts, you're asking for an error rate
        # that's too high and/or a training data size that's too low.
        # We need two correct samples per phoneme.
        num_secondary_frames = expt.num_training_frames - expt.num_phonemes * 2
        num_errorful_frames = expt.num_training_frames * expt.training_error_rate
        assert expt.num_training_frames >= expt.num_phonemes * 2
        assert num_secondary_frames > num_errorful_frames
        errorless_training_data = gen.generate_simulated_data_per_phoneme(2)
        secondary_training_data = gen.generate_simulated_data(
            num_secondary_frames)

        # Slight trickiness to get a correct error rate for this subset of the data
        subset_error_rate = float(num_errorful_frames) / num_secondary_frames
        errorful_training_data, num_errors = gen.add_errors_to_data(
            secondary_training_data, subset_error_rate)

        practice_data = gen.generate_simulated_data(expt.num_practice_frames)
        errorful_practice_data, num_errors = gen.add_errors_to_data(
            practice_data, expt.practice_error_rate)

        training_data = errorless_training_data + errorful_training_data + errorful_practice_data

        c = SimpleClassifier(gen.get_labels(), gen.num_features)
        c.train_all(training_data)

        (rate, results) = measureAccuracy(c, test_data)
        name = "Baseline 0.%d" % (run_idx, )
        summary = make_summary_string(name, rate, results, c, test_data, gen)
        all_results.append((name, rate))

    print("\n--------------------------Summary-----------------------")
    print(make_all_runs_summary_string(expt, all_results))
Exemple #9
0
def do_simple_allele_test(expt):
    """Train a classifier, then probe individual training frames via alleles.

    For each run, a SimpleAllele variant is retrained with one sampled frame
    held out, and primary-vs-variant accuracy is reported — the goal is to
    identify errorful training frames.  Ported from Python 2: print
    statements and list-range concatenation converted to Python 3.
    """
    gen = DataGenerator(expt.num_phonemes, expt.num_features,
                        expt.var_diag_interval, expt.var_offdiag_interval)
    test_data = gen.generate_simulated_data(expt.num_test_frames)

    for run_idx in range(0, expt.num_runs):
        training_data, num_errors = make_training_data(gen, expt)
        # select training data frames to be tested, put into sample_training_frames
        # sample_training_frames is a subset of the training data consisting of some
        # errorful frames and some correct frames - we hope to identify the
        # incorrect frames

        # For now, use first 5 frames and last 5.  The former will have errors and the
        # latter will be correct
        n = len(training_data)
        assert (n * expt.training_error_rate > 5)  # number of errorful points
        assert (n * (1 - expt.training_error_rate) > 5
                )  # number of correct points
        # BUG FIX: Python 3 range objects cannot be concatenated with '+';
        # materialize both halves first.
        sample_training_frame_indices = list(range(0, 5)) + list(range(n - 5, n))

        c = SimpleClassifier(gen.get_labels(), gen.num_features)
        c.train_all(training_data)

        all_results = []
        for i in sample_training_frame_indices:
            label = training_data[i][0]
            a = SimpleAllele(c, [label])

            # subtract (label, frame) from training_data for active phoneme
            alt_data = training_data[:i] + training_data[i + 1:]

            # train alternate model in allele on alternate data
            a.train_variants(alt_data)

            results = measurePrimaryAndVariantAccuracy(a, test_data)
            print(results)
            all_results.append(results)
        print('End run %d \n' % (run_idx, ))
Exemple #10
0
def preprocess_dnase(num_jobs, bin_size):
    """Queue and run DNase preprocessing workers, num_jobs at a time.

    For every (celltype, part) pair whose feature file has not been
    produced yet, a Process running parralelDNAseSignalProcessor is
    queued; the queue is then executed in batches of num_jobs, joining
    each batch before starting the next.

    Args:
        num_jobs: number of worker processes to run concurrently.
        bin_size: bin size, used in output file names and by the worker.
    """
    datagen = DataGenerator()
    processes = []

    celltypes = datagen.get_celltypes()

    for part in ['train', 'ladder', 'test']:

        with open('../data/annotations/%s_regions.blacklistfiltered.merged.bed' % part) as fin:
            lines = fin.read()

        for celltype in celltypes:

            if not os.path.exists('../data/preprocess/DNASE_FEATURES/%s_%s_%d.txt' % (celltype, part, bin_size)):
                fout_path = '../data/preprocess/DNASE_FEATURES/%s_%s_%d.gz' % (celltype, part, bin_size)
                processes.append(
                    Process(
                        target=parralelDNAseSignalProcessor,
                        args=(lines, fout_path, celltype, bin_size)))

    # BUG FIX: map() is lazy in Python 3, so the original
    # map(lambda x: x.start(), ...) never actually started or joined any
    # process.  Explicit loops work under both Python 2 and 3.
    for i in range(0, len(processes), num_jobs):
        batch = processes[i:i + num_jobs]
        for proc in batch:
            proc.start()
        for proc in batch:
            proc.join()
import tensorflow as tf
import os

# Command-line flags (TF1-era tf.app.flags API).
tf.app.flags.DEFINE_string("configfile", "config/config_mpii.cfg",
                           "config file name")
tf.app.flags.DEFINE_string("loadmodel", None,
                           "model name used to continue training")

FLAGS = tf.app.flags.FLAGS

if __name__ == '__main__':
    print('--Parsing Config File')
    # process_config / DataGenerator / HourglassModel are expected to be
    # defined elsewhere in this file or its imports.
    params = process_config(FLAGS.configfile)
    # Keep a copy of the config next to the checkpoints for reproducibility.
    os.system('mkdir -p {}'.format(params['saver_directory']))
    os.system('cp {0} {1}'.format(FLAGS.configfile, params['saver_directory']))

    print('--Creating Dataset')
    dataset = DataGenerator(params['joint_list'], params['img_directory'],
                            params['training_txt_file'], params['img_size'])
    dataset._create_train_table()
    dataset._randomize()
    dataset._create_sets()

    model = HourglassModel(params=params, dataset=dataset, training=True)
    model.create_model()
    # NOTE(review): dataset=None here although the constructor above received
    # the dataset — presumably do_train falls back to the model's own
    # dataset; confirm against HourglassModel.do_train.
    model.do_train(nEpochs=params['nepochs'],
                   epochSize=params['epoch_size'],
                   saveStep=params['saver_step'],
                   dataset=None,
                   load=FLAGS.loadmodel)
                "--model",
                required=True,
                help="path to output model (.h5)")
# Parse the CLI arguments (the ArgumentParser `ap` is configured above,
# outside this view: at least --csv, --dataset and --model).
args = vars(ap.parse_args())

# Build the absolute .npy feature path for every sample Id in the CSV.
data_train = pd.read_csv(args['csv'])
data_train["file_path"] = data_train["Id"].apply(
    lambda x: os.path.join(args['dataset'],
                           str(x) + ".npy"))

# Stratified 80/20 train/validation split on the label column.
X_train, X_val, y_train, y_val = train_test_split(data_train["file_path"],
                                                  data_train["Label"],
                                                  stratify=data_train["Label"],
                                                  test_size=0.2,
                                                  random_state=42)
training_generator = DataGenerator(X_train, batch_size=50, shuffle=True)
validation_generator = DataGenerator(X_val)

model = MalwareModel()
model = model.create()
print(model.summary())

model.fit_generator(generator=training_generator,
                    validation_data=validation_generator,
                    epochs=15)

# Ensure the output directory exists before saving the trained model (.h5).
if not os.path.isdir(os.path.dirname(args['model'])):
    os.makedirs(os.path.dirname(args['model']))

model.save(args['model'])
Exemple #13
0
def predict(modelpath, UNTRAINED_MODEL=False):
    """Run the (trained or fresh) RNN over the train/validation/test sets.

    Args:
        modelpath: path of a trained model to load (ignored when
            UNTRAINED_MODEL is True).
        UNTRAINED_MODEL {bool}: when True, build an untrained RNN instead
            of loading weights from modelpath.

    Returns:
        Tuple (trainPred, validatePred, testPred, groundTruth): the three
        prediction arrays are inverse-transformed by the scaler, and
        groundTruth is the concatenation of the unscaled truth slices for
        all three datasets.
    """
    if UNTRAINED_MODEL:
        rnn = RNN(HIDDEN_NODES, LOOKBACK, WINDOW_SIZE, SAMPLERATE, 1)
    else:
        rnn = loadTrainedModel(modelpath)

    trainingSet, validationSet, scaler = setup()
    testSet = readDataset(TEST_SET)

    # The original duplicated all three DataGenerator constructions in both
    # branches of this test; the only difference was the effective sample
    # rate, which is clamped up to the window size when it is smaller.
    if rnn.sampleRate < rnn.windowSize:
        batchLength = rnn.windowSize
    else:
        batchLength = rnn.sampleRate  # or sampleRate * windowSize?

    trainGen = DataGenerator(trainingSet,
                             scaler,
                             windowSize=rnn.windowSize,
                             lookback=rnn.lookBack,
                             sampleRate=batchLength)
    validateGen = DataGenerator(validationSet,
                                scaler,
                                windowSize=rnn.windowSize,
                                lookback=rnn.lookBack,
                                sampleRate=batchLength)
    testGen = DataGenerator(testSet,
                            scaler,
                            windowSize=rnn.windowSize,
                            lookback=rnn.lookBack,
                            sampleRate=batchLength)

    # Usable sample counts exclude each generator's lookback prefix
    # (minIndex) and stepping suffix (maxStepIndex).
    trainingSetTrueSize = TRAINING_DATASIZE - trainGen.maxStepIndex - trainGen.minIndex
    validationSetTrueSize = VALIDATION_DATASIZE - validateGen.maxStepIndex - validateGen.minIndex
    testSetTrueSize = TEST_DATASIZE - testGen.maxStepIndex - testGen.minIndex
    trainStep = int(trainingSetTrueSize / batchLength)
    validateStep = int(validationSetTrueSize / batchLength)
    testStep = int(testSetTrueSize / batchLength)

    if DEBUG:
        print(
            f"trainStep: {trainStep}, validationStep: {validateStep}, testStep: {testStep}"
        )

    # Model predictions
    start = time.time()
    trainPred = rnn.model.predict_generator(
        trainGen.generator(returnLabel=False), trainStep)
    end = time.time()
    if DEBUG:
        print(
            f"Time to make {trainPred.shape} training predictions: {end - start:.3f}, training dataset shape {trainingSet.shape}"
        )

    start = time.time()
    validatePred = rnn.model.predict_generator(
        validateGen.generator(returnLabel=False), validateStep)
    end = time.time()
    if DEBUG:
        print(
            f"Time to make {validatePred.shape} validation predictions: {end - start:.3f}, validation dataset shape {validationSet.shape}"
        )

    start = time.time()
    testPred = rnn.model.predict_generator(
        testGen.generator(returnLabel=False), testStep)
    end = time.time()
    if DEBUG:
        print(
            f"Time to make {testPred.shape} test predictions: {end - start:.3f}, test dataset shape {testSet.shape}"
        )

    # Undo the standardization on the predictions
    trainPred = scaler.inverse_transform(trainPred)
    validatePred = scaler.inverse_transform(validatePred)
    testPred = scaler.inverse_transform(testPred)

    #  Sampling like this
    #   | - minIndex - |                 | - maxStepIndex - |
    #  [   ..........  {    TRUE SIZE    }  ..............  ]
    trainingTruth = trainingSet[trainGen.minIndex:-trainGen.maxStepIndex].ravel()
    validationTruth = validationSet[validateGen.minIndex:-validateGen.maxStepIndex].ravel()
    testTruth = testSet[testGen.minIndex:-testGen.maxStepIndex].ravel()

    if DEBUG:
        print(
            f"trainingTruth shape: {trainingTruth.shape}, validationTruth shape: {validationTruth.shape}, testTruth shape: {testTruth.shape}"
        )

    groundTruth = np.block([trainingTruth, validationTruth, testTruth])

    return trainPred, validatePred, testPred, groundTruth
Exemple #14
0
def train(resnet_model, is_training, F, H, F_curr, H_curr,
          input_images_blur, input_images_boundary, next_boundary_gt, labels,
          data_dir, data_dir_valid, img_list, img_list_valid,
          dropout_ratio):
    """End-to-end training loop for the deblur / boundary / landmark network.

    Builds a combined L2 objective (landmarks + next boundary + deblurred
    frame + weight decay), sets up Adam, summaries, checkpointing, resumes
    pretrained sub-networks, then runs the step loop, threading the
    recurrent state (F, H) and predicted boundaries between iterations,
    with periodic checkpoints and validation visualisation.

    NOTE(review): `is_training` is accepted but never referenced here.
    NOTE(review): `val_op` is constructed but never run in this loop.
    NOTE(review): the loop uses `xrange`, i.e. Python 2 (or an alias
    defined elsewhere in the file).
    NOTE(review): the `else` branches below read `o` / `o_valid` from the
    previous iteration — on the very first iteration they are unbound, so
    this only works if the first frame of every clip is named '2.jpg';
    confirm the data generator guarantees that.
    """

    global_step = tf.get_variable('global_step', [],
                                  initializer=tf.constant_initializer(0),
                                  trainable=False)
    val_step = tf.get_variable('val_step', [],
                                  initializer=tf.constant_initializer(0),
                                  trainable=False)

    # define the losses.
    lambda_ = 1e-5  # weight-decay coefficient

    # landmark L2 + next-boundary L2 + deblur reconstruction L2 + decay.
    # NOTE(review): tf.square over the whole REGULARIZATION_LOSSES
    # collection requires its entries to stack into one tensor — verify.
    loss_1 = resnet_model.l2_loss_(resnet_model.logits, labels)
    loss_2 = resnet_model.l2_loss_(resnet_model.next_frame,next_boundary_gt)
    loss_3 = resnet_model.l2_loss_(input_images_blur[:,:,:,-3:],resnet_model.video_deblur_output)
    loss_ = loss_1+loss_2+loss_3+tf.reduce_sum(tf.square(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)))*lambda_

    # Moving average of the training loss, updated via the update-ops
    # collection and exported as the 'loss_avg' summary.
    ema = tf.train.ExponentialMovingAverage(resnet_model.MOVING_AVERAGE_DECAY, global_step)
    tf.add_to_collection(resnet_model.UPDATE_OPS_COLLECTION, ema.apply([loss_]))
    tf.summary.scalar('loss_avg', ema.average(loss_))

    # Separate, faster-decaying average intended for validation summaries.
    ema = tf.train.ExponentialMovingAverage(0.9, val_step)
    val_op = tf.group(val_step.assign_add(1), ema.apply([loss_]))
    tf.summary.scalar('loss_valid', ema.average(loss_))

    tf.summary.scalar('learning_rate', FLAGS.learning_rate)

    # define the optimizer and back propagate.
    opt = tf.train.AdamOptimizer(FLAGS.learning_rate)
    grads = opt.compute_gradients(loss_)
    for grad, var in grads:
        if grad is not None and not FLAGS.minimal_summaries:
            tf.summary.histogram(var.op.name + '/gradients', grad)
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    # Fold batch-norm statistic updates (and the EMA update above) into
    # the train op so they run on every step.
    batchnorm_updates = tf.get_collection(resnet_model.UPDATE_OPS_COLLECTION)
    batchnorm_updates_op = tf.group(*batchnorm_updates)
    train_op = tf.group(apply_gradient_op, batchnorm_updates_op)

    saver_all = tf.train.Saver(tf.all_variables())

    summary_op = tf.summary.merge_all()

    # initialize all variables
    init = tf.initialize_all_variables()
    sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
    sess.run(init)

    summary_writer = tf.summary.FileWriter(FLAGS.end_2_end_train_dir, sess.graph)
    val_summary_writer = tf.summary.FileWriter(FLAGS.end_2_end_valid_dir)
    val_save_root = os.path.join(FLAGS.end_2_end_valid_dir,'visualization')
    compare_save_root = os.path.join(FLAGS.end_2_end_valid_dir,'deblur_compare')

    # resume weights
    resume(sess, FLAGS.resume_structure_predictor, FLAGS.structure_predictor_train_dir, 'voxel_flow_model_')
    resume(sess, FLAGS.resume_video_deblur, FLAGS.video_deblur_train_dir, 'video_deblur_model_')
    resume(sess, FLAGS.resume_resnet, FLAGS.resnet_train_dir, 'resnet_model_')
    resume(sess, FLAGS.resume_all, FLAGS.end_2_end_train_dir, '')

    # create data generator
    if FLAGS.training_period == 'pretrain':
        dataset = DataGenerator(data_dir, img_list, data_dir_valid, img_list_valid)
        dataset._create_train_sets_for_300W()
        dataset._create_valid_sets_for_300W()
    elif FLAGS.training_period == 'train':
        dataset = DataGenerator(data_dir,img_list)
        dataset._create_train_table()
        dataset._create_sets_for_300VW()
    else:
        raise NameError("No such training_period!")
    train_gen = dataset._aux_generator(batch_size = FLAGS.batch_size,
                                    num_input_imgs = num_input_imgs,
                                    NUM_CLASSES = POINTS_NUM*2,
                                    sample_set='train')
    valid_gen = dataset._aux_generator(batch_size = FLAGS.batch_size,
                                    num_input_imgs = num_input_imgs,
                                    NUM_CLASSES = POINTS_NUM*2,
                                    sample_set='valid')

    # main training process.
    for x in xrange(FLAGS.max_steps + 1):

        start_time = time.time()
        step = sess.run(global_step)
        i = [train_op, loss_]
        write_summary = step > 1 and not (step % 100)
        if write_summary:
            i.append(summary_op)
        # Also fetch the logits and the recurrent state for the next step;
        # they are read back below as o[-3], o[-2], o[-1].
        i.append(resnet_model.logits)
        i.append(F_curr)
        i.append(H_curr)

        train_line_num, frame_name, input_boundaries, boundary_gt_train, input_images_blur_generated, landmark_gt_train = next(train_gen)

        # '2.jpg' apparently marks the first frame of a clip: reset the
        # recurrent state (F, H) and the boundary stack to zeros / a fresh
        # copy of the generator's boundaries.
        if (frame_name == '2.jpg'):
            input_images_boundary_init = copy.deepcopy(input_boundaries)
            F_init = np.zeros([FLAGS.batch_size,
                               IMAGE_SIZE//2,
                               IMAGE_SIZE//2,
                               structure_predictor_net_channel//2], dtype=np.float32)

            H_init = np.zeros([1,
                               FLAGS.batch_size,
                               IMAGE_SIZE//2,
                               IMAGE_SIZE//2,
                               structure_predictor_net_channel], dtype=np.float32)
            feed_dict={
                    input_images_boundary:input_images_boundary_init,
                    input_images_blur:input_images_blur_generated,
                    F:F_init,
                    H:H_init,
                    labels:landmark_gt_train,
                    next_boundary_gt:boundary_gt_train,
                    dropout_ratio:0.5
                    }
        else:
            # Carry the previous step's outputs forward: the predicted
            # landmarks become the next boundary heatmap, and F/H state is
            # threaded through from the last sess.run.
            output_points = o[-3]
            output_points = np.reshape(output_points,(POINTS_NUM,2))

            boundary_from_points = points_to_heatmap_rectangle_68pt(output_points)
            boundary_from_points = np.expand_dims(boundary_from_points,axis=0)
            boundary_from_points = np.expand_dims(boundary_from_points,axis=3)
            input_images_boundary_init = np.concatenate([input_images_boundary_init[:,:,:,1:2],
                                                         boundary_from_points], axis=3)
            feed_dict={
                    input_images_boundary:input_images_boundary_init,
                    input_images_blur:input_images_blur_generated,
                    F:o[-2],
                    H:o[-1],
                    labels:landmark_gt_train,
                    next_boundary_gt:boundary_gt_train,
                    dropout_ratio:0.5
                    }

        o = sess.run(i,feed_dict=feed_dict)
        loss_value = o[1]
        duration = time.time() - start_time
        assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

        if step > 1 and step % 300 == 0:
            examples_per_sec = FLAGS.batch_size / float(duration)
            format_str = ('step %d, loss = %.2f (%.1f examples/sec; %.3f '
                          'sec/batch)')
            print(format_str % (step, loss_value, examples_per_sec, duration))

        if write_summary:
            summary_str = o[2]
            summary_writer.add_summary(summary_str, step)

        # Periodic checkpoint.
        if step > 1 and step % 300 == 0:
            checkpoint_path = os.path.join(FLAGS.end_2_end_train_dir, 'model.ckpt')
            ensure_dir(checkpoint_path)
            saver_all.save(sess, checkpoint_path, global_step=global_step)

        # Run validation periodically
        if step > 1 and step % 300 == 0:
            valid_line_num, frame_name, input_boundaries, boundary_gt_valid, input_images_blur_generated, landmark_gt_valid = next(valid_gen)

            # Same clip-reset logic as training; dropout_ratio 1.0 (off).
            if (frame_name == '2.jpg')  or valid_line_num <= 3:
                input_images_boundary_init = copy.deepcopy(input_boundaries)
                F_init = np.zeros([FLAGS.batch_size,
                                   IMAGE_SIZE//2,
                                   IMAGE_SIZE//2,
                                   structure_predictor_net_channel//2], dtype=np.float32)

                H_init = np.zeros([1, FLAGS.batch_size,
                                   IMAGE_SIZE//2,
                                   IMAGE_SIZE//2,
                                   structure_predictor_net_channel], dtype=np.float32)

                feed_dict={input_images_boundary:input_images_boundary_init,
                        input_images_blur:input_images_blur_generated,
                        F:F_init,
                        H:H_init,
                        labels:landmark_gt_valid,
                        next_boundary_gt:boundary_gt_valid,
                        dropout_ratio:1.0
                        }
            else:
                output_points = o_valid[-3]
                output_points = np.reshape(output_points,(POINTS_NUM,2))
                boundary_from_points = points_to_heatmap_rectangle_68pt(output_points)
                boundary_from_points = np.expand_dims(boundary_from_points,axis=0)
                boundary_from_points = np.expand_dims(boundary_from_points,axis=3)

                input_images_boundary_init = np.concatenate([input_images_boundary_init[:,:,:,1:2],
                                                             boundary_from_points], axis=3)
                feed_dict={
                        input_images_boundary:input_images_boundary_init,
                        input_images_blur:input_images_blur_generated,
                        F:o_valid[-2],
                        H:o_valid[-1],
                        labels:landmark_gt_valid,
                        next_boundary_gt:boundary_gt_valid,
                        dropout_ratio:1.0
                        }
            i_valid = [loss_,resnet_model.logits,F_curr,H_curr]
            o_valid = sess.run(i_valid,feed_dict=feed_dict)
            print('Validation top1 error %.2f' % o_valid[0])
            if write_summary:
                # NOTE(review): this re-uses the last *training*
                # summary_str for the validation writer — confirm intended.
                val_summary_writer.add_summary(summary_str, step)
            # Save a visualisation: deblurred-vs-blurred comparison and the
            # predicted landmarks drawn on the (de-normalised, *255) frame.
            img_video_deblur_output = sess.run(resnet_model.video_deblur_output,feed_dict=feed_dict)[0]*255
            img = input_images_blur_generated[0,:,:,0:3]*255
            compare_img = np.concatenate([img,img_video_deblur_output],axis=1)
            points = o_valid[1][0]*255

            for point_num in range(int(points.shape[0]/2)):
                cv2.circle(img,(int(round(points[point_num*2])),int(round(points[point_num*2+1]))),1,(55,225,155),2)
            val_save_path = os.path.join(val_save_root,str(step)+'.jpg')
            compare_save_path = os.path.join(compare_save_root,str(step)+'.jpg')
            ensure_dir(val_save_path)
            ensure_dir(compare_save_path)
            cv2.imwrite(val_save_path,img)
            cv2.imwrite(compare_save_path,compare_img)
Exemple #15
0
def main():
    """Run 5-fold cross-validation of a VGG16 crowd-counting/ranking model.

    Loads the UCF counting images and their ``_ann.mat`` ground truth,
    builds a 5-level multi-scale crop pyramid for training, trains a joint
    counting/ranking network for each fold, evaluates MAE/MSE on the
    held-out split, and writes per-fold plus averaged results into a
    timestamped results folder.

    Relies on module-level globals defined elsewhere in this file:
    ``iterations``, ``iterations_per_epoch``, ``params``, ``s`` (start
    time), ``step_decay``, ``euclideanDistanceCountingLoss``,
    ``pairwiseRankingHingeLoss``, ``mae``, ``mse``, etc.
    """
    # Counting Dataset
    counting_dataset_path = 'counting_data_UCF'
    counting_dataset = list()
    train_labels = {}
    val_labels = {}
    for im_path in glob.glob(os.path.join(counting_dataset_path, '*.jpg')):
        counting_dataset.append(im_path)
        img = image.load_img(im_path)
        gt_file = im_path.replace('.jpg', '_ann.mat')
        # NOTE(review): PIL's img.size is (width, height); the names here
        # are swapped and load_gt_from_mat then receives (w, h). Confirm
        # the convention load_gt_from_mat expects before changing this.
        h, w = img.size
        dmap, crowd_number = load_gt_from_mat(gt_file, (w, h))
        train_labels[im_path] = dmap        # density map (training target)
        val_labels[im_path] = crowd_number  # scalar count (validation target)
    counting_dataset_pyramid, train_labels_pyramid = multiscale_pyramid(
        counting_dataset, train_labels)

    # Ranking Dataset
    ranking_dataset_path = 'ranking_data'
    ranking_dataset = list()
    for im_path in glob.glob(os.path.join(ranking_dataset_path, '*.jpg')):
        ranking_dataset.append(im_path)

    # randomize the order of images before splitting
    np.random.shuffle(counting_dataset)

    split_size = int(round(len(counting_dataset) / 5))
    splits_list = list()
    for t in range(5):
        splits_list.append(counting_dataset[t * split_size:t * split_size +
                                            split_size])

    split_val_labels = {}

    mae_sum = 0.0
    mse_sum = 0.0

    # create folder to save results, named after the current date/time
    date = str(datetime.datetime.now())
    d = date.split()
    d1 = d[0]
    d2 = d[1].split(':')
    results_folder = 'Results-' + d1 + '-' + d2[0] + '.' + d2[1]
    if not os.path.exists(results_folder):
        os.makedirs(results_folder)

    # 5-fold cross validation
    epochs = int(round(iterations / iterations_per_epoch))
    n_fold = 5

    # BUGFIX: the fold index was previously named `f` and then shadowed by
    # the file handle in `with open(...) as f`, leaving `f` bound to a
    # closed file object after each iteration.  Renamed to `fold`.
    for fold in range(0, n_fold):
        print('\nFold ' + str(fold))

        # Model: VGG16 convolutional trunk up to block5_conv3.
        model = VGG16(include_top=False, weights='imagenet')
        transfer_layer = model.get_layer('block5_conv3')
        conv_model = Model(inputs=[model.input],
                           outputs=[transfer_layer.output],
                           name='vgg_partial')

        counting_input = Input(shape=(224, 224, 3),
                               dtype='float32',
                               name='counting_input')
        ranking_input = Input(shape=(224, 224, 3),
                              dtype='float32',
                              name='ranking_input')
        x = conv_model([counting_input, ranking_input])
        # Single-channel density-map head shared by both branches.
        counting_output = Conv2D(1, (3, 3),
                                 strides=(1, 1),
                                 padding='same',
                                 data_format=None,
                                 dilation_rate=(1, 1),
                                 activation='relu',
                                 use_bias=True,
                                 kernel_initializer='glorot_uniform',
                                 bias_initializer='zeros',
                                 kernel_regularizer=None,
                                 bias_regularizer=None,
                                 activity_regularizer=None,
                                 kernel_constraint=None,
                                 bias_constraint=None,
                                 name='counting_output')(x)

        # The ranking output is computed using SUM pool. Here I use
        # GlobalAveragePooling2D followed by a multiplication by 14^2 to do
        # this.
        ranking_output = Lambda(
            lambda i: 14.0 * 14.0 * i,
            name='ranking_output')(GlobalAveragePooling2D(
                name='global_average_pooling2d')(counting_output))
        train_model = Model(inputs=[counting_input, ranking_input],
                            outputs=[counting_output, ranking_output])
        train_model.summary()

        # l2 weight decay, including layers nested in the 'vgg_partial'
        # sub-model.
        for layer in train_model.layers:
            if hasattr(layer, 'kernel_regularizer'):
                layer.kernel_regularizer = regularizers.l2(5e-4)
            elif layer.name == 'vgg_partial':
                for l in layer.layers:
                    if hasattr(l, 'kernel_regularizer'):
                        l.kernel_regularizer = regularizers.l2(5e-4)

        # lr=0.0 on purpose: the LearningRateScheduler callback below
        # drives the actual learning rate via step_decay.
        optimizer = SGD(lr=0.0, decay=0.0, momentum=0.9, nesterov=False)
        loss = {
            'counting_output': euclideanDistanceCountingLoss,
            'ranking_output': pairwiseRankingHingeLoss
        }
        loss_weights = [1.0, 0.0]
        train_model.compile(optimizer=optimizer,
                            loss=loss,
                            loss_weights=loss_weights)

        splits_list_tmp = splits_list.copy()

        # counting validation split
        split_val = splits_list_tmp[fold]

        del splits_list_tmp[fold]
        flat = itertools.chain.from_iterable(splits_list_tmp)

        # counting train split
        split_train = list(flat)

        # counting validation split labels
        split_val_labels = {k: val_labels[k] for k in split_val}

        # Flatten the 5-level pyramid for every training image (previously
        # five copy-pasted append pairs per key).
        counting_dataset_pyramid_split = []
        train_labels_pyramid_split = []
        for key in split_train:
            for level in range(5):
                counting_dataset_pyramid_split.append(
                    counting_dataset_pyramid[key][level])
                train_labels_pyramid_split.append(
                    train_labels_pyramid[key][level])

        # Shuffle images and labels with the same permutation.
        index_shuf = np.arange(len(counting_dataset_pyramid_split))
        np.random.shuffle(index_shuf)
        counting_dataset_pyramid_split_shuf = []
        train_labels_pyramid_split_shuf = []
        for i in index_shuf:
            counting_dataset_pyramid_split_shuf.append(
                counting_dataset_pyramid_split[i])
            train_labels_pyramid_split_shuf.append(
                train_labels_pyramid_split[i])

        train_generator = DataGenerator(counting_dataset_pyramid_split_shuf,
                                        train_labels_pyramid_split_shuf,
                                        ranking_dataset, **params)
        lrate = LearningRateScheduler(step_decay)
        callbacks_list = [lrate]
        train_model.fit_generator(generator=train_generator,
                                  epochs=epochs,
                                  callbacks=callbacks_list)

        # test images: rebuild a fully-convolutional model that sums the
        # predicted density map into a single count per image.
        tmp_model = train_model.get_layer('vgg_partial')
        test_input = Input(shape=(None, None, 3),
                           dtype='float32',
                           name='test_input')
        new_input = tmp_model(test_input)
        co = train_model.get_layer('counting_output')(new_input)
        test_output = Lambda(lambda i: K.sum(i, axis=(1, 2)),
                             name='test_output')(co)
        test_model = Model(inputs=[test_input], outputs=[test_output])

        predictions = np.empty((len(split_val), 1))
        y_validation = np.empty((len(split_val), 1))
        for i in range(len(split_val)):
            img = image.load_img(split_val[i], target_size=(224, 224))
            img_to_array = image.img_to_array(img)
            img_to_array = preprocess_input(img_to_array)
            img_to_array = np.expand_dims(img_to_array, axis=0)

            pred_test = test_model.predict(img_to_array)
            predictions[i] = pred_test
            y_validation[i] = split_val_labels[split_val[i]]

        mean_abs_err = mae(predictions, y_validation)
        mean_sqr_err = mse(predictions, y_validation)

        # serialize model to JSON
        model_json = test_model.to_json()
        model_json_name = "test_model_" + str(fold) + ".json"
        with open(model_json_name, "w") as json_file:
            json_file.write(model_json)
        # serialize weights to HDF5
        model_h5_name = "test_model_" + str(fold) + ".h5"
        test_model.save_weights(model_h5_name)
        print("Saved model to disk")

        print('\n######################')
        print('Results on TEST SPLIT:')
        print(' MAE: {}'.format(mean_abs_err))
        print(' MSE: {}'.format(mean_sqr_err))
        print("Took %f seconds" % (time.time() - s))
        path1 = results_folder + '/test_split_results_fold-' + str(fold) + '.txt'
        # BUGFIX: write through a dedicated name instead of re-using `f`.
        with open(path1, 'w') as results_file:
            results_file.write('mae: %f,\nmse: %f, \nTook %f seconds' %
                               (mean_abs_err, mean_sqr_err, time.time() - s))

        mae_sum = mae_sum + mean_abs_err
        mse_sum = mse_sum + mean_sqr_err

    print('\n################################')
    print('Average Results on TEST SPLIT:')
    print(' AVE MAE: {}'.format(mae_sum / n_fold))
    print(' AVE MSE: {}'.format(mse_sum / n_fold))
    print("Took %f seconds" % (time.time() - s))
    path2 = results_folder + '/test_split_results_avg.txt'
    with open(path2, 'w') as results_file:
        results_file.write('avg_mae: %f, \navg_mse: %f, \nTook %f seconds' %
                           (mae_sum / n_fold, mse_sum / n_fold,
                            time.time() - s))
Exemple #16
0
def main():
    """Train a VGG16-backbone YOLOv1 detector with per-epoch validation.

    Resumes from an existing checkpoint at ``model_name`` when present,
    trains with SGD under a gradual warm-up LR schedule, logs the train and
    validation loss every epoch, and saves the checkpoint with the lowest
    validation loss as 'best.pth'.

    Relies on module-level globals: ``model_name``, ``use_gpu``,
    ``img_folder``, ``validate_folder``, ``img_size``, ``S``, ``B``, ``C``,
    ``n_batch``, ``train_num``, ``test_num``, ``results_folder``,
    ``lambda_coord``, ``lambda_noobj``, ``num_epochs``.
    """
    best_test_loss = np.inf
    model = Yolov1_vgg16bn(pretrained=True)
    print('pre-trained vgg16 model has loaded!')

    # Resume from a previous checkpoint if one exists on disk.
    previous_model_path = model_name
    exists = os.path.isfile(previous_model_path)
    if exists:
        print("Starting from previous result...")
        model.load_state_dict(torch.load(previous_model_path))
    else:
        print("Starting with new train")

    #print(model)

    print('')

    if use_gpu:
        model.cuda()

    # Data
    print('==> Preparing data..')
    transform = transforms.Compose([
        transforms.ToTensor(),
    ])
    #transforms.Normalize((0.5,0.5,0.5), (0.5,0.5,0.5)) parent_dir, img_size, S, B, C, transforms, num = 15000):

    train_dataset = DataGenerator(parent_dir=img_folder,
                                  img_size=img_size,
                                  S=S,
                                  B=B,
                                  C=C,
                                  transform=transform,
                                  num=train_num,
                                  train=True)

    train_loader = DataLoader(train_dataset,
                              batch_size=n_batch,
                              shuffle=True,
                              num_workers=8)

    test_dataset = DataGenerator(parent_dir=validate_folder,
                                 img_size=img_size,
                                 S=S,
                                 B=B,
                                 C=C,
                                 transform=transform,
                                 num=test_num,
                                 train=False)
    test_loader = DataLoader(test_dataset,
                             batch_size=n_batch,
                             shuffle=False,
                             num_workers=8)

    model.train()

    train_val_loss_log = open(
        os.path.join(results_folder, 'train_val_loss_log'), 'w+')
    #loss_fn = YoloLoss(B, S, lambda_coord, lambda_noobj)
    loss_fn = YoloLossNew(B, S, C, lambda_coord, lambda_noobj)

    optimizer = torch.optim.SGD(model.parameters(),
                                lr=0.0001,
                                momentum=0.9,
                                weight_decay=0.0005)
    #optimizer = torch.optim.SGD(model.parameters(),lr=0.0001)
    scheduler = GradualWarmupScheduler(optimizer, multiplier=8, total_epoch=30)

    for epoch in range(num_epochs):
        scheduler.step(epoch)
        print(epoch, optimizer.param_groups[0]['lr'])

        # BUGFIX: the validation pass below switches the network to eval();
        # re-enable training mode (dropout / batch-norm updates) at the
        # start of every epoch, otherwise every epoch after the first
        # trains in eval mode.
        model.train()

        for i, (img_name, images, target) in enumerate(train_loader):
            #images = images.float()
            #target = target.float()
            images = Variable(images)
            target = Variable(target)
            if use_gpu:
                images, target = images.cuda(), target.cuda()

            optimizer.zero_grad()

            pred = model(images)
            loss = loss_fn(pred, target)
            current_loss = loss.item()

            loss.backward()
            optimizer.step()
            if i % 20 == 0:
                print(
                    "\r%d/%d batches in %d/%d iteration, current error is %f" %
                    (i, len(train_loader), epoch + 1, num_epochs,
                     current_loss))

        save_model_by_epoch(epoch, model)

        # validat on validation set
        validation_loss = 0.0
        model.eval()
        with torch.no_grad():
            for i, (img_name, images, target) in enumerate(test_loader):
                #image = images.float()
                #target = target.float()
                images = Variable(images)
                target = Variable(target)
                if use_gpu:
                    images, target = images.cuda(), target.cuda()

                pred = model(images)
                loss = loss_fn(pred, target)
                validation_loss += loss.item()

        validation_loss /= len(test_loader)
        # log the training loss and validation loss every epoch
        log_str = 'epoch: {}, train_loss: {}, val_loss: {} \n'.format(
            epoch + 1, current_loss, validation_loss)
        print(log_str)
        train_val_loss_log.writelines(log_str)
        train_val_loss_log.flush()
        if best_test_loss > validation_loss:
            best_test_loss = validation_loss
            save_torch_model(model, 'best.pth', epoch)

    train_val_loss_log.close()
Exemple #17
0
# Load the preselected gold-standard (gs) / OCR text pairs for training.
with open(os.path.join(data_dir, 'train.pkl'), 'rb') as f:
    gs_selected_train, ocr_selected_train = pickle.load(f)

# Same pairs for validation.
with open(os.path.join(data_dir, 'val.pkl'), 'rb') as f:
    gs_selected_val, ocr_selected_val = pickle.load(f)

# Character-to-integer mapping used to encode both sequences.
with open(os.path.join(data_dir, 'ci.pkl'), 'rb') as f:
    ci = pickle.load(f)

# Vocabulary size = number of distinct characters in the mapping.
n_vocab = len(ci)

# Batch generators: OCR text is the input (x), gold standard the target (y).
# seq_length, pc (padding char), oc (OOV char), batch_size and shuffle are
# module-level settings defined elsewhere in this file.
dg_val = DataGenerator(xData=ocr_selected_val,
                       yData=gs_selected_val,
                       char_to_int=ci,
                       seq_length=seq_length,
                       padding_char=pc,
                       oov_char=oc,
                       batch_size=batch_size,
                       shuffle=shuffle)
dg_train = DataGenerator(xData=ocr_selected_train,
                         yData=gs_selected_train,
                         char_to_int=ci,
                         seq_length=seq_length,
                         padding_char=pc,
                         oov_char=oc,
                         batch_size=batch_size,
                         shuffle=shuffle)

# create the network
model = Sequential()
def train_model(Dataset,CompileModel,TrainModel,name_data,name_model,name_weights,
             num_data_tr,num_data_val,patch_size,num_epochs,lr,params):
    """Optionally generate data, build or load a 3D CNN, then train or restore it.

    Parameters
    ----------
    Dataset : bool
        If True, generate synthetic training/validation data first.
    CompileModel : bool
        If True, build and compile a fresh model and save its architecture;
        otherwise load the saved JSON architecture and recompile it.
    TrainModel : bool
        If True, fit the model with generators and save weights/history;
        otherwise load previously saved weights and history.
    name_data, name_model, name_weights : str
        Names used to locate the dataset folder, model JSON, and weight files.
    num_data_tr, num_data_val : int
        Number of training / validation samples (generator IDs).
    patch_size : int
        Edge length of the cubic input volumes.
    num_epochs : int
        Number of training epochs.
    lr : float
        Learning rate for the Adam optimizer.
    params : dict
        Extra keyword arguments forwarded to DataGenerator.

    Returns
    -------
    tuple
        (model, history_dict): the Keras model and its training history.
    """
# =============================================================================
# Configuration
# =============================================================================
    # Paths for dataset
    path_home = os.getcwd()
    tr_path = os.path.join(path_home, 'dataset', name_data, 'train')
    vl_path = os.path.join(path_home, 'dataset', name_data, 'validation')
    tdpath = os.path.join(tr_path, 'seis')
    tfpath = os.path.join(tr_path, 'fault')
    vdpath = os.path.join(vl_path, 'seis')
    vfpath = os.path.join(vl_path, 'fault')

    # Paths for model, weights, and metrics
    path_model = os.path.join(path_home, 'model')
    path_model_arch = os.path.join(path_model, name_model + '.json')
    path_weights = os.path.join(path_model, 'weights', name_weights + '.h5')
    path_hists = os.path.join(path_model, 'weights', name_weights + '_hist.txt')
    path_cb = os.path.join(path_model, 'call_back', name_weights)

    t0 = time()
# =============================================================================
# Build Training & Validation Dataset
# =============================================================================
    if Dataset:
        # Generate synthetic seismic data for training and validation.
        print('Generating Training Data')
        SyntheticSeisGen(tr_path, num_data_tr, patch_size)
        print('\nGenerating Validation Data')
        SyntheticSeisGen(vl_path, num_data_val, patch_size)
        print('\nSaving Dataset')
    else:
        # NOTE(review): this only warns; execution continues even when the
        # dataset directories are missing.
        if not (os.path.exists(tr_path) | os.path.exists(vl_path)):
            print("Please Create Dataset First!")

# =============================================================================
# Create 3D Convolutional Neural Network Model
# =============================================================================
    if CompileModel:
        # Compile a fresh CNN model over cubic patches with one channel.
        print('Creating CNN Model')
        conv_model = create_model((*[int(patch_size)]*3,1), lr)
        model = conv_model.model
        # Save the architecture as JSON.
        # BUGFIX: open(...).write(...) leaked the file handle; use a
        # context manager so the file is flushed and closed.
        json_string = model.to_json()
        with open(path_model_arch, 'w') as arch_file:
            arch_file.write(json_string)
    else:
        # Load a previously saved CNN architecture and recompile it.
        print('Loading CNN Model')
        with open(path_model_arch, 'r') as json_file:
            loaded_model_json = json_file.read()
        model = model_from_json(loaded_model_json)
        model.compile(optimizer=optimizers.Adam(lr),
                      loss=cross_entropy_balanced, metrics=['accuracy'])

# =============================================================================
# Train CNN Model
# =============================================================================
    if TrainModel:
        # Callbacks: per-epoch checkpoints, CSV log, and TensorBoard.
        cp_fn = os.path.join(path_cb, 'checkpoint.{epoch:02d}.h5')
        cp_cb = callbacks.ModelCheckpoint(filepath=cp_fn, verbose=1, save_best_only=False)
        csv_fn = os.path.join(path_cb, 'train_log.csv')
        csv_cb = callbacks.CSVLogger(csv_fn, append=True, separator=';')
        tb_cb = callbacks.TensorBoard(log_dir=path_cb, histogram_freq=0, batch_size=2,
                                      write_graph=True, write_grads=True, write_images=True)
        cbks = [cp_cb, csv_cb, tb_cb]

        # Fit with training/validation generators.
        print('\nModel Fitting')
        tdata_IDs = range(num_data_tr)
        vdata_IDs = range(num_data_val)
        tr_gen = DataGenerator(dpath=tdpath,fpath=tfpath,data_IDs=tdata_IDs,**params)
        val_gen = DataGenerator(dpath=vdpath,fpath=vfpath,data_IDs=vdata_IDs,**params)
        history = model.fit_generator(generator=tr_gen, validation_data=val_gen,
                                      epochs=num_epochs,verbose=1,callbacks=cbks)
        history_dict = history.history

        # Save weights & metrics.
        # BUGFIX: json.dump(..., open(...)) leaked the file handle.
        model.save_weights(path_weights)
        with open(path_hists, 'w') as hist_file:
            json.dump(history_dict, hist_file)
    else:
        # Restore trained weights and the saved training history.
        model.load_weights(path_weights)
        with open(path_hists, 'r') as hist_file:
            history_dict = json.load(hist_file)

    print('\nElapsed time: ' + "{:.2f}".format((time()-t0)/60) + ' min')
    return model, history_dict
Exemple #19
0
    x_vecs = x_mean + np.dot(v, (rand_vecs * e).T).T
    y_faces = func([x_vecs, 0])[0]
    for i in range(y_faces.shape[0]):
        save_image(y_faces[i], 'rand' + str(i) + '.png')
        if i < 5 and (iters % 10) == 0:
            if not os.path.exists('morph' + str(i)):
                os.makedirs('morph' + str(i))
            save_image(y_faces[i],
                       'morph' + str(i) + '/img' + str(iters) + '.png')


# Dump an initial set of random faces before any training (iteration 0).
make_rand_faces(rand_vecs, 0)

print("Training...")

datagen = DataGenerator(batch_size=BATCH_SIZE)
callbacks = [TensorBoard()]
train_loss = []

# NOTE(review): make_rand_faces appears to depend on `iters` (its body
# references that name), so the loop variable name below must not be
# changed without checking that function.
for iters in trange(NUM_EPOCHS):
    history = model.fit_generator(datagen, callbacks=callbacks)

    # Record the final loss of this epoch and plot the running curve.
    loss = history.history['loss'][-1]
    train_loss.append(loss)
    print("Loss: " + str(loss))

    plotScores(train_loss, [], 'EncoderScores.png', True)

    # Checkpoint every iteration (the modulo guard is effectively always
    # true with a divisor of 1).
    if iters % 1 == 0:
        model.save('Encoder.h5')
Exemple #20
0
def main(args):
    """Run VQA inference with the selected model and write predictions.

    Loads preprocessed question/image data from ``args.data_path``, builds
    the network named by ``args.model_type``, restores its weights from
    ``args.model_path``, predicts answers for the test split in chunks, and
    writes them via ``write_predictions`` to ``args.dest_path``.

    Raises
    ------
    ValueError
        If ``args.model_type`` is not one of the supported model names.
    """
    lstm_dim = 512
    n_answers = 1001
    question_embed_dim = 256

    qa_data = h5.File(os.path.join(args.data_path, "data_prepro.h5"), "r")

    with open(os.path.join(args.data_path, "data_prepro.json"), "r") as file:
        prepro_data = json.load(file)

    if args.extracted:
        # Use pre-extracted image features.
        img_feat = h5.File(os.path.join(args.data_path, "data_img.h5"),
                           "r")['images_test']
    else:
        # Load raw images and stack them channels-first as uint8.
        print("Loading images")
        img_feat = [
            img_to_array(load_img(os.path.join(args.data_path, image_filename),
                                  target_size=(224, 224)),
                         dtype='uint8',
                         data_format='channels_first')
            for image_filename in prepro_data['unique_img_test']
        ]
        img_feat = np.array(img_feat, dtype=np.uint8)

    VOCAB_SIZE = len(prepro_data['ix_to_word'])
    MAX_QUESTION_LEN = qa_data['ques_test'].shape[1]
    SOS = VOCAB_SIZE + 1
    # Add 1 for SOS and 1 for '0' -> padding
    VOCAB_SIZE += 2

    # Add SOS char at the beginning for every question
    questions = np.zeros((qa_data['ques_test'].shape[0], MAX_QUESTION_LEN + 1))
    questions[:, 1:] = qa_data['ques_test']
    questions[:, 0] = SOS

    ques_to_img = np.array(qa_data['img_pos_test'])

    ix_to_ans = prepro_data['ix_to_ans']
    question_ids = np.array(qa_data['question_id_test']).tolist()
    n_test = len(question_ids)

    # Define appropriate model
    if args.model_type == 'img_ques_att':
        model = ImgQuesAttentionNet(lstm_dim=lstm_dim,
                                    n_answers=n_answers,
                                    model_path=os.path.basename(
                                        args.model_path),
                                    VOCAB_SIZE=VOCAB_SIZE,
                                    MAX_QUESTION_LEN=MAX_QUESTION_LEN,
                                    question_embed_dim=question_embed_dim,
                                    log_path=None)
    elif args.model_type == 'show_n_tell':
        model = ShowNTellNet(lstm_dim=lstm_dim,
                             n_answers=n_answers,
                             model_path=os.path.basename(args.model_path),
                             VOCAB_SIZE=VOCAB_SIZE,
                             MAX_QUESTION_LEN=MAX_QUESTION_LEN,
                             question_embed_dim=question_embed_dim,
                             log_path=None)
    elif args.model_type == 'ques_att':
        model = QuesAttentionShowNTellNet(
            lstm_dim=lstm_dim,
            n_answers=n_answers,
            model_path=os.path.basename(args.model_path),
            VOCAB_SIZE=VOCAB_SIZE,
            MAX_QUESTION_LEN=MAX_QUESTION_LEN,
            question_embed_dim=question_embed_dim,
            log_path=None)

    elif args.model_type == 'conv_attention':
        model = ConvAttentionNet(lstm_dim=lstm_dim,
                                 n_answers=n_answers,
                                 model_path=os.path.basename(args.model_path),
                                 VOCAB_SIZE=VOCAB_SIZE,
                                 MAX_QUESTION_LEN=MAX_QUESTION_LEN,
                                 question_embed_dim=question_embed_dim,
                                 log_path=None)

    elif args.model_type == 'time_dist_cnn':
        model = TimeDistributedCNNNet(lstm_dim=lstm_dim,
                                      n_answers=n_answers,
                                      model_path=os.path.basename(
                                          args.model_path),
                                      VOCAB_SIZE=VOCAB_SIZE,
                                      MAX_QUESTION_LEN=MAX_QUESTION_LEN,
                                      question_embed_dim=question_embed_dim,
                                      log_path=None)

    else:
        # BUGFIX: an unrecognized model_type previously fell through and
        # crashed later with a NameError on `model`; fail fast instead.
        raise ValueError("Unknown model_type: {}".format(args.model_type))

    model.load_weights(weights_filename=args.model_path)

    chunk_size = 100000000
    # BUGFIX: np.int was deprecated in NumPy 1.20 and removed in 1.24; the
    # builtin int is the documented replacement.
    y_pred = np.zeros(n_test, dtype=int)
    n_chunks = len(range(0, n_test, chunk_size))
    for i, batch in enumerate(range(0, n_test, chunk_size)):
        begin = batch
        end = min(n_test, batch + chunk_size)
        # Test data generator
        test_datagen = DataGenerator(img_feat=np.array(img_feat),
                                     questions=questions[begin:end],
                                     answers=[],
                                     ques_to_img=ques_to_img[begin:end],
                                     VOCAB_SIZE=VOCAB_SIZE,
                                     n_answers=n_answers,
                                     batch_size=args.batch_size,
                                     shuffle=False,
                                     split='test')
        y_pred_chunk = model.predict(test_data=test_datagen)
        if (i + 1) % 50 == 0:
            print("Completed testing on {}/{} chunks...".format(
                i + 1, n_chunks))
        y_pred[begin:end] = y_pred_chunk

    write_predictions(filepath=args.dest_path,
                      y_pred=y_pred,
                      ix_to_ans=ix_to_ans,
                      question_ids=question_ids)
Exemple #21
0
	return params


if __name__ == '__main__':
	# add some parameters from the terminal
	parser = argparse.ArgumentParser(description='Launch the training of the Hourglass model.', add_help=True, epilog='Just a test for this parameter')
	parser.add_argument('--version', action='version', version='Version 1.0')
	parser.add_argument('--cfg', required=False, default = './config.cfg', help='The path for your config file')
	args = parser.parse_args()

	# Parse the .cfg file into the params dict used everywhere below.
	print('>>>>> Parsing Config File From %s' %(args.cfg))
	params = process_config(args.cfg)
	
	print('>>>>> Creating Dataset Now')
	# dataset.train_set is the table of the training set's names
	dataset = DataGenerator(joints_name = params['joint_list'],img_dir = params['img_directory'], train_data_file = params['training_txt_file'],
							camera_extrinsic = params['camera_extrinsic'], camera_intrinsic = params['camera_intrinsic'])
	dataset._create_train_table()
	# nfeats:256, nstacks:4 nmodules:1(not used)
	# nlow:4 (Number of downsampling in one stack)
	# mcam:false (attention system(not needed))
	# name:pretrained model
	# tiny:false weighted_loss:false

	# Pin training to the first GPU, then build the stacked hourglass model
	# entirely from config values.
	os.environ["CUDA_VISIBLE_DEVICES"] = "0"
	model = HourglassModel(nFeat=params['nfeats'], nStack=params['nstacks'], nModules=params['nmodules'], 
		nLow=params['nlow'], outputDim=params['num_joints'], batch_size=params['batch_size'], training=True, 
		drop_rate= params['dropout_rate'], lear_rate=params['learning_rate'], decay=params['learning_rate_decay'], decay_step=params['decay_step'], 
		dataset=dataset, name=params['name'], w_summary = True, logdir_train=params['log_dir_train'], logdir_test=params['log_dir_test'], tiny= params['tiny'],
		w_loss=params['weighted_loss'] , joints= params['joint_list'], gpu_frac=params['gpu_frac'], model_save_dir=params['model_save_dir'])
	
	print('>>>>> Creating Hourglass Model')
Exemple #22
0
            for option in config.options(section):
                params[option] = eval(config.get(section, option))
        if section == 'Saver':
            for option in config.options(section):
                params[option] = eval(config.get(section, option))
    return params


if __name__ == '__main__':
    print('--Parsing Config File')
    params = process_config('config.cfg')
    print('--Creating Dataset')

    def _build_dataset(idx, **extra):
        # Assemble one DataGenerator from the numbered config entries
        # ('img_directory<idx>', 'training_txt_file<idx>', ...).
        return DataGenerator(params['joint_list'],
                             params['img_directory%d' % idx],
                             params['training_txt_file%d' % idx],
                             remove_joints=params['remove_joints'],
                             img_dir_test=params['img_directory_test%d' % idx],
                             test_data_file=params['test_txt_file%d' % idx],
                             **extra)

    # Dataset 1 additionally carries the 3D ground truth; 2 and 3 are 2D-only.
    dataset1 = _build_dataset(1,
                              train_3D_gt=params['train_3d_gt'],
                              test_3D_gt=params['test_3d_gt'])
    dataset2 = _build_dataset(2)
    dataset3 = _build_dataset(3)
Exemple #23
0
def do_ddt_runs(expt):
    """Run the DDT allele experiment described by `expt`.

    For each run: generate fresh training data, train a SimpleClassifier,
    then for 5 known-errorful and 5 known-correct training frames build a
    SimpleAllele around that frame's label, retrain its variant on the
    training data minus that frame, and measure primary/variant accuracy on
    the practice points sharing the frame's label.

    Returns
    -------
    dict
        Keys 'Error' and 'Correct', each a list with one entry per run;
        every entry is the list of per-frame accuracy results.
    """
    gen = DataGenerator(expt.num_phonemes, expt.num_features,
                        expt.var_diag_interval, expt.var_offdiag_interval)

    perfect_practice_data = gen.generate_simulated_data(
        expt.num_practice_frames)
    practice_data, num_practice_errors = gen.add_errors_to_data(
        perfect_practice_data, expt.practice_error_rate)
    practice_data_dict = partition_data(practice_data)
    # We got some practice data for every point, right?
    # BUGFIX: the parentheses were misplaced before --
    # len(practice_data_dict.keys() == expt.num_phonemes) compared the keys
    # view to an int (always False) and then called len() on a bool, which
    # raises TypeError instead of performing the intended check.
    assert len(practice_data_dict) == expt.num_phonemes

    test_data = gen.generate_simulated_data(expt.num_test_frames)

    n = expt.num_training_frames
    assert (n * expt.training_error_rate >= 5)  # number of errorful points
    assert (n * (1 - expt.training_error_rate) > 5)  # number of correct points
    # By construction the first 5 training frames are errorful and the last
    # 5 are correct.
    error_training_frame_indices = range(0, 5)
    correct_training_frame_indices = range(n - 5, n)

    all_results = {}
    all_results['Error'] = []
    all_results['Correct'] = []
    for run_idx in range(0, expt.num_runs):
        training_data, num_errors = make_training_data(gen, expt)
        c = SimpleClassifier(gen.get_labels(), gen.num_features)
        c.train_all(training_data)

        def run_some_frames(frame_indices):
            # Evaluate an allele built around each selected training frame.
            frame_results = []
            for i in frame_indices:
                label = training_data[i][0]
                a = SimpleAllele(c, [label])

                # subtract (label, frame) from training_data for active phoneme
                alt_data = training_data[:i] + training_data[i + 1:]

                # train alternate model in allele on alternate data
                a.train_variants(alt_data)
                # print a.make_details_string()

                # Construct a subset of the practice data with only the points
                # which are labelled with the active label of the allele (see comments below).
                data = [(label, point) for point in practice_data_dict[label]]
                results = measurePrimaryAndVariantAccuracy(a, data)

                # KJB - here's the original version, in which we just
                # used all the practice data This essential means we
                # aren't using the practice data labels at all, which
                # might be an interesting variation, but isn't the
                # original intention.
                #results = measurePrimaryAndVariantAccuracy(a, practice_data)

                frame_results.append(results)
            return frame_results

        error_results = run_some_frames(error_training_frame_indices)
        all_results['Error'].append(error_results)
        correct_results = run_some_frames(correct_training_frame_indices)
        all_results['Correct'].append(correct_results)
    return all_results
Exemple #24
0
if __name__ == '__main__':

    environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"

    environ["CUDA_VISIBLE_DEVICES"] = "7"

    print('--Parsing Config File')
    params = process_config('config.cfg')

    print('--Creating Dataset')
    dataset = DataGenerator(params['img_directory'],
                            params['training_txt_file'],
                            params['num_joints'],
                            params['val_directory'],
                            params['val_txt_file'],
                            params['test_directory'],
                            params['test_txt_file'],
                            params['resolutions'],
                            params['headed'],
                            head_train=params['head_train'],
                            head_test=params['head_test'],
                            head_val=params['head_val'])
    dataset._create_test_table(
    )  #creates the lists with dicts of the coord. of boxes, joints and the corresp. weights

    model = HourglassModel(nFeat=params['nfeats'],
                           nStack=params['nstacks'],
                           nModules=params['nmodules'],
                           nLow=params['nlow'],
                           outputDim=params['num_joints'],
                           batch_size=params['batch_size'],
                           attention=params['mcam'],
Exemple #25
0
        allow_pickle=True).item()
    labels_dict = np.load(
        '/dccstor/cmv/MovieSummaries/embeddings/dummy_labels.npy',
        allow_pickle=True).item()
    print('\nDone Loading')

# 80/20 train/validation split over the IDs keyed by labels_dict, with a
# fixed seed for reproducibility.
train_ids, val_ids, train_labels, val_labels = train_test_split(
    list(labels_dict.keys()),
    list(labels_dict.values()),
    test_size=0.2,
    random_state=42)

# Batched generator over the training embeddings (shuffled each epoch).
train_generator = DataGenerator(mode='train',
                                data_dict=embedding_dict,
                                list_IDs=train_ids,
                                labels_dict=labels_dict,
                                num_classes=NUM_CLASSES,
                                batch_size=TRAIN_BATCH_SIZE,
                                shuffle=True)

# Validation generator: fixed order, separate batch size.
valid_generator = DataGenerator(mode='val',
                                data_dict=embedding_dict,
                                list_IDs=val_ids,
                                labels_dict=labels_dict,
                                num_classes=NUM_CLASSES,
                                batch_size=VAL_BATCH_SIZE,
                                shuffle=False)
# Build the model on CPU (weights are typically distributed to GPUs later).
with tf.device('/cpu:0'):
    model = BiLSTM(num_classes=NUM_CLASSES,
                   reg=args.regularization,
                   reg_wt=args.regularization_weight)
Exemple #26
0
from no_batch import MlpSingle
from MLP_batch import MlpBatch
import platform
import numpy as np

if __name__ == '__main__':
    # Pick the training-data folder by OS.
    # NOTE(review): on any other OS (e.g. macOS) `folder` is never
    # assigned and the DataGenerator call below raises NameError.
    if platform.system() == 'Windows':
        folder = 'C:/data/train_data'
    elif platform.system() == 'Linux':
        folder = '/home/shaoheng/Documents/PythonPractice/handwritedigit'

    batch = 5       # samples per training step
    class_num = 10  # digit classes 0-9

    # Generator yields batches of 16x16 images with one-hot labels.
    data_generator = DataGenerator(folder,
                                   batch, (16, 16),
                                   class_num=class_num)
    # Batched MLP: 256 flattened inputs -> 12 hidden -> class_num outputs.
    model = MlpBatch(input_nodes=16 * 16,
                     hidden_nodes=(12, class_num),
                     batch_size=batch)

    # Running count of correct predictions; updated in the per-sample
    # loop below (body not visible here — presumably compares `out` to
    # `y` per batch element).
    right = 0
    for i in range(1000000):
        # Every 100 steps report accuracy over all *completed* steps:
        # before this iteration, exactly i batches (i * batch samples)
        # have been processed.
        if (i + 1) % 100 == 0:
            print('acc=%.2f' % (right / (i * batch) * 100), '%')

        # One training step: load a batch, forward pass, backprop update.
        x, y = data_generator.load_data()
        out = model.forward_prop(x)
        model.back_prop(x, y)

        for b in range(batch):
Exemple #27
0
                help="path to testing dataset")
ap.add_argument("-c", "--csv", required=True, help="path to testing CSV file")
ap.add_argument("-m", "--model", required=True, help="path to model (.h5)")
ap.add_argument("-p",
                "--pred",
                required=True,
                help="path to prediction results")
args = vars(ap.parse_args())

# Derive the on-disk .npy path for every sample Id listed in the CSV.
data_test = pd.read_csv(args['csv'])
data_test["file_path"] = data_test["Id"].apply(
    lambda x: os.path.join(args['dataset'],
                           str(x) + ".npy"))
test_model = load_model(args['model'])

# Inference-only generator: shuffle=False keeps predictions aligned
# with the row order of data_test.
test_generator = DataGenerator(data_test['file_path'],
                               batch_size=50,
                               test=True,
                               shuffle=False)
final_result = test_model.predict_generator(generator=test_generator)

# predict_generator returns an ndarray, so the DataFrame gets *integer*
# column labels (0, 1, ...). The previous rename used the strings
# "Unnamed: 0" / '0', which matched no column and silently left the
# header unchanged — rename the integer column 0 instead.
compiled_result = pd.DataFrame(final_result)
compiled_result = compiled_result.rename(columns={0: 'Predicted'})

# Make sure the output directory exists before writing.
if not os.path.isdir(os.path.dirname(args['pred'])):
    os.makedirs(os.path.dirname(args['pred']))

# Label the index column "Id" so the CSV header reads Id,Predicted.
compiled_result.to_csv(args['pred'], index_label='Id')
Exemple #28
0
        return -1 * (x - y) / ((x - 1) * x + eps)


if __name__ == '__main__':

    # Pick data folders by OS.
    # NOTE(review): the Linux branch never sets `test_folder`, so
    # constructing valid_data_gen below raises NameError on Linux;
    # any other OS leaves `folder` unset as well.
    if platform.system() == 'Windows':
        folder = 'C:/data/train_data'
        test_folder = 'C:/data/test_data'
    elif platform.system() == 'Linux':
        folder = '/home/shaoheng/Documents/PythonPractice/handwritedigit'

    batch = 32      # training batch size
    class_num = 10  # digit classes 0-9

    # Streaming generator for training batches of 16x16 images.
    data_generator = DataGenerator(folder,
                                   batch, (16, 16),
                                   class_num=class_num)

    # One fixed validation set of 160 samples, loaded eagerly.
    valid_data_gen = DataGenerator(test_folder,
                                   160, (16, 16),
                                   class_num=class_num)
    valid_x, valid_y = valid_data_gen.load_data()

    # A larger fixed training sample (320) for full-set evaluation.
    train_data_gen = DataGenerator(folder, 320, (16, 16), class_num=class_num)
    train_full_x, train_full_y = train_data_gen.load_data()

    def forward(x):
        # Run x through each layer of the global `model` list in order;
        # the first Conv2D layer needs an explicit channel axis added.
        for deep, now_layer in enumerate(model):
            if type(now_layer) == Conv2D and deep == 0:
                x = np.expand_dims(x, -1)
            elif type(now_layer) == FC:
# Collect every validation image path from the two test folders.
val_images = [
    os.path.join(split_dir, fname)
    for split_dir in ['test\\fake', 'test\\real']
    for fname in os.listdir(split_dir)
]

# Label each path from its folder name: real -> 0, fake -> 1
# (tqdm just shows progress while the dict is built).
val_labels = {
    img: 0 if img.split('\\')[1] == 'real' else 1
    for img in tqdm(val_images)
}

# Training generator vs. deterministic validation generator; the
# type_gen flag tells DataGenerator which mode to run in.
train_gen = DataGenerator(train_images,
                          train_labels,
                          batch_size=bs,
                          dim=dim,
                          type_gen='train')
val_gen = DataGenerator(val_images,
                        val_labels,
                        batch_size=bs,
                        dim=dim,
                        type_gen='test')

# Sanity-check one training batch: shapes of the inputs and the labels.
X, Y = train_gen[0]

print(len(X), X[0].shape, X[1].shape)
print(Y)

# Figure for the batch preview grid below.
fig = plt.figure(figsize=(8, 8))
columns = 4
import math

# Convert each sample's steering angle from degrees to radians for both
# splits. NOTE: the original used scipy.pi, which is deprecated and
# removed from modern SciPy; math.pi is the identical float constant,
# so the computed labels are bit-for-bit the same.
for split in ("train", "validation"):
    for i in partition[split]:
        labels[i] = float(angle[i]) * math.pi / 180

# Parameters for datagen.py
params = {'dim': (66, 200, 3),
          'batch_size': 32,
          'shuffle': True}

# Generators: read sample IDs from `partition` and targets from `labels`.
training_generator = DataGenerator(partition["train"], labels, **params)
validation_generator = DataGenerator(partition["validation"], labels, **params)

# Define the model and compile with the Adam optimizer and mean
# squared error loss.
model = defineModel()
model.compile(optimizer='adam', loss="mse")

# Train for 10 epochs, validating after each epoch.
model.fit_generator(generator=training_generator,
                    epochs=10,
                    validation_data=validation_generator)

# Persist the trained model.
model.save("model.h5")