Example 1
def main(_):
    """ Typical usage

    For <model_name> see your folder name in ../checkpoints. 

    Training
    ``` sh
    $ python main.py --mode train --model <model> (if restoring or naming a model: --model_name <model_name>)
    ```
    
    Evaluation
    ``` sh
    $ python main.py --mode eval --model <model> --model_name <model_name>
    ```

    Shell
    ``` sh
    $ python main.py --mode shell --model <model> --model_name <model_name>
    ```
    """
    # Load data
    train = SquadDataset(*get_data_paths(FLAGS.data_dir, name='train'),
                         max_question_length=FLAGS.max_question_length,
                         max_paragraph_length=FLAGS.max_paragraph_length)
    dev = SquadDataset(*get_data_paths(FLAGS.data_dir, name='val'),
                       max_question_length=FLAGS.max_question_length,
                       max_paragraph_length=FLAGS.max_paragraph_length
                       )  # change to eval to zero if too long

    logging.info(f'Train/Dev size {train.length}/{dev.length}')

    # Load embeddings
    embed_path = FLAGS.embed_path or pjoin(
        FLAGS.data_dir, "glove.trimmed.{}.npz".format(FLAGS.embedding_size))
    embeddings = np.load(embed_path)['glove']  # 115373

    # Build model
    if FLAGS.model in ('baseline', 'mixed', 'dcnplus', 'dcn'):
        model = DCN(embeddings, FLAGS.__flags)
    elif FLAGS.model == 'cat':
        from networks.cat import Graph
        model = Graph(embeddings)
    else:
        raise ValueError(f'{FLAGS.model} is not a supported model')

    # Run mode
    if FLAGS.mode == 'train':
        save_flags()
        do_train(model, train, dev)
    elif FLAGS.mode == 'eval':
        do_eval(model, train, dev)
    elif FLAGS.mode == 'overfit':
        test_overfit(model, train)
    elif FLAGS.mode == 'shell':
        do_shell(model, dev)
    else:
        raise ValueError(f'Incorrect mode entered, {FLAGS.mode}')
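`get_data_paths` itself is not shown in any of these snippets. For the SQuAD examples it is unpacked into the three positional arguments of `SquadDataset`; below is a minimal sketch, assuming a conventional `<name>.ids.question` / `<name>.ids.context` / `<name>.span` file layout (the file names and the helper body are assumptions, not taken from the source):

```python
from os.path import join as pjoin

def get_data_paths(data_dir, name='train'):
    # Hypothetical sketch: return the tokenized question, paragraph and
    # answer-span files that SquadDataset unpacks positionally.
    return (pjoin(data_dir, '{}.ids.question'.format(name)),
            pjoin(data_dir, '{}.ids.context'.format(name)),
            pjoin(data_dir, '{}.span'.format(name)))
```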
Example 2
def create_crops(source_path: str,
                 size_x: int,
                 size_y: int,
                 step_x: int,
                 step_y: int,
                 mask_converter: Callable[[np.ndarray],
                                          np.ndarray] = identity):
    crops_path = '{}_crops{}x{}'.format(source_path, size_x, size_y)
    clear_and_create(crops_path)

    def pair_creator(img_name: str) -> Tuple[str, str]:
        origin_name = '{}.{}'.format(img_name, 'jpg')
        mask_name = re.sub('_img', '_mask.png', img_name)

        origin_path = join(source_path, origin_name)
        mask_path = join(source_path, mask_name)

        return origin_path, mask_path

    args = get_data_paths(source_path, pair_creator=pair_creator)
    print('creating generator...')
    generator = dataset_generator(*args,
                                  size_x=size_x,
                                  size_y=size_y,
                                  step_x=step_x,
                                  step_y=step_y,
                                  mask_converter=mask_converter)
    print('generator has been created')

    for idx, (img, mask) in enumerate(generator):
        cv2.imwrite('{}/{}_img.jpg'.format(crops_path, idx), img)
        cv2.imwrite('{}/{}_mask.png'.format(crops_path, idx), mask)
        if idx % 1000 == 0:
            print("{} crops created".format(idx))
Example 3
def __init__(self, fold):
    self.fold = fold
    # print('fold {}'.format(fold))
    # print('getting data paths')
    self.X_train_pos, self.X_test_pos = get_data_paths(
        data_positive, test_data_start, numfolds, self.fold)
    self.X_train_neg, self.X_test_neg = get_data_paths(
        data_negative, test_data_start, numfolds, self.fold)
    # print('building vocabulary')
    self.vocab = Counter()
    # Filter out words under the frequency threshold.
    words_to_delete = set()
    ft = 0
    if bigrams:
        # print('generating vocabulary for bigrams')
        for path in self.X_train_pos + self.X_train_neg:
            message = open_file(path)
            words = get_words(message)
            for i, word in enumerate(words):
                if i != 0:
                    bigram = words[i - 1] + ' ' + word
                    self.vocab[bigram] += 1
        ft = frequency_cutoff_bigram
        for word in self.vocab:
            if self.vocab[word] < ft:
                words_to_delete.add(word)
    if unigrams:
        # print('generating vocabulary for unigrams')
        for path in self.X_train_pos + self.X_train_neg:
            message = open_file(path)
            words = get_words(message)
            for word in words:
                self.vocab[word] += 1
        ft = frequency_cutoff_unigram
        for word in self.vocab:
            if self.vocab[word] < ft:
                words_to_delete.add(word)
    for word in words_to_delete:
        del self.vocab[word]
    # print('initializing rest of variables')
    self.vocab_size = len(self.vocab)
    self.p_word_given_pos = dict()
    self.p_word_given_neg = dict()
    self.p_pos = 0
    self.p_neg = 0
    self.k = k
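This constructor only builds the vocabulary and initializes the likelihood tables; the add-k smoothing implied by `self.k` would be filled in by a training pass. A hedged sketch of that step (the helper name and the per-class counters are assumptions, not from the source):

```python
from collections import Counter

def estimate_likelihoods(vocab, pos_counts: Counter, neg_counts: Counter, k):
    # Add-k (Laplace) smoothing: every vocabulary word receives k
    # pseudo-counts, so unseen words never get zero probability.
    v = len(vocab)
    total_pos = sum(pos_counts[w] for w in vocab)
    total_neg = sum(neg_counts[w] for w in vocab)
    p_word_given_pos = {w: (pos_counts[w] + k) / (total_pos + k * v) for w in vocab}
    p_word_given_neg = {w: (neg_counts[w] + k) / (total_neg + k * v) for w in vocab}
    return p_word_given_pos, p_word_given_neg
```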
Example 4
def test_overfit():
    """
    Tests that model can overfit on small datasets.
    """
    data_hparams = {'max_paragraph_length': 300, 'max_question_length': 25}
    train = SquadDataset(*get_data_paths(FLAGS.data_dir, name='train'),
                         **data_hparams)
    dev = SquadDataset(*get_data_paths(FLAGS.data_dir, name='val'),
                       **data_hparams)  # probably not cut

    embed_path = FLAGS.embed_path or pjoin(
        FLAGS.data_dir, "glove.trimmed.{}.npz".format(FLAGS.embedding_size))
    embeddings = np.load(embed_path)['glove']  # 115373

    test_hparams = {
        'learning_rate': 0.01,
        'keep_prob': 1.0,
        'trainable_embeddings': False,
        'clip_gradients': True,
        'max_gradient_norm': 5.0
    }
    model = Baseline(embeddings, test_hparams)

    epochs = 100
    test_size = 32
    steps_per_epoch = 10
    (train.question, train.paragraph, train.question_length,
     train.paragraph_length, train.answer) = train[:test_size]
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(epochs):
            epoch_start = timer()
            for step in range(steps_per_epoch):
                loss, _ = model.training_step(sess, *train[:test_size])
                if (step == 0 and epoch == 0):
                    print(
                        f'Entropy - Result: {loss:.2f}, Expected (approx.): {2*np.log(FLAGS.max_paragraph_length):.2f}'
                    )
                if step == steps_per_epoch - 1:
                    print(f'Cross entropy: {loss}')
                    train.length = test_size  # evaluate on the overfit subset only
                    print(evaluate(sess, model, train, size=test_size))
            global_step = tf.train.get_global_step().eval()
            print(
                f'Epoch took {timer() - epoch_start:.2f} s (step: {global_step})'
            )
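The sanity check above compares the first loss against the entropy of a uniform guess: the start and end pointers are each a softmax over the paragraph positions, so an untrained model should score about 2·ln(L). For the 300-token paragraphs configured here:

```python
import numpy as np

# Cross entropy of a uniform distribution over L positions is ln(L);
# the span predictor has two such heads (start and end).
L = 300
print(2 * np.log(L))  # ≈ 11.41
```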
Example 5
def main(_):
    # Load data
    train = SquadDataset(*get_data_paths(FLAGS.data_dir, name='train'),
                         max_question_length=FLAGS.max_question_length,
                         max_paragraph_length=FLAGS.max_paragraph_length)
    dev = SquadDataset(
        *get_data_paths(FLAGS.data_dir, name='val'),
        max_question_length=FLAGS.max_question_length,
        max_paragraph_length=FLAGS.max_paragraph_length)  # probably not cut
    # TODO convert to TF Dataset API
    # train = tf.convert_to_tensor(train)
    # dev = tf.convert_to_tensor(dev)
    # tf.contrib.data.Dataset()

    # logging.info(f'Train/Dev size {train.length}/{dev.length}')

    # Load embeddings
    embed_path = FLAGS.embed_path or pjoin(
        FLAGS.data_dir, "glove.trimmed.{}.npz".format(FLAGS.embedding_size))
    embeddings = np.load(embed_path)['glove']  # 115373

    is_training = (FLAGS.mode == 'train' or FLAGS.mode == 'overfit')

    # Build model
    if FLAGS.model == 'dcnplus':
        model = DCNPlus(embeddings, FLAGS.__flags, is_training=is_training)
    elif FLAGS.model == 'baseline':
        model = Baseline(embeddings, FLAGS.__flags)
    elif FLAGS.model == 'cat':
        model = Graph(embeddings, is_training=is_training)
    else:
        raise ValueError(f'{FLAGS.model} is not a supported model')

    # Run mode
    if FLAGS.mode == 'train':
        save_flags()
        do_train(model, train)
    elif FLAGS.mode == 'eval':
        do_eval(model, train, dev, evaluate)
    elif FLAGS.mode == 'overfit':
        test_overfit(model, train, evaluate)
    elif FLAGS.mode == 'shell':
        do_shell(model, dev)
    else:
        raise ValueError(f'Incorrect mode entered, {FLAGS.mode}')
Example 6
def main(_):
    # Load data
    train = SquadDataset(*get_data_paths(FLAGS.data_dir, name='train'),
                         max_question_length=FLAGS.max_question_length,
                         max_paragraph_length=FLAGS.max_paragraph_length)
    dev = SquadDataset(
        *get_data_paths(FLAGS.data_dir, name='val'),
        max_question_length=FLAGS.max_question_length,
        max_paragraph_length=FLAGS.max_paragraph_length)  # probably not cut
    # TODO convert to TF Dataset API
    # train = tf.convert_to_tensor(train)
    # dev = tf.convert_to_tensor(dev)
    # tf.contrib.data.Dataset()

    logging.info(f'Train/Dev size {train.length}/{dev.length}')

    # Load embeddings
    embed_path = FLAGS.embed_path or pjoin(
        FLAGS.data_dir, "glove.trimmed.{}.npz".format(FLAGS.embedding_size))
    embeddings = np.load(embed_path)['glove']  # 115373
    # vocab_path = FLAGS.vocab_path or pjoin(FLAGS.data_dir, "vocab.dat")
    # vocab, rev_vocab = initialize_vocab(vocab_path) # dict, list

    is_training = FLAGS.mode == 'train'

    # Build model
    if FLAGS.model == 'dcnplus':
        model = DCNPlus(embeddings, FLAGS.__flags, is_training=is_training)
    elif FLAGS.model == 'baseline':
        model = Baseline(embeddings, FLAGS.__flags)
    elif FLAGS.model == 'cat':
        model = Graph(embeddings, is_training=is_training)
    else:
        raise ValueError(f'{FLAGS.model} is not a supported model')

    # Run mode
    if FLAGS.mode == 'train':
        with open(os.path.join(FLAGS.train_dir, "flags.json"), 'w') as f:
            json.dump(FLAGS.__flags, f, indent=4)
        do_train(model, train, dev, evaluate)
    elif FLAGS.mode == 'eval':
        do_eval(model, train, dev, evaluate)
    else:
        raise ValueError(f'Incorrect mode entered, {FLAGS.mode}')
Example 7
def extract_features_from_folder(
        data_path: str,
        out_path: str,
        chunk_size: int = 4,
        size: float = 0.02) -> None:
    if exists(out_path):
        os.remove(out_path)
    data_paths = get_data_paths(data_path)

    for img_path, mask_path in data_paths:
        extract_features(img_path, mask_path, out_path, chunk_size, size, file_mode='a')
Example 8
def main():
    # environment = KukaCamGymEnv(renders=True, isDiscrete=False)
    environment = KukaCamGymEnv_Reconfigured(renders=True, isDiscrete=False)
    environment._reset()
    # num_of_objects = 50
    # num_of_objects_var = 10
    num_of_objects = 20
    num_of_objects_var = 4
    randomObjs = add_random_objs_to_scene(num_of_objects)
    done = False

    try:
        with open("sim_images/serial_num_log.txt", 'r') as f:
            img_serial_num = int(f.read())
    except (IOError, ValueError):
        print("Reading the serial number failed; starting from 0.")
        img_serial_num = 0
    step = 0
    # 42 would give about 10 snapshots per attempt, as in the real-world
    # dataset; 20 was used before.
    snapshot_interval = 50
    viewMat = get_randomized_ViewMat()  # sigma = 0.001
    camInfo = p.getDebugVisualizerCamera()
    # viewMat = camInfo[2]
    projMatrix = camInfo[3]
    action_keys = ["grasp/0/commanded_pose/transforms/base_T_endeffector/vec_quat_7",
                   "grasp/1/commanded_pose/transforms/base_T_endeffector/vec_quat_7"]
    data_folder = "/Users/bozai/Desktop/PixelDA/PixelDA/Data/tfdata"
    file_tail = "22"
    data_path = get_data_paths(data_folder, file_tail)
    commands = commands_iterator(data_path)
    while not done:
        attempt = next(commands)
        for action_with_quaternion in attempt:
            quaternion = action_with_quaternion[0][3:]
            euler = p.getEulerFromQuaternion(quaternion)  # [yaw, pitch, roll]
            action = action_with_quaternion[0][:3].tolist()
            action.append(euler[0])
            action.append(euler[2])
            if step % snapshot_interval == 0:
                # Get camera image.
                print("Saving image... Current image count: {}".format(img_serial_num))
                img_arr = p.getCameraImage(640, 512, viewMatrix=viewMat,
                                           projectionMatrix=projMatrix)  # 640*512*3
                write_from_imgarr(img_arr, img_serial_num)
                img_serial_num += 1
            step += 1
            # state: (256, 341, 4); info: empty dict
            state, reward, done, info = environment.step(action)
            print("step: {} done: {} reward: {}".format(step, done, reward))
        if done:
            environment._reset()
            randomObjs = add_random_objs_to_scene(
                num_of_objects + random.choice(range(-num_of_objects_var, num_of_objects_var + 1)))
            # Change the view per attempt, not per image (sigma was 0.003 before).
            viewMat = get_randomized_ViewMat(sigma=0.0001)
            done = False
            print("Environment reset!")
            with open("sim_images/serial_num_log.txt", "w") as f:
                f.write(str(img_serial_num))
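In this snippet (and in Examples 11, 14 and 18) `get_data_paths` takes a folder and a file tail rather than a split name, which suggests a simple glob over TFRecord shards. A sketch under that assumption (the pattern and the return type are guesses):

```python
import glob
import os

def get_data_paths(data_folder, file_tail):
    # Hypothetical variant: collect every file in the folder whose name
    # contains the given tail, e.g. the "*22*" TFRecord shards.
    return sorted(glob.glob(os.path.join(data_folder, '*{}*'.format(file_tail))))
```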
Example 9
def main():
    # 1936 x 1216
    input_size = (320, 480, 3)
    classes = 20
    train_dataset_x = '../seg_train_images/seg_train_images'
    train_dataset_y = '../seg_train_annotations/seg_train_annotations'
    test_size = 0.2
    batch_size = 8

    datasets_paths = get_data_paths(train_dataset_x, train_dataset_y)
    train_data, test_data = train_test_split(datasets_paths, test_size=test_size)
    net = Unet(input_size, classes)
    #net = SegNet(input_size, classes)
    net.summary()
    train_gen = DataGenerator(train_data, input_size, classes, batch_size)
    val_gen = DataGenerator(test_data, input_size, classes, batch_size)

    callbacks = [
        ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, verbose=1),
        EarlyStopping(monitor='val_loss', min_delta=0, patience=9, verbose=1),
        ModelCheckpoint('checkpoint/ep{epoch:03d}-loss{loss:.5f}-val_loss{val_loss:.5f}.h5', monitor='val_loss', verbose=1, save_best_only=True, mode='min')
    ]

    net.compile(optimizer=Adam(1e-3), loss=categorical_crossentropy)
    history = net.fit_generator(
        train_gen, 
        steps_per_epoch=train_gen.num_batches_per_epoch,
        validation_data=val_gen,
        validation_steps=val_gen.num_batches_per_epoch,
        initial_epoch=0,
        epochs=50,
        callbacks=callbacks
    )
    net.save_weights('checkpoint/first_stage.h5')

    # Note: without a fixed random_state this second split differs from
    # the first, so the new validation set can overlap data the model
    # has already trained on.
    train_data, test_data = train_test_split(datasets_paths, test_size=test_size)
    train_gen = DataGenerator(train_data, input_size, classes, batch_size)
    val_gen = DataGenerator(test_data, input_size, classes, batch_size)

    net.compile(optimizer=Adam(1e-4), loss=categorical_crossentropy)
    history = net.fit_generator(
        train_gen, 
        steps_per_epoch=train_gen.num_batches_per_epoch,
        validation_data=val_gen,
        validation_steps=val_gen.num_batches_per_epoch,
        initial_epoch=50,
        epochs=100,
        callbacks=callbacks
    )
    net.save_weights('checkpoint/final_stage.h5')
Example 10
def dataset_from_dir_sample():
    args = get_data_paths("data/water")
    # args = get_data_paths("data/water_small")

    cnt = 0
    for img, mask in dataset_generator(*args, step_x=56, step_y=56):
        cnt += 1
        print('{})'.format(cnt))

        cv2.imshow("img", img)
        cv2.imshow("mask", mask)
        cv2.waitKey(0)
        # cv2.imwrite('data/splitted_water/ex{}.jpg'.format(cnt), img)

    print('total count:', cnt)
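`dataset_generator`, shared by Examples 2, 10 and 21, yields aligned `(img, mask)` crops from full-size pairs. A minimal sliding-window sketch, assuming `get_data_paths` returns parallel lists of image and mask paths (the signature is inferred from the call sites, not confirmed):

```python
import cv2

def dataset_generator(img_paths, mask_paths, size_x=224, size_y=224,
                      step_x=224, step_y=224, mask_converter=lambda m: m):
    # Hypothetical sketch: slide a size_y x size_x window over each
    # image/mask pair in steps of (step_y, step_x) and yield the
    # aligned crops.
    for img_path, mask_path in zip(img_paths, mask_paths):
        img = cv2.imread(img_path)
        mask = mask_converter(cv2.imread(mask_path))
        for y in range(0, img.shape[0] - size_y + 1, step_y):
            for x in range(0, img.shape[1] - size_x + 1, step_x):
                yield (img[y:y + size_y, x:x + size_x],
                       mask[y:y + size_y, x:x + size_x])
```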
Example 11
def test_reader():
    #TRAIN_FILE_1 = 'sim_images_a1_low_var.tfrecords'
    #TRAIN_FILE_2 = 'sim_images_a2_low_var.tfrecords'
    TRAIN_FILE_1 = get_data_paths("Data/tfdata1", "34")
    # pprint(TRAIN_FILE_1)
    with tf.Graph().as_default():
        reader1 = SplitedReader(TRAIN_FILE_1, batch_size=2)
        #reader1 = Reader(TRAIN_FILE_1, batch_size=2)
        #reader2 = Reader(TRAIN_FILE_2, batch_size=2)
        images_op1 = reader1.feed()
        #images_op2 = reader2.feed()

        sess = tf.Session()
        init = tf.global_variables_initializer()
        sess.run(init)

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        try:
            step = 0
            while not coord.should_stop():
                #batch_images1, batch_images2 = sess.run([images_op1, images_op2])
                batch_images1 = sess.run(images_op1)
                print("image shape: {}".format(batch_images1))
                #print("image shape: {}".format(batch_images2))
                print("=" * 10)
                step += 1
        except KeyboardInterrupt:
            print('Interrupted')
            coord.request_stop()
        except Exception as e:
            coord.request_stop(e)
        finally:
            # When done, ask the threads to stop.
            coord.request_stop()
            coord.join(threads)
if __name__ == "__main__":
    args = get_args()

    if args.use_cuda:
        if torch.cuda.is_available():
            device = torch.device('cuda')
        else:
            print("Check if you have the correct nvidia driver installed!")
            # Leave device as None so loading the model fails fast; never
            # fall back silently to CPU on cloud GPU instances.
            device = None
    else:
        device = torch.device('cpu')

    torch.manual_seed(args.seed)

    model = utils.get_model().to(device)

    train_file_path, test_file_path = utils.get_data_paths(args.data_dir)
    train_set = utils.ImageDataset(train_file_path, aug_images=True)
    train_loader = utils.image_loader(train_set, args.batch_size, True)
    test_set = utils.ImageDataset(test_file_path, aug_images=False)
    test_loader = utils.image_loader(test_set, args.test_batch_size, False)

    loss_func = torch.nn.CrossEntropyLoss(reduction='mean')
    optimizer = torch.optim.Adam(model.fc.parameters(), lr=args.lr)
    model, best_performance_metrics, log_df = train(args, model, train_loader,
                                                    test_loader, loss_func,
                                                    optimizer, device)
    utils.save_model(args.model_dir, model)
    utils.save_job_log(args.log_dir, log_df, best_performance_metrics)
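Note that only `model.fc.parameters()` is handed to the optimizer, so the backbone is never updated but still accumulates gradients. If `model` is a torchvision-style network whose head is named `fc` (an assumption here), the intent can be made explicit and the backward pass cheaper:

```python
# Freeze everything except the final classification head so backward
# passes skip the backbone's gradients entirely.
for name, param in model.named_parameters():
    if not name.startswith('fc'):
        param.requires_grad = False
```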
Example 13
def main(_):
    """ Typical usage

    For <model_name> see your folder name in ../checkpoints. 

    Training
    ``` sh
    $ python main.py --mode train --model <model> (if restoring or naming a model: --model_name <model_name>)
    ```
    
    Evaluation
    ``` sh
    $ python main.py --mode eval --model <model> --model_name <model_name>
    ```

    Shell
    ``` sh
    $ python main.py --mode shell --model <model> --model_name <model_name>
    ```
    """
    # Load data
    train = SquadDataset(*get_data_paths(FLAGS.data_dir, name='train'),
                         max_question_length=FLAGS.max_question_length,
                         max_paragraph_length=FLAGS.max_paragraph_length)
    dev = SquadDataset(*get_data_paths(FLAGS.data_dir, name='val'),
                       max_question_length=FLAGS.max_question_length,
                       max_paragraph_length=FLAGS.max_paragraph_length
                       )  # change to eval to zero if too long

    logging.info(f'Train/Dev size {train.length}/{dev.length}')

    # Load embeddings
    embed_path = FLAGS.embed_path or pjoin(
        FLAGS.data_dir, "glove.trimmed.{}.npz".format(FLAGS.embedding_size))
    embeddings = np.load(embed_path)['glove']  # 115373

    # NOTE: the model construction and run modes below assume
    # --use_siamese is set; siamese_config and siamese_graph are
    # undefined otherwise.
    if FLAGS.use_siamese:
        # Get the config file for the siamese model.
        siamese_config = '../../paraphrase-id-tensorflow-master/logs/baseline_siamese/{}/trainparams.json'.format(
            FLAGS.siamese_model_num)
        with open(siamese_config, 'r') as f:
            siamese_config = json.load(f)
            siamese_config['mode'] = 'test'

        checkpoint_dir = '../../paraphrase-id-tensorflow-master/models/baseline_siamese/{}/'.format(
            FLAGS.siamese_model_num)
        # siamese_graph = ImportGraph(checkpoint_dir, embeddings)
        siamese_graph = ImportModel(checkpoint_dir, siamese_config, embeddings)

    # Build model
    if FLAGS.model in ('baseline', 'mixed', 'dcnplus', 'dcn'):
        # with tf.variable_scope('dcn'):
        model = DCN(embeddings,
                    FLAGS.__flags,
                    siamese_output_dim=siamese_config['rnn_hidden_size'])
    elif FLAGS.model == 'cat':
        from networks.cat import Graph
        model = Graph(embeddings)
    else:
        raise ValueError(f'{FLAGS.model} is not a supported model')

    # Run mode
    if FLAGS.mode == 'train':
        save_flags()
        do_train(model, train, dev, input_model=siamese_graph)
    elif FLAGS.mode == 'eval':
        do_eval(model, train, dev, input_model=siamese_graph)
    elif FLAGS.mode == 'overfit':
        test_overfit(model, train, input_model=siamese_graph)
    elif FLAGS.mode == 'shell':
        do_shell(model, dev, input_model=siamese_graph)
    else:
        raise ValueError(f'Incorrect mode entered, {FLAGS.mode}')
Example 14
def setting_simulation_env():
    time_step = 1./240.
    num_of_objects = 20
    num_of_objects_var = 4
    # p.connect(p.GUI)
    reset_sim_env()

    viewMat = get_randomized_ViewMat()  # sigma = 0.001
    # projMatrix = [[-0.0210269,  -0.99247903,  0.120598,    0.26878399],
    #               [-0.88814598, -0.0368459,  -0.45808199,  0.293412  ],
    #               [ 0.45908001, -0.116741,   -0.88069099,  0.71057898],
    #               [ 0.,          0.,          0.,          1.        ]]
    # camInfo = p.getDebugVisualizerCamera()
    # viewMat = camInfo[2]
    # projMatrix = camInfo[3]
    projMatrix = get_ProjMat()
    # action_keys = ["grasp/0/commanded_pose/transforms/base_T_endeffector/vec_quat_7",
    #                "grasp/1/commanded_pose/transforms/base_T_endeffector/vec_quat_7"]
    data_folder = "/Users/bozai/Desktop/PixelDA/PixelDA/Data/tfdata"
    file_tail = "22"
    data_path = get_data_paths(data_folder, file_tail)
    commands = commands_iterator(data_path)
    # try:
    #     with open("sim_images/serial_num_log.txt", 'r') as f:
    #         img_serial_num = int(f.read())
    # except:
    #     print("read serial number failed!")
    #     img_serial_num = 0
    img_serial_num = 0
    kukaEndEffectorIndex = 6
    # Joint damping coefficients.
    jd = [0.00001] * 14
    while True:
        kuka_arm = reset_sim_env()
        kukaUid = kuka_arm.kukaUid
        # numJoints = kuka_arm.numJoints

        attempt, image = next(commands)
        randomObjs = add_random_objs_to_scene(
            num_of_objects + random.choice(range(-num_of_objects_var, num_of_objects_var + 1)))
        for action_with_quaternion in attempt:
            quaternion = action_with_quaternion[0][3:]
            pos = action_with_quaternion[0][:3]
            jointPoses = p.calculateInverseKinematics(
                kukaUid, kukaEndEffectorIndex, pos, quaternion, jointDamping=jd)
            # print("numJoints: {}, jointPoses: {}".format(numJoints, len(jointPoses)))
            for i in range(12):
                p.resetJointState(kukaUid, i, jointPoses[i])
            # time.sleep(30)
            # quaternion = action_with_quaternion[0][3:]
            # euler = p.getEulerFromQuaternion(quaternion)  # [yaw, pitch, roll]
            # action = action_with_quaternion[0][:3].tolist()
            # action.append(euler[0])
            # action.append(euler[2])
            # kuka_arm.applyAction(action)  # bug in the module implementation
            print("Saving image... Current image count: {}".format(img_serial_num))
            img_arr = p.getCameraImage(640, 512, viewMatrix=viewMat,
                                       projectionMatrix=projMatrix,
                                       lightDirection=[1, 1, 1])  # 640*512*3
            # write_from_imgarr(img_arr, img_serial_num)
            subed = substitute_from_imgarr(img_arr, image)

            subed = cv2.cvtColor(subed, cv2.COLOR_RGB2BGR)
            cv2.imwrite("sim_backSubed_test/{0:0>6}_subed_test.jpeg".format(img_serial_num), subed)

            # Save the original image and the segmentation mask as well.
            bgra = img_arr[2]  # img_arr[3] is the depth image; img_arr[4] the segmentation mask
            img = np.reshape(bgra, (512, 640, 4)).astype(np.uint8)  # BGRA
            img = cv2.cvtColor(img, cv2.COLOR_BGRA2RGB)
            segmentation_mask = img_arr[4]
            segmentation_mask = np.reshape(segmentation_mask, (512, 640, 1)).astype(np.uint8)
            seg_fig = plt.imshow(segmentation_mask[:, :, 0], interpolation='nearest', aspect='equal')
            plt.axis('off')
            plt.savefig("sim_backSubed_test/{0:0>6}_segmentation_test.jpeg".format(img_serial_num),
                        bbox_inches='tight')

            # seg_fig = plt.imsave("sim_backSubed_test/{0:0>6}_segmentation_test.jpeg".format(img_serial_num), segmentation_mask)
            # segmentation_mask = (255*np.reshape(segmentation_mask, (512, 640))/np.max(segmentation_mask)).astype(np.uint8)
            # segmentation_mask = np.dstack((segmentation_mask, segmentation_mask, segmentation_mask))
            cv2.imwrite("sim_backSubed_test/{0:0>6}_original_test.jpeg".format(img_serial_num), img)
            # cv2.imwrite("sim_backSubed_test/{0:0>6}_segmentation_test.jpeg".format(img_serial_num), segmentation_mask)

            img_serial_num += 1
        if img_serial_num > 20:  # 40000
            with open("sim_images/serial_num_log.txt", "w") as f:
                f.write(str(img_serial_num))
            break
    """ Pads data of format `(sequence, labels)` to `max_length` sequence length
        and returns a triplet `(sequence_, labels_, mask)`. If
        the length of the sequence is longer than `max_length` then it 
        is truncated to `max_length`.
        """

    # create padding vectors
    sequence_padding = PAD_ID

    pad_length = max([0, max_length - len(sequence)])
    padded_sequence = sequence[:max_length]
    padded_sequence.extend([sequence_padding] * pad_length)
    length = min([len(sequence), max_length])

    return padded_sequence, length


def pad_sequences(sequences, max_length):
    padded_sequences, lengths = zip(
        *[pad_sequence(sequence, max_length) for sequence in sequences])
    return padded_sequences, lengths
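A quick worked example of the truncate-or-pad behaviour, assuming `PAD_ID = 0`:

```python
assert pad_sequence([5, 6, 7], 5) == ([5, 6, 7, 0, 0], 3)  # padded, real length kept
assert pad_sequence([5, 6, 7], 2) == ([5, 6], 2)           # truncated to max_length
```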


if __name__ == '__main__':
    from utils import get_data_paths
    import os
    data_dir = os.path.join("..", "data", "squad")
    dataset = SquadDataset(*get_data_paths(data_dir, name='train'), 10)
    print(dataset.get_batch(2))
    print(dataset[:2])
Example 16
import os
import utils
import numpy as np

# Get all the data paths in the dataset.
path = '..\\midis\\'
try:
    data_paths = [os.path.join(path, o)
                  for o in os.listdir(path)
                  if os.path.isdir(os.path.join(path, o))]
except OSError:
    print('Error: invalid data path!')
    raise

count = 0

# Copy the data files into data\train and data\validation; the file
# names are changed to the index numbers of the music pieces.
for data_path in data_paths:
    midi_datas = utils.get_data_paths(data_path)

    for midi_data in midi_datas:
        data_cur = np.load(midi_data)
        # Hold out every fifth piece for validation.
        if count % 5 == 4:
            np.save('data\\validation\\{}.npy'.format(count), data_cur)
        else:
            np.save('data\\train\\{}.npy'.format(count), data_cur)
        count += 1

    print(data_path + ' done!')
Example 17
utils.generate_original_images(dataset_name)

#   Generate training set
train_name_list = ['airplane']  # optional; the default is set in generate_train_images()
utils.generate_train_images(dataset_name, train_name_list)
# utils.generate_train_images(dataset_name)

#   Generate test set
# test_name_list = ['fruits', 'frymire']
# utils.generate_test_images(dataset_name, test_name_list)
utils.generate_test_images(dataset_name)

# Pre-process data
#   Load train and test sets
data_paths = utils.get_data_paths(dataset_name)
train_image_list, train_name_list = utils.load_images(data_paths['train'],
                                                      file_ext='.png')
test_image_list, test_name_list = utils.load_images(data_paths['test'],
                                                    file_ext='.png')

#   Split in non-overlapping patches and vectorize
test_set_ref = utils.generate_vec_set(test_image_list, patch_size)

full_train_set_ref = utils.generate_vec_set(train_image_list, patch_size)
train_set_ref, val_set_ref \
    = utils.generate_cross_validation_sets(full_train_set_ref, fold_number=5, fold_combination=5)

#   Mix and compress train and test sets
mm_type = 'gaussian-rip'  # or 'bernoulli-rip'
M = utils.create_measurement_model(mm_type, patch_size, compression_percent)
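`create_measurement_model` is not shown; for 'gaussian-rip' it presumably draws an M x N i.i.d. Gaussian matrix whose row count is set by the compression percentage, the classic construction satisfying the restricted isometry property (RIP). A sketch under those assumptions (the argument conventions, in particular whether `compression_percent` runs 0-100, are guesses):

```python
import numpy as np

def create_measurement_model_sketch(mm_type, patch_size, compression_percent):
    n = patch_size ** 2                                            # vectorized patch length
    m = max(1, int(round(n * (1 - compression_percent / 100.0))))  # measurements kept
    if mm_type == 'gaussian-rip':
        return np.random.randn(m, n) / np.sqrt(m)                  # i.i.d. N(0, 1/m) entries
    if mm_type == 'bernoulli-rip':
        return np.random.choice([-1.0, 1.0], (m, n)) / np.sqrt(m)  # random sign entries
    raise ValueError('unknown measurement model: {}'.format(mm_type))
```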
Example 18
def train():
    if FLAGS.load_model is not None:
        # str.lstrip strips a set of characters, not a prefix, so build
        # the path explicitly instead.
        checkpoints_dir = "checkpoints/" + FLAGS.load_model.replace(
            "checkpoints/", "", 1)
    else:
        current_time = datetime.now().strftime("%Y%m%d-%H%M")
        checkpoints_dir = "checkpoints/{}".format(current_time)
        try:
            os.makedirs(checkpoints_dir)
        except os.error:
            pass

    data_folder = "Data/tfdata"
    file_tail = "22"
    target_path = get_data_paths(data_folder, file_tail)

    graph = tf.Graph()
    with graph.as_default():
        cycle_gan = CycleGAN(
            X_train_file=FLAGS.X,
            #Y_train_file=FLAGS.Y,
            Y_train_file=target_path,
            batch_size=FLAGS.batch_size,
            image_size=np.array([512, 640]),
            use_lsgan=FLAGS.use_lsgan,
            norm=FLAGS.norm,
            lambda1=FLAGS.lambda1,
            lambda2=FLAGS.lambda2,
            learning_rate=FLAGS.learning_rate,
            beta1=FLAGS.beta1,
            ngf=FLAGS.ngf)
        G_loss, D_Y_loss, F_loss, D_X_loss, fake_y, fake_x = cycle_gan.model()
        optimizers = cycle_gan.optimize(G_loss, D_Y_loss, F_loss, D_X_loss)

        summary_op = tf.summary.merge_all()
        train_writer = tf.summary.FileWriter(checkpoints_dir, graph)
        saver = tf.train.Saver()

    with tf.Session(graph=graph) as sess:
        if FLAGS.load_model is not None:  #load existing model
            checkpoint = tf.train.get_checkpoint_state(checkpoints_dir)
            meta_graph_path = checkpoint.model_checkpoint_path + ".meta"
            restore = tf.train.import_meta_graph(meta_graph_path)
            restore.restore(sess, tf.train.latest_checkpoint(checkpoints_dir))
            step = int(meta_graph_path.split("-")[2].split(".")[0])
        else:  #run a new one
            sess.run(tf.global_variables_initializer())
            step = 0

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        try:
            fake_Y_pool = ImagePool(FLAGS.pool_size)
            fake_X_pool = ImagePool(FLAGS.pool_size)

            while not coord.should_stop():
                # get previously generated images
                fake_y_val, fake_x_val = sess.run([fake_y, fake_x])

                # train
                _, G_loss_val, D_Y_loss_val, F_loss_val, D_X_loss_val, summary = (
                    sess.run(
                        [
                            optimizers, G_loss, D_Y_loss, F_loss, D_X_loss,
                            summary_op
                        ],
                        feed_dict={
                            cycle_gan.fake_y: fake_Y_pool.query(fake_y_val),
                            cycle_gan.fake_x: fake_X_pool.query(fake_x_val)
                        }))

                train_writer.add_summary(summary, step)
                train_writer.flush()

                if step % 100 == 0:
                    logging.info('-----------Step %d:-------------' % step)
                    logging.info('  G_loss   : {}'.format(G_loss_val))
                    logging.info('  D_Y_loss : {}'.format(D_Y_loss_val))
                    logging.info('  F_loss   : {}'.format(F_loss_val))
                    logging.info('  D_X_loss : {}'.format(D_X_loss_val))

                if step % 10000 == 0:
                    save_path = saver.save(sess,
                                           checkpoints_dir + "/model.ckpt",
                                           global_step=step)
                    logging.info("Model saved in file: %s" % save_path)

                step += 1

        except KeyboardInterrupt:
            logging.info('Interrupted')
            coord.request_stop()
        except Exception as e:
            coord.request_stop(e)
        finally:
            save_path = saver.save(sess,
                                   checkpoints_dir + "/model.ckpt",
                                   global_step=step)
            logging.info("Model saved in file: %s" % save_path)
            # When done, ask the threads to stop.
            coord.request_stop()
            coord.join(threads)
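`ImagePool` implements the history buffer from Shrivastava et al. (2017) that CycleGAN uses to stabilize its discriminators: with probability 1/2 the discriminator is shown a previously generated fake instead of the newest one. A sketch of the usual implementation (this repo's own version is not shown):

```python
import random

class ImagePoolSketch:
    """Hypothetical sketch of the history buffer queried above."""

    def __init__(self, pool_size):
        self.pool_size = pool_size
        self.images = []

    def query(self, image):
        if self.pool_size == 0:
            return image
        if len(self.images) < self.pool_size:
            # Fill the pool first, passing new fakes straight through.
            self.images.append(image)
            return image
        if random.random() > 0.5:
            # Swap a stored fake for the new one and return the old fake.
            idx = random.randrange(self.pool_size)
            old = self.images[idx]
            self.images[idx] = image
            return old
        return image
```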
Example 19

# Train model--------------------
model, LossHistory = nnmodel.getNNModel()
history = LossHistory()
optimizer = Adam(lr=1e-4)
model.compile(optimizer, loss="mse")
plot(model, to_file='model.png')
stopping_callback = EarlyStopping(patience=5)

train_data_path = utils.train_data_path  #'../data/Challenge 2/train'
test_data_path = utils.test_data_path  #'../data/Challenge 2/test'
ch, width, height = utils.ch, utils.width, utils.height

print "Preparing training and validation data..."
train_paths = utils.get_data_paths(train_data_path)
train_path_list, valid_path_list = utils.split_train_and_validate(
    train_paths, 0.8)  # Use 80% for training, 20% for validation

# Get list of training images
train_images_df = utils.get_image_df(train_path_list).head(1024)
num_train_images = train_images_df.shape[0]
print "Found {} training images.".format(num_train_images)

# Get list of validation images
valid_images_df = utils.get_image_df(valid_path_list).head(1024)
num_valid_images = valid_images_df.shape[0]
print "Found {} validation images.".format(num_valid_images)

# Save validation images for use by the viewer later
valid_images_df.to_csv(
Example 20
            tokenized_data = word_tokenize(raw_data.lower())
            #print(tokenized_data)
            vector = model.infer_vector(tokenized_data)
            toRet.append(vector)
            toRet2.append(posorneg)
    return toRet, toRet2

for MODELFILE in models:
    print(MODELFILE)
    accuracies = []
    outfile.write(MODELFILE + "\n")
    for fold in range(numfolds):
        # get data
        print("\nfold {}".format(fold))
        print("obtaining data")
        train_pos, test_pos = get_data_paths(data_positive, numfolds, fold)  
        train_neg, test_neg = get_data_paths(data_negative, numfolds, fold)

        # preprocess data
        print("preprocessing data")
        start = time.time()
        pos_features, pos_labels = convertData(train_pos, MODELFILE, POS)
        neg_features, neg_labels = convertData(train_neg, MODELFILE, NEG)
        features = pos_features + neg_features
        labels = pos_labels + neg_labels
        # random shuffle
        z = list(zip(features, labels))
        random.shuffle(z)
        features[:], labels[:] = zip(*z)
        #print(time.time() - start)
Example 21
def prepare_data(
        source_path: str,
        mask_converter: Callable[[np.ndarray],
                                 np.ndarray] = convert_to_binary_water,
        only_distinct: bool = True,
        test_size: float = 0.2,
        step_x: int = 224,
        step_y: int = 224,
        size_x: int = 224,
        size_y: int = 224,
        verbose: bool = True):
    def print_if_verbose(text):
        if verbose:
            print(text)

    tmp_dir_path = '{}_tmp'.format(source_path)
    train_dir_path = '{}_train'.format(source_path)
    test_dir_path = '{}_test'.format(source_path)

    if exists(train_dir_path):
        shutil.rmtree(train_dir_path)
    if exists(test_dir_path):
        shutil.rmtree(test_dir_path)
    if exists(tmp_dir_path):
        shutil.rmtree(tmp_dir_path)

    os.makedirs(tmp_dir_path)
    os.makedirs(train_dir_path)
    os.makedirs(test_dir_path)

    args = get_data_paths(source_path)
    generator = dataset_generator(*args,
                                  size_x=size_x,
                                  size_y=size_y,
                                  step_x=step_x,
                                  step_y=step_y,
                                  mask_converter=mask_converter)

    print_if_verbose('Writing images to tmp folder...')
    # writing all images to tmp folder
    # TODO try to replace with zip infinite generator
    n = 0
    for img, mask in generator:
        if not only_distinct or have_diff_cols(mask):
            cv2.imwrite('{}/{}_img.jpg'.format(tmp_dir_path, n), img)
            cv2.imwrite('{}/{}_mask.png'.format(tmp_dir_path, n), mask)
            n += 1

    print_if_verbose("Done!")

    indices = list(range(n))
    shuffle(indices)
    test_cnt = int(n * test_size)

    train_indices = indices[test_cnt:]
    test_indices = indices[0:test_cnt]

    print_if_verbose("Writing to train folder...")
    copy_from_tmp_folder(tmp_dir_path, train_dir_path, train_indices)
    print_if_verbose("Done!")

    print_if_verbose("Writing to test folder...")
    copy_from_tmp_folder(tmp_dir_path, test_dir_path, test_indices)
    print_if_verbose("Done!")

    shutil.rmtree(tmp_dir_path)
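`copy_from_tmp_folder` only needs to move the selected crop/mask pairs; a plausible implementation matching the `{n}_img.jpg` / `{n}_mask.png` naming used above (the helper body is an assumption):

```python
import shutil
from os.path import join

def copy_from_tmp_folder(src_dir: str, dst_dir: str, indices) -> None:
    # Copy each selected crop and its mask into the split directory.
    for i in indices:
        shutil.copy(join(src_dir, '{}_img.jpg'.format(i)), dst_dir)
        shutil.copy(join(src_dir, '{}_mask.png'.format(i)), dst_dir)
```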