def main(_): """ Typical usage For <model_name> see your folder name in ../checkpoints. Training ``` sh $ python main.py --mode train --model <model> (if restoring or naming a model: --model_name <model_name>) ``` Evaluation ``` sh $ python main.py --mode eval --model <model> --model_name <model_name> ``` Shell ``` sh $ python main.py --mode shell --model <model> --model_name <model_name> ``` """ # Load data train = SquadDataset(*get_data_paths(FLAGS.data_dir, name='train'), max_question_length=FLAGS.max_question_length, max_paragraph_length=FLAGS.max_paragraph_length) dev = SquadDataset(*get_data_paths(FLAGS.data_dir, name='val'), max_question_length=FLAGS.max_question_length, max_paragraph_length=FLAGS.max_paragraph_length ) # change to eval to zero if too long logging.info(f'Train/Dev size {train.length}/{dev.length}') # Load embeddings embed_path = FLAGS.embed_path or pjoin( FLAGS.data_dir, "glove.trimmed.{}.npz".format(FLAGS.embedding_size)) embeddings = np.load(embed_path)['glove'] # 115373 # Build model if FLAGS.model in ('baseline', 'mixed', 'dcnplus', 'dcn'): model = DCN(embeddings, FLAGS.__flags) elif FLAGS.model == 'cat': from networks.cat import Graph model = Graph(embeddings) else: raise ValueError(f'{FLAGS.model} is not a supported model') # Run mode if FLAGS.mode == 'train': save_flags() do_train(model, train, dev) elif FLAGS.mode == 'eval': do_eval(model, train, dev) elif FLAGS.mode == 'overfit': test_overfit(model, train) elif FLAGS.mode == 'shell': do_shell(model, dev) else: raise ValueError(f'Incorrect mode entered, {FLAGS.mode}')
def create_crops(source_path: str,
                 size_x: int, size_y: int,
                 step_x: int, step_y: int,
                 mask_converter: Callable[[np.ndarray], np.ndarray] = identity):
    crops_path = '{}_crops{}x{}'.format(source_path, size_x, size_y)
    clear_and_create(crops_path)

    def pair_creator(img_name: str) -> Tuple[str, str]:
        origin_name = '{}.{}'.format(img_name, 'jpg')
        mask_name = re.sub('_img', '_mask.png', img_name)
        origin_path = join(source_path, origin_name)
        mask_path = join(source_path, mask_name)
        return origin_path, mask_path

    args = get_data_paths(source_path, pair_creator=pair_creator)
    print('creating generator...')
    generator = dataset_generator(*args,
                                  size_x=size_x, size_y=size_y,
                                  step_x=step_x, step_y=step_y,
                                  mask_converter=mask_converter)
    print('generator has been created')
    for idx, (img, mask) in enumerate(generator):
        cv2.imwrite('{}/{}_img.jpg'.format(crops_path, idx), img)
        cv2.imwrite('{}/{}_mask.png'.format(crops_path, idx), mask)
        if idx % 1000 == 0:
            print('{} crops created'.format(idx))
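# A minimal usage sketch for create_crops. The source folder and the crop/step
# sizes below are illustrative, not taken from the original script; the folder
# is assumed to hold <name>_img.jpg / <name>_mask.png pairs, as pair_creator
# above expects. Crops land in 'data/water_crops224x224'.
create_crops('data/water', size_x=224, size_y=224, step_x=112, step_y=112)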
def __init__(self, fold):
    self.fold = fold

    # split the data paths into train/test for this fold
    self.X_train_pos, self.X_test_pos = get_data_paths(
        data_positive, test_data_start, numfolds, self.fold)
    self.X_train_neg, self.X_test_neg = get_data_paths(
        data_negative, test_data_start, numfolds, self.fold)

    # build the vocabulary, filtering out entries below the frequency cutoff
    self.vocab = Counter()
    words_to_delete = set()
    ft = 0

    if bigrams:
        # generate the vocabulary for bigrams
        for path in self.X_train_pos + self.X_train_neg:
            message = open_file(path)
            words = get_words(message)
            for i, word in enumerate(words):
                if i != 0:
                    bigram = words[i - 1] + ' ' + word
                    self.vocab[bigram] += 1
        ft = frequency_cutoff_bigram
        for word in self.vocab:
            if self.vocab[word] < ft:
                words_to_delete.add(word)

    if unigrams:
        # generate the vocabulary for unigrams
        for path in self.X_train_pos + self.X_train_neg:
            message = open_file(path)
            words = get_words(message)
            for word in words:
                self.vocab[word] += 1
        ft = frequency_cutoff_unigram
        for word in self.vocab:
            if self.vocab[word] < ft:
                words_to_delete.add(word)

    for word in words_to_delete:
        del self.vocab[word]

    # initialize the remaining model parameters
    self.vocab_size = len(self.vocab)
    self.p_word_given_pos = dict()
    self.p_word_given_neg = dict()
    self.p_pos = 0
    self.p_neg = 0
    self.k = k
def test_overfit():
    """ Tests that the model can overfit a small dataset. """
    data_hparams = {'max_paragraph_length': 300, 'max_question_length': 25}
    train = SquadDataset(*get_data_paths(FLAGS.data_dir, name='train'), **data_hparams)
    dev = SquadDataset(*get_data_paths(FLAGS.data_dir, name='val'), **data_hparams)  # probably not truncated at these lengths

    embed_path = FLAGS.embed_path or pjoin(
        FLAGS.data_dir, "glove.trimmed.{}.npz".format(FLAGS.embedding_size))
    embeddings = np.load(embed_path)['glove']  # 115373 word vectors

    test_hparams = {
        'learning_rate': 0.01,
        'keep_prob': 1.0,
        'trainable_embeddings': False,
        'clip_gradients': True,
        'max_gradient_norm': 5.0
    }

    model = Baseline(embeddings, test_hparams)

    epochs = 100
    test_size = 32
    steps_per_epoch = 10
    # restrict the training set to its first `test_size` examples
    (train.question, train.paragraph, train.question_length,
     train.paragraph_length, train.answer) = train[:test_size]

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(epochs):
            epoch_start = timer()
            for step in range(steps_per_epoch):
                loss, _ = model.training_step(sess, *train[:test_size])
                if step == 0 and epoch == 0:
                    # with a uniform distribution over start and end positions,
                    # the initial cross entropy is about 2 * log(max_paragraph_length)
                    print(f'Entropy - Result: {loss:.2f}, '
                          f'Expected (approx.): {2*np.log(FLAGS.max_paragraph_length):.2f}')
                if step == steps_per_epoch - 1:
                    print(f'Cross entropy: {loss}')
            train.length = test_size
            print(evaluate(sess, model, train, size=test_size))
            global_step = tf.train.get_global_step().eval()
            print(f'Epoch took {timer() - epoch_start:.2f} s (step: {global_step})')
def main(_):
    # Load data
    train = SquadDataset(*get_data_paths(FLAGS.data_dir, name='train'),
                         max_question_length=FLAGS.max_question_length,
                         max_paragraph_length=FLAGS.max_paragraph_length)
    dev = SquadDataset(*get_data_paths(FLAGS.data_dir, name='val'),
                       max_question_length=FLAGS.max_question_length,
                       max_paragraph_length=FLAGS.max_paragraph_length)  # probably not truncated

    # TODO: convert to the TF Dataset API
    # train = tf.convert_to_tensor(train)
    # dev = tf.convert_to_tensor(dev)
    # tf.contrib.data.Dataset()

    # logging.info(f'Train/Dev size {train.length}/{dev.length}')

    # Load embeddings
    embed_path = FLAGS.embed_path or pjoin(
        FLAGS.data_dir, "glove.trimmed.{}.npz".format(FLAGS.embedding_size))
    embeddings = np.load(embed_path)['glove']  # 115373 word vectors

    is_training = FLAGS.mode in ('train', 'overfit')

    # Build model
    if FLAGS.model == 'dcnplus':
        model = DCNPlus(embeddings, FLAGS.__flags, is_training=is_training)
    elif FLAGS.model == 'baseline':
        model = Baseline(embeddings, FLAGS.__flags)
    elif FLAGS.model == 'cat':
        model = Graph(embeddings, is_training=is_training)
    else:
        raise ValueError(f'{FLAGS.model} is not a supported model')

    # Run mode
    if FLAGS.mode == 'train':
        save_flags()
        do_train(model, train)
    elif FLAGS.mode == 'eval':
        do_eval(model, train, dev, evaluate)
    elif FLAGS.mode == 'overfit':
        test_overfit(model, train, evaluate)
    elif FLAGS.mode == 'shell':
        do_shell(model, dev)
    else:
        raise ValueError(f'Unsupported mode: {FLAGS.mode}')
def main(_):
    # Load data
    train = SquadDataset(*get_data_paths(FLAGS.data_dir, name='train'),
                         max_question_length=FLAGS.max_question_length,
                         max_paragraph_length=FLAGS.max_paragraph_length)
    dev = SquadDataset(*get_data_paths(FLAGS.data_dir, name='val'),
                       max_question_length=FLAGS.max_question_length,
                       max_paragraph_length=FLAGS.max_paragraph_length)  # probably not truncated

    # TODO: convert to the TF Dataset API
    # train = tf.convert_to_tensor(train)
    # dev = tf.convert_to_tensor(dev)
    # tf.contrib.data.Dataset()

    logging.info(f'Train/Dev size {train.length}/{dev.length}')

    # Load embeddings
    embed_path = FLAGS.embed_path or pjoin(
        FLAGS.data_dir, "glove.trimmed.{}.npz".format(FLAGS.embedding_size))
    embeddings = np.load(embed_path)['glove']  # 115373 word vectors
    # vocab_path = FLAGS.vocab_path or pjoin(FLAGS.data_dir, "vocab.dat")
    # vocab, rev_vocab = initialize_vocab(vocab_path)  # dict, list

    is_training = FLAGS.mode == 'train'

    # Build model
    if FLAGS.model == 'dcnplus':
        model = DCNPlus(embeddings, FLAGS.__flags, is_training=is_training)
    elif FLAGS.model == 'baseline':
        model = Baseline(embeddings, FLAGS.__flags)
    elif FLAGS.model == 'cat':
        model = Graph(embeddings, is_training=is_training)
    else:
        raise ValueError(f'{FLAGS.model} is not a supported model')

    # Run mode
    if FLAGS.mode == 'train':
        with open(os.path.join(FLAGS.train_dir, "flags.json"), 'w') as f:
            json.dump(FLAGS.__flags, f, indent=4)
        do_train(model, train, dev, evaluate)
    elif FLAGS.mode == 'eval':
        do_eval(model, train, dev, evaluate)
    else:
        raise ValueError(f'Unsupported mode: {FLAGS.mode}')
def extract_features_from_folder(data_path: str,
                                 out_path: str,
                                 chunk_size: int = 4,
                                 size: float = 0.02) -> None:
    if exists(out_path):
        os.remove(out_path)
    data_paths = get_data_paths(data_path)
    for img_path, mask_path in data_paths:
        extract_features(img_path, mask_path, out_path, chunk_size, size, file_mode='a')
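# A minimal usage sketch for extract_features_from_folder. The input folder
# and output file names are hypothetical; the function appends the features of
# every (image, mask) pair found by get_data_paths to a single output file,
# removing any previous output first.
extract_features_from_folder('data/water_train', 'features/water_train.csv',
                             chunk_size=4, size=0.02)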
def main():
    # environment = KukaCamGymEnv(renders=True, isDiscrete=False)
    environment = KukaCamGymEnv_Reconfigured(renders=True, isDiscrete=False)
    environment._reset()
    # num_of_objects = 50
    # num_of_objects_var = 10
    num_of_objects = 20
    num_of_objects_var = 4
    randomObjs = add_random_objs_to_scene(num_of_objects)
    done = False
    try:
        with open("sim_images/serial_num_log.txt", 'r') as f:
            img_serial_num = int(f.read())
    except (IOError, ValueError):
        print("reading serial number failed!")
        img_serial_num = 0
    step = 0
    # 20 before; 42 would give about 10 snapshots per try, as in the real-world dataset
    snapshot_interval = 50
    viewMat = get_randomized_ViewMat()  # sigma = 0.001
    camInfo = p.getDebugVisualizerCamera()  # viewMat = camInfo[2]
    projMatrix = camInfo[3]
    action_keys = ["grasp/0/commanded_pose/transforms/base_T_endeffector/vec_quat_7",
                   "grasp/1/commanded_pose/transforms/base_T_endeffector/vec_quat_7"]
    data_folder = "/Users/bozai/Desktop/PixelDA/PixelDA/Data/tfdata"
    file_tail = "22"
    data_path = get_data_paths(data_folder, file_tail)
    commands = commands_iterator(data_path)
    while not done:
        attempt = next(commands)
        for action_with_quaternion in attempt:
            quaternion = action_with_quaternion[0][3:]
            euler = p.getEulerFromQuaternion(quaternion)  # [yaw, pitch, roll]
            action = action_with_quaternion[0][:3].tolist()
            action.append(euler[0])
            action.append(euler[2])
            if step % snapshot_interval == 0:
                # capture a camera image
                print("Saving image... Current image count: {}".format(img_serial_num))
                img_arr = p.getCameraImage(640, 512, viewMatrix=viewMat,
                                           projectionMatrix=projMatrix)  # 640*512*3
                write_from_imgarr(img_arr, img_serial_num)
                img_serial_num += 1
            step += 1
            state, reward, done, info = environment.step(action)  # state: (256, 341, 4); info: empty dict
            print("step: {} done: {} reward: {}".format(step, done, reward))
            if done:
                environment._reset()
                randomObjs = add_random_objs_to_scene(
                    num_of_objects + random.choice(range(-num_of_objects_var, num_of_objects_var + 1)))
                # change the view per try, not per image
                viewMat = get_randomized_ViewMat(sigma=0.0001)
                done = False
                print("Environment reset!")
    with open("sim_images/serial_num_log.txt", "w") as f:
        f.write(str(img_serial_num))
def main():
    # source images are 1936 x 1216
    input_size = (320, 480, 3)
    classes = 20
    train_dataset_x = '../seg_train_images/seg_train_images'
    train_dataset_y = '../seg_train_annotations/seg_train_annotations'
    test_size = 0.2
    batch_size = 8

    datasets_paths = get_data_paths(train_dataset_x, train_dataset_y)
    train_data, test_data = train_test_split(datasets_paths, test_size=test_size)

    net = Unet(input_size, classes)
    # net = SegNet(input_size, classes)
    net.summary()

    train_gen = DataGenerator(train_data, input_size, classes, batch_size)
    val_gen = DataGenerator(test_data, input_size, classes, batch_size)

    callbacks = [
        ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, verbose=1),
        EarlyStopping(monitor='val_loss', min_delta=0, patience=9, verbose=1),
        ModelCheckpoint('checkpoint/ep{epoch:03d}-loss{loss:.5f}-val_loss{val_loss:.5f}.h5',
                        monitor='val_loss', verbose=1, save_best_only=True, mode='min')
    ]

    # first stage: higher learning rate
    net.compile(optimizer=Adam(1e-3), loss=categorical_crossentropy)
    history = net.fit_generator(
        train_gen,
        steps_per_epoch=train_gen.num_batches_per_epoch,
        validation_data=val_gen,
        validation_steps=val_gen.num_batches_per_epoch,
        initial_epoch=0,
        epochs=50,
        callbacks=callbacks)
    net.save_weights('checkpoint/first_stage.h5')

    # second stage: re-split the data and continue with a lower learning rate
    train_data, test_data = train_test_split(datasets_paths, test_size=test_size)
    train_gen = DataGenerator(train_data, input_size, classes, batch_size)
    val_gen = DataGenerator(test_data, input_size, classes, batch_size)
    net.compile(optimizer=Adam(1e-4), loss=categorical_crossentropy)
    history = net.fit_generator(
        train_gen,
        steps_per_epoch=train_gen.num_batches_per_epoch,
        validation_data=val_gen,
        validation_steps=val_gen.num_batches_per_epoch,
        initial_epoch=50,
        epochs=100,
        callbacks=callbacks)
    net.save_weights('checkpoint/final_stage.h5')
def dataset_from_dir_sample():
    args = get_data_paths("data/water")
    # args = get_data_paths("data/water_small")
    cnt = 0
    for img, mask in dataset_generator(*args, step_x=56, step_y=56):
        cnt += 1
        print('{})'.format(cnt))
        cv2.imshow("img", img)
        cv2.imshow("mask", mask)
        cv2.waitKey(0)
        # cv2.imwrite('data/splitted_water/ex{}.jpg'.format(cnt), img)
    print('total count:', cnt)
def test_reader():
    # TRAIN_FILE_1 = 'sim_images_a1_low_var.tfrecords'
    # TRAIN_FILE_2 = 'sim_images_a2_low_var.tfrecords'
    TRAIN_FILE_1 = get_data_paths("Data/tfdata1", "34")
    # pprint(TRAIN_FILE_1)

    with tf.Graph().as_default():
        reader1 = SplitedReader(TRAIN_FILE_1, batch_size=2)
        # reader1 = Reader(TRAIN_FILE_1, batch_size=2)
        # reader2 = Reader(TRAIN_FILE_2, batch_size=2)
        images_op1 = reader1.feed()
        # images_op2 = reader2.feed()

        sess = tf.Session()
        init = tf.global_variables_initializer()
        sess.run(init)

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        try:
            step = 0
            while not coord.should_stop():
                # batch_images1, batch_images2 = sess.run([images_op1, images_op2])
                batch_images1 = sess.run(images_op1)
                print("image shape: {}".format(batch_images1.shape))
                # print("image shape: {}".format(batch_images2.shape))
                print("=" * 10)
                step += 1
        except KeyboardInterrupt:
            print('Interrupted')
            coord.request_stop()
        except Exception as e:
            coord.request_stop(e)
        finally:
            # When done, ask the threads to stop.
            coord.request_stop()
            coord.join(threads)
if __name__ == "__main__":
    args = get_args()
    if args.use_cuda:
        try:
            device = torch.device('cuda')
        except Exception as e:
            print("Check that the correct NVIDIA driver is installed!")
            print(e)
            # Leave device as None so loading the model fails loudly: do not
            # silently fall back to CPU while paying for GPU resources on the cloud!
            device = None
    else:
        device = torch.device('cpu')
    torch.manual_seed(args.seed)

    model = utils.get_model().to(device)

    train_file_path, test_file_path = utils.get_data_paths(args.data_dir)
    train_set = utils.ImageDataset(train_file_path, aug_images=True)
    train_loader = utils.image_loader(train_set, args.batch_size, True)
    test_set = utils.ImageDataset(test_file_path, aug_images=False)
    test_loader = utils.image_loader(test_set, args.test_batch_size, False)

    loss_func = torch.nn.CrossEntropyLoss(reduction='mean')
    optimizer = torch.optim.Adam(model.fc.parameters(), lr=args.lr)

    model, best_performance_metrics, log_df = train(
        args, model, train_loader, test_loader, loss_func, optimizer, device)
    utils.save_model(args.model_dir, model)
    utils.save_job_log(args.log_dir, log_df, best_performance_metrics)
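# A hypothetical invocation of this training script. The flag names are
# inferred from the args.* attributes used above; the real names (and their
# spelling) are defined in get_args():
#   python train.py --data_dir ./data --batch_size 64 --test_batch_size 256 \
#                   --lr 1e-3 --seed 42 --use_cuda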
def main(_): """ Typical usage For <model_name> see your folder name in ../checkpoints. Training ``` sh $ python main.py --mode train --model <model> (if restoring or naming a model: --model_name <model_name>) ``` Evaluation ``` sh $ python main.py --mode eval --model <model> --model_name <model_name> ``` Shell ``` sh $ python main.py --mode shell --model <model> --model_name <model_name> ``` """ # Load data train = SquadDataset(*get_data_paths(FLAGS.data_dir, name='train'), max_question_length=FLAGS.max_question_length, max_paragraph_length=FLAGS.max_paragraph_length) dev = SquadDataset(*get_data_paths(FLAGS.data_dir, name='val'), max_question_length=FLAGS.max_question_length, max_paragraph_length=FLAGS.max_paragraph_length ) # change to eval to zero if too long logging.info(f'Train/Dev size {train.length}/{dev.length}') # Load embeddings embed_path = FLAGS.embed_path or pjoin( FLAGS.data_dir, "glove.trimmed.{}.npz".format(FLAGS.embedding_size)) embeddings = np.load(embed_path)['glove'] # 115373 if FLAGS.use_siamese: # get config file for siamese model siamese_config = '../../paraphrase-id-tensorflow-master/logs/baseline_siamese/{}/trainparams.json'.format( FLAGS.siamese_model_num) with open(siamese_config, 'r') as f: siamese_config = json.load(f) siamese_config['mode'] = 'test' checkpoint_dir = '../../paraphrase-id-tensorflow-master/models/baseline_siamese/{}/'.format( FLAGS.siamese_model_num) # siamese_graph = ImportGraph(checkpoint_dir, embeddings) siamese_graph = ImportModel(checkpoint_dir, siamese_config, embeddings) # Build model if FLAGS.model in ('baseline', 'mixed', 'dcnplus', 'dcn'): # with tf.variable_scope('dcn'): model = DCN(embeddings, FLAGS.__flags, siamese_output_dim=siamese_config['rnn_hidden_size']) elif FLAGS.model == 'cat': from networks.cat import Graph model = Graph(embeddings) else: raise ValueError(f'{FLAGS.model} is not a supported model') # Run mode if FLAGS.mode == 'train': save_flags() do_train(model, train, dev, input_model=siamese_graph) elif FLAGS.mode == 'eval': do_eval(model, train, dev, input_model=siamese_graph) elif FLAGS.mode == 'overfit': test_overfit(model, train, input_model=siamese_graph) elif FLAGS.mode == 'shell': do_shell(model, dev, input_model=siamese_graph) else: raise ValueError(f'Incorrect mode entered, {FLAGS.mode}')
def setting_simulation_env():
    time_step = 1. / 240.
    num_of_objects = 20
    num_of_objects_var = 4
    # p.connect(p.GUI)
    reset_sim_env()
    viewMat = get_randomized_ViewMat()  # sigma = 0.001
    projMatrix = get_ProjMat()

    data_folder = "/Users/bozai/Desktop/PixelDA/PixelDA/Data/tfdata"
    file_tail = "22"
    data_path = get_data_paths(data_folder, file_tail)
    commands = commands_iterator(data_path)

    img_serial_num = 0
    kukaEndEffectorIndex = 6
    # joint damping coefficients
    jd = [0.00001] * 14

    while True:
        kuka_arm = reset_sim_env()
        kukaUid = kuka_arm.kukaUid
        attempt, image = next(commands)
        randomObjs = add_random_objs_to_scene(
            num_of_objects + random.choice(range(-num_of_objects_var, num_of_objects_var + 1)))
        for action_with_quaternion in attempt:
            quaternion = action_with_quaternion[0][3:]
            pos = action_with_quaternion[0][:3]
            # kuka_arm.applyAction(action) is avoided here: buggy in the module
            # implementation, so the joints are set via inverse kinematics instead
            jointPoses = p.calculateInverseKinematics(
                kukaUid, kukaEndEffectorIndex, pos, quaternion, jointDamping=jd)
            for i in range(12):
                p.resetJointState(kukaUid, i, jointPoses[i])

            print("Saving image... Current image count: {}".format(img_serial_num))
            img_arr = p.getCameraImage(640, 512, viewMatrix=viewMat,
                                       projectionMatrix=projMatrix,
                                       lightDirection=[1, 1, 1])  # 640*512*3
            # write_from_imgarr(img_arr, img_serial_num)
            subed = substitute_from_imgarr(img_arr, image)
            subed = cv2.cvtColor(subed, cv2.COLOR_RGB2BGR)
            cv2.imwrite("sim_backSubed_test/{0:0>6}_subed_test.jpeg".format(img_serial_num), subed)

            # save the original image and segmentation mask as well
            bgra = img_arr[2]  # img_arr[3] is the depth image; img_arr[4] is the segmentation mask
            img = np.reshape(bgra, (512, 640, 4)).astype(np.uint8)  # BGRA
            img = cv2.cvtColor(img, cv2.COLOR_BGRA2RGB)
            segmentation_mask = img_arr[4]
            segmentation_mask = np.reshape(segmentation_mask, (512, 640, 1)).astype(np.uint8)
            plt.imshow(segmentation_mask[:, :, 0], interpolation='nearest', aspect='equal')
            plt.axis('off')
            plt.savefig("sim_backSubed_test/{0:0>6}_segmentation_test.jpeg".format(img_serial_num),
                        bbox_inches='tight')
            cv2.imwrite("sim_backSubed_test/{0:0>6}_original_test.jpeg".format(img_serial_num), img)
            img_serial_num += 1
        if img_serial_num > 20:  # 40000 for a full run
            with open("sim_images/serial_num_log.txt", "w") as f:
                f.write(str(img_serial_num))
            break
""" Pads data of format `(sequence, labels)` to `max_length` sequence length and returns a triplet `(sequence_, labels_, mask)`. If the length of the sequence is longer than `max_length` then it is truncated to `max_length`. """ # create padding vectors sequence_padding = PAD_ID pad_length = max([0, max_length - len(sequence)]) padded_sequence = sequence[:max_length] padded_sequence.extend([sequence_padding] * pad_length) length = min([len(sequence), max_length]) return padded_sequence, length def pad_sequences(sequences, max_length): padded_sequences, lengths = zip( *[pad_sequence(sequence, max_length) for sequence in sequences]) return padded_sequences, lengths if __name__ == '__main__': from utils import get_data_paths import os data_dir = os.path.join("..", "data", "squad") dataset = SquadDataset(*get_data_paths(data_dir, name='train'), 10) print(dataset.get_batch(2)) print(dataset[:2])
import os

import numpy as np

import utils

# get all the data paths in the dataset
path = '..\\midis\\'
try:
    data_paths = [os.path.join(path, o)
                  for o in os.listdir(path)
                  if os.path.isdir(os.path.join(path, o))]
except OSError:
    print('Error: invalid data path!')
    raise

count = 0
# Copy the data files into data\train and data\validation;
# file names are replaced by the index numbers of the music pieces.
for data_path in data_paths:
    midi_datas = utils.get_data_paths(data_path)
    for midi_data in midi_datas:
        data_cur = np.load(midi_data)
        if count % 5 == 4:
            np.save('data\\validation\\{}.npy'.format(count), data_cur)
        else:
            np.save('data\\train\\{}.npy'.format(count), data_cur)
        count += 1
    print(data_path + ' done!')
utils.generate_original_images(dataset_name)

# Generate training set
train_name_list = ['airplane']  # optional; the default is set in generate_train_images()
utils.generate_train_images(dataset_name, train_name_list)
# utils.generate_train_images(dataset_name)

# Generate test set
# test_name_list = ['fruits', 'frymire']
# utils.generate_test_images(dataset_name, test_name_list)
utils.generate_test_images(dataset_name)

# Pre-process data
# Load train and test sets
data_paths = utils.get_data_paths(dataset_name)
train_image_list, train_name_list = utils.load_images(data_paths['train'], file_ext='.png')
test_image_list, test_name_list = utils.load_images(data_paths['test'], file_ext='.png')

# Split into non-overlapping patches and vectorize
test_set_ref = utils.generate_vec_set(test_image_list, patch_size)
full_train_set_ref = utils.generate_vec_set(train_image_list, patch_size)
train_set_ref, val_set_ref = utils.generate_cross_validation_sets(
    full_train_set_ref, fold_number=5, fold_combination=5)

# Mix and compress train and test sets
mm_type = 'gaussian-rip'  # or 'bernoulli-rip'
M = utils.create_measurement_model(mm_type, patch_size, compression_percent)
def train():
    if FLAGS.load_model is not None:
        # note: str.lstrip strips *characters*, not a prefix, so remove the
        # "checkpoints/" prefix explicitly
        checkpoints_dir = "checkpoints/" + FLAGS.load_model.replace("checkpoints/", "", 1)
    else:
        current_time = datetime.now().strftime("%Y%m%d-%H%M")
        checkpoints_dir = "checkpoints/{}".format(current_time)
        try:
            os.makedirs(checkpoints_dir)
        except os.error:
            pass

    data_folder = "Data/tfdata"
    file_tail = "22"
    target_path = get_data_paths(data_folder, file_tail)

    graph = tf.Graph()
    with graph.as_default():
        cycle_gan = CycleGAN(
            X_train_file=FLAGS.X,
            # Y_train_file=FLAGS.Y,
            Y_train_file=target_path,
            batch_size=FLAGS.batch_size,
            image_size=np.array([512, 640]),
            use_lsgan=FLAGS.use_lsgan,
            norm=FLAGS.norm,
            lambda1=FLAGS.lambda1,
            lambda2=FLAGS.lambda2,
            learning_rate=FLAGS.learning_rate,
            beta1=FLAGS.beta1,
            ngf=FLAGS.ngf)
        G_loss, D_Y_loss, F_loss, D_X_loss, fake_y, fake_x = cycle_gan.model()
        optimizers = cycle_gan.optimize(G_loss, D_Y_loss, F_loss, D_X_loss)

        summary_op = tf.summary.merge_all()
        train_writer = tf.summary.FileWriter(checkpoints_dir, graph)
        saver = tf.train.Saver()

    with tf.Session(graph=graph) as sess:
        if FLAGS.load_model is not None:
            # load an existing model
            checkpoint = tf.train.get_checkpoint_state(checkpoints_dir)
            meta_graph_path = checkpoint.model_checkpoint_path + ".meta"
            restore = tf.train.import_meta_graph(meta_graph_path)
            restore.restore(sess, tf.train.latest_checkpoint(checkpoints_dir))
            step = int(meta_graph_path.split("-")[2].split(".")[0])
        else:
            # start a new run
            sess.run(tf.global_variables_initializer())
            step = 0

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        try:
            fake_Y_pool = ImagePool(FLAGS.pool_size)
            fake_X_pool = ImagePool(FLAGS.pool_size)

            while not coord.should_stop():
                # get previously generated images
                fake_y_val, fake_x_val = sess.run([fake_y, fake_x])

                # train
                _, G_loss_val, D_Y_loss_val, F_loss_val, D_X_loss_val, summary = sess.run(
                    [optimizers, G_loss, D_Y_loss, F_loss, D_X_loss, summary_op],
                    feed_dict={
                        cycle_gan.fake_y: fake_Y_pool.query(fake_y_val),
                        cycle_gan.fake_x: fake_X_pool.query(fake_x_val)
                    })

                train_writer.add_summary(summary, step)
                train_writer.flush()

                if step % 100 == 0:
                    logging.info('-----------Step %d:-------------' % step)
                    logging.info('  G_loss   : {}'.format(G_loss_val))
                    logging.info('  D_Y_loss : {}'.format(D_Y_loss_val))
                    logging.info('  F_loss   : {}'.format(F_loss_val))
                    logging.info('  D_X_loss : {}'.format(D_X_loss_val))

                if step % 10000 == 0:
                    save_path = saver.save(sess, checkpoints_dir + "/model.ckpt",
                                           global_step=step)
                    logging.info("Model saved in file: %s" % save_path)

                step += 1
        except KeyboardInterrupt:
            logging.info('Interrupted')
            coord.request_stop()
        except Exception as e:
            coord.request_stop(e)
        finally:
            save_path = saver.save(sess, checkpoints_dir + "/model.ckpt", global_step=step)
            logging.info("Model saved in file: %s" % save_path)
            # When done, ask the threads to stop.
            coord.request_stop()
            coord.join(threads)
# Train model --------------------
model, LossHistory = nnmodel.getNNModel()
history = LossHistory()
optimizer = Adam(lr=1e-4)
model.compile(optimizer, loss="mse")
plot(model, to_file='model.png')
stopping_callback = EarlyStopping(patience=5)

train_data_path = utils.train_data_path  # '../data/Challenge 2/train'
test_data_path = utils.test_data_path    # '../data/Challenge 2/test'
ch, width, height = utils.ch, utils.width, utils.height

print("Preparing training and validation data...")
train_paths = utils.get_data_paths(train_data_path)
# Use 80% for training, 20% for validation
train_path_list, valid_path_list = utils.split_train_and_validate(train_paths, 0.8)

# Get the list of training images
train_images_df = utils.get_image_df(train_path_list).head(1024)
num_train_images = train_images_df.shape[0]
print("Found {} training images.".format(num_train_images))

# Get the list of validation images
valid_images_df = utils.get_image_df(valid_path_list).head(1024)
num_valid_images = valid_images_df.shape[0]
print("Found {} validation images.".format(num_valid_images))

# Save validation images for use by the viewer later
valid_images_df.to_csv(
# (reconstructed head of convertData: the original snippet starts mid-function,
# so the signature, the Doc2Vec load, and the per-file loop below are
# assumptions inferred from how the function is called further down)
def convertData(paths, MODELFILE, posorneg):
    model = Doc2Vec.load(MODELFILE)  # assumed: a gensim Doc2Vec model on disk
    toRet = []
    toRet2 = []
    for path in paths:
        raw_data = open_file(path)  # assumed: same helper as in the classifier above
        tokenized_data = word_tokenize(raw_data.lower())
        # print(tokenized_data)
        vector = model.infer_vector(tokenized_data)
        toRet.append(vector)
        toRet2.append(posorneg)
    return toRet, toRet2


for MODELFILE in models:
    print(MODELFILE)
    accuracies = []
    outfile.write(MODELFILE + "\n")
    for fold in range(numfolds):
        # get data
        print("\nfold {}".format(fold))
        print("obtaining data")
        train_pos, test_pos = get_data_paths(data_positive, numfolds, fold)
        train_neg, test_neg = get_data_paths(data_negative, numfolds, fold)

        # preprocess data
        print("preprocessing data")
        start = time.time()
        pos_features, pos_labels = convertData(train_pos, MODELFILE, POS)
        neg_features, neg_labels = convertData(train_neg, MODELFILE, NEG)
        features = pos_features + neg_features
        labels = pos_labels + neg_labels

        # shuffle features and labels together
        z = list(zip(features, labels))
        random.shuffle(z)
        features[:], labels[:] = zip(*z)
        # print(time.time() - start)
def prepare_data(source_path: str,
                 mask_converter: Callable[[np.ndarray], np.ndarray] = convert_to_binary_water,
                 only_distinct: bool = True,
                 test_size: float = 0.2,
                 step_x: int = 224,
                 step_y: int = 224,
                 size_x: int = 224,
                 size_y: int = 224,
                 verbose: bool = True):

    def print_if_verbose(text):
        if verbose:
            print(text)

    tmp_dir_path = '{}_tmp'.format(source_path)
    train_dir_path = '{}_train'.format(source_path)
    test_dir_path = '{}_test'.format(source_path)

    if exists(train_dir_path):
        shutil.rmtree(train_dir_path)
    if exists(test_dir_path):
        shutil.rmtree(test_dir_path)
    if exists(tmp_dir_path):
        shutil.rmtree(tmp_dir_path)

    os.makedirs(tmp_dir_path)
    os.makedirs(train_dir_path)
    os.makedirs(test_dir_path)

    args = get_data_paths(source_path)
    generator = dataset_generator(*args,
                                  size_x=size_x, size_y=size_y,
                                  step_x=step_x, step_y=step_y,
                                  mask_converter=mask_converter)

    print_if_verbose('Writing images to tmp folder...')
    # write all crops to the tmp folder
    # TODO: try to replace with a zip infinite generator
    n = 0
    for img, mask in generator:
        if not only_distinct or have_diff_cols(mask):
            cv2.imwrite('{}/{}_img.jpg'.format(tmp_dir_path, n), img)
            cv2.imwrite('{}/{}_mask.png'.format(tmp_dir_path, n), mask)
            n += 1
    print_if_verbose('Done!')

    indices = list(range(n))
    shuffle(indices)
    test_cnt = int(n * test_size)
    train_indices = indices[test_cnt:]
    test_indices = indices[:test_cnt]

    print_if_verbose('Writing to train folder...')
    copy_from_tmp_folder(tmp_dir_path, train_dir_path, train_indices)
    print_if_verbose('Done!')

    print_if_verbose('Writing to test folder...')
    copy_from_tmp_folder(tmp_dir_path, test_dir_path, test_indices)
    print_if_verbose('Done!')

    shutil.rmtree(tmp_dir_path)
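# A minimal usage sketch for prepare_data, with an illustrative source path:
# this would crop 'data/water' into 224x224 tiles, keep only crops whose masks
# contain more than one colour, and split them into 'data/water_train' and
# 'data/water_test' (convert_to_binary_water is the default mask converter
# defined elsewhere in this repo).
prepare_data('data/water', test_size=0.2, only_distinct=True)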