def find_decision_thresholds(validation_ids, weights_fp, logger, mod):
    """Pick a per-response decision threshold that maximizes F1 on validation data.

    The function runs in two phases:

    1. Predict every validation batch with ``mod`` and save the targets and
       predictions to ``y/<batch>_y.npy`` / ``y/<batch>_y_hat.npy``.
    2. For each response column, reload those files, gather that column's
       targets and predictions across all batches, and scan candidate
       thresholds 0.00 .. 0.99, keeping the one with the highest ``get_f1``.

    The result is written to ``decision_thresholds.json`` and returned.

    :param validation_ids: ids handed to ``DataGen`` to build validation batches
    :param weights_fp: path of the weights file loaded into ``mod``
    :param logger: logger used for progress messages
    :param mod: model exposing ``load_weights`` and ``predict_on_batch``
    :return: dict mapping response index -> chosen threshold
    """
    import os  # local import: only needed to create the scratch directory

    # first need to save arrays with y and yhat
    mod.load_weights(weights_fp)
    batch_size = 16
    test_gen = DataGen(validation_ids, batch_size)
    default_thresh = 0.33

    # np.save does not create missing directories.
    os.makedirs("y", exist_ok=True)

    batch_ids = []
    logger.info("Generating predictions and saving")
    for batch_num, batch in enumerate(tqdm(test_gen)):
        x = batch[0]
        y = batch[1]
        y_hat = mod.predict_on_batch(x)
        np.save(f"y/{batch_num}_y.npy", y)
        np.save(f"y/{batch_num}_y_hat.npy", y_hat)
        batch_ids.append(batch_num)

    # NOTE(review): 29351 is presumably the number of response columns;
    # confirm it matches y.shape[1].
    responses = {num: default_thresh for num in range(29351)}
    thresholds = [x * .01 for x in range(100)]
    logger.info("Determining optimal thresholds for each response")
    for resp in tqdm(responses):
        ys = []
        y_hats = []
        for batch_id in batch_ids:
            # Load the file of *this* batch (the original used the stale
            # post-loop counter and so always asked for a non-existent file).
            y = np.load(f"y/{batch_id}_y.npy")
            y_hat = np.load(f"y/{batch_id}_y_hat.npy")
            ys.extend(y[:, resp])
            y_hats.extend(y_hat[:, resp])

        if not ys:
            # No validation data at all: keep the default threshold.
            continue

        f1s = []
        for threshold in thresholds:
            # Score on the values accumulated over every batch for this
            # response, not on the last batch's full matrices.
            f1s.append(get_f1(threshold, ys, y_hats))
            # early stopping: only once ten scores exist, so f1s[-10] is valid
            if len(f1s) >= 10 and f1s[-10] > f1s[-1]:
                break
        responses[resp] = thresholds[f1s.index(max(f1s))]

    with open("decision_thresholds.json", "w") as out:
        json.dump(responses, out)
    return responses
def main(engine: str, table_name: str):
    """
    Generate (stand-in for collect) one micro-batch of data and load it onto
    the data table while tracking and limiting size.

    :param engine: str, SQL server and table route
    :param table_name: str, name of data table in database
    """
    # The generator is built before the writer, as in the original ordering.
    generator = DataGen()
    writer = DatabaseWriter(table_name=table_name, engine=engine)

    # Create one new observation and write it straight to the table.
    writer.data_insertion(generator.dict_gen())
def test(weights_fp, logger, threshold, mod=None, decision_thresholds=None):
    """Evaluate a model on the ids listed in ``test_ids`` and log F1/precision/recall.

    :param weights_fp: path of the weights file loaded into ``mod``
    :param logger: logger used to report the metrics
    :param threshold: value echoed in the log line (the per-response
        thresholds actually applied come from ``decision_thresholds``)
    :param mod: model to evaluate; when omitted, ``get_model(2048)`` is built
        at call time rather than once at definition time
    :param decision_thresholds: required mapping ``response index -> threshold``
        with keys ``0 .. len - 1``
    :raises ValueError: if ``decision_thresholds`` is not supplied
    """
    # The original signature put a required parameter after a defaulted one,
    # which is a SyntaxError. Both now default to None and are checked here.
    if decision_thresholds is None:
        raise ValueError("decision_thresholds is required")
    if mod is None:
        mod = get_model(2048)

    mod.load_weights(weights_fp)

    # this is a dict, need to make a list for better numba usage
    thresholds = [
        decision_thresholds[resp] for resp in range(len(decision_thresholds))
    ]

    with open("test_ids", "r") as handle:
        test_ids = [line.strip("\n") for line in handle]

    true_pos = 0
    false_pos = 0
    false_neg = 0
    batch_size = 16
    test_gen = DataGen(test_ids, batch_size)
    for batch in tqdm(test_gen):
        x = batch[0]
        y = batch[1]
        y_hat = mod.predict_on_batch(x)
        tp_temp, fp_temp, fn_temp = count_metrics(y, y_hat, thresholds)
        true_pos += tp_temp
        false_pos += fp_temp
        false_neg += fn_temp

    if true_pos > 0:
        precision = true_pos / (true_pos + false_pos)
        recall = true_pos / (true_pos + false_neg)
        f1 = (2 * precision * recall) / (precision + recall)
    else:
        # Without true positives all three metrics are zero; binding them
        # here keeps the log call below from raising NameError.
        precision = 0
        recall = 0
        f1 = 0

    logger.info(
        f"F1: {f1}, precision: {precision}, recall: {recall}, threshold: {threshold}"
    )
def load_data() -> ((), ()):
    """
    Load the configured dataset and prepare it for training.

    :return: ((training data, one-hot training labels),
              (testing data, one-hot testing labels))
    """
    # Optionally generate images from wavfiles first.
    if CFG.CONVERT_AUDIO:
        DataGen().convert_audio_into_images()

    # Default geometry from the config; the custom loader may override it.
    row, col, depth = CFG.ROW, CFG.COL, CFG.DEPTH

    if not CFG.MNIST_DATA:
        # use custom data
        (train_data, train_labels), (test_data, test_labels), row, col, depth = custom_load()
    else:
        # use MNIST data
        # test data shape: (10000, 28, 28), labels: (10000, )
        # train data shape: (60000, 28, 28), labels: (60000, )
        # e.g. single data sample: (1, 28, 28), label: (1, )
        (train_data, train_labels), (test_data, test_labels) = mnist.load_data()

    print(f'\ntraining_data: \n{train_data}')
    print(f'\ntraining_labels: \n{train_labels}')
    print(f'\ntest data: \n{test_data}')
    print(f'\ntest labels: \n{test_labels}\n')

    # Scale to [0, 1] floats, then add the explicit (row, col, depth) layout
    # that tensorflow expects.
    scaled_train = train_data.astype('float32') / 255
    scaled_test = test_data.astype('float32') / 255
    training_data = scaled_train.reshape(scaled_train.shape[0], row, col, depth)
    testing_data = scaled_test.reshape(scaled_test.shape[0], row, col, depth)

    # One-hot encode both label sets against the configured class count.
    num_classes = CFG.TOTAL_LABELS
    training_labels = to_categorical(train_labels, num_classes)
    testing_labels = to_categorical(test_labels, num_classes)

    return (training_data, training_labels), (testing_data, testing_labels)
def __init__(self, debug=False):
    """Set up either the real device or, in debug mode, a data generator.

    :param debug: when False, connect to the device and record the outcome in
        ``self.isConnected``; when True, skip the device and create
        ``self.datagen`` instead.
    """
    self.debug = debug
    if not debug:
        # Init device
        try:
            self.device = device()
            self.device.init()
            self.isConnected = True
        except Exception:
            # Narrowed from a bare ``except`` so Ctrl-C / SystemExit still
            # propagate; any device failure is still reported, not raised.
            self.isConnected = False
            # Parenthesized form is valid on both Python 2 and Python 3.
            print('\tConnection error !')
    else:
        # Debug mode
        self.datagen = DataGen(25)

    # Init logger
    self.log = Logger(debug=debug)
    # One slot per sensor, initialised to -1.
    self.data = [-1] * N_SENSORS
# Fit for exactly one epoch so the bookkeeping below runs between epochs.
results = mdl.fit_generator(
    train_gene,
    steps_per_epoch=len(train_imgs) // BATCH_SIZE,
    epochs=1,
    validation_data=(val_imgs, val_masks),
)

# A previous validation loss exists and this epoch did not beat the best one:
# advance both patience counters. Otherwise (first epoch or new best) reset
# them and save the weights.
if loss_values_val and not (
        results.history['val_loss'] < np.array(loss_values_val).min()):
    lr_counter += 1
    es_counter += 1
else:
    lr_counter = 0
    es_counter = 0
    mdl.save_weights(save_dir, overwrite=True)

# Record this epoch's losses only after the comparison above.
loss_values.append(results.history['loss'])
loss_values_val.append(results.history['val_loss'])

# Predict the training set in a fixed (unshuffled) order so that prediction i
# lines up with train_masks[i] when the per-sample weights are recomputed.
train_gene = DataGen(
    train_imgs,
    train_masks,
    weights=init_weights,
    batch_size=BATCH_SIZE,
    shuffle=False,
)
train_pred = mdl.predict_generator(train_gene)
for p_ind, pred in enumerate(train_pred):
    weights[p_ind] = jacc_loss(pred, train_masks[p_ind])

# Rebuild the shuffled generator with the refreshed weights.
train_gene = DataGen(
    train_imgs,
    train_masks,
    weights=weights,
    batch_size=BATCH_SIZE,
    shuffle=True,
)
def train(conf, train_shape_list, train_data_list, val_data_list, all_train_data_list):
    """Train the network, interleaving training, validation and data generation.

    For every epoch the function (1) waits for the data produced by the
    ``DataGen`` jobs started in the previous epoch, (2) optionally queues
    "sample-succ" recollect jobs and starts generation for the next epoch,
    (3) loads the training set and trains batch by batch, saving checkpoints at
    the first batch of each epoch, and (4) runs validation batches so that the
    validation progress fraction keeps pace with the training one.

    :param conf: configuration object; read throughout for hyper-parameters,
        paths, logging switches and the ``sample_succ`` / ``resume`` flags
    :param train_shape_list: not referenced in this function body
    :param train_data_list: training data entries for the first epoch;
        replaced by ``datagen.join_all()`` output on later epochs
    :param val_data_list: data entries loaded into the validation dataset
    :param all_train_data_list: offline-generated entries; those whose
        trailing ``_<id>`` falls in the current epoch's id window are appended
        to the training list

    NOTE(review): ``train_writer`` / ``val_writer`` are only bound when
    ``conf.no_tb_log`` is false but are passed to ``forward`` unconditionally,
    so running with ``conf.no_tb_log`` set raises NameError.
    """
    # create training and validation datasets and data loaders
    data_features = ['pcs', 'pc_pxids', 'pc_movables', 'gripper_img_target', 'gripper_direction_camera', 'gripper_forward_direction_camera', \
            'result', 'cur_dir', 'shape_id', 'trial_id', 'is_original']

    # load network model
    model_def = utils.get_model_module(conf.model_version)

    # create models
    network = model_def.Network(conf.feat_dim)
    utils.printout(conf.flog, '\n' + str(network) + '\n')

    # create optimizers
    network_opt = torch.optim.Adam(network.parameters(), lr=conf.lr, weight_decay=conf.weight_decay)

    # learning rate scheduler
    network_lr_scheduler = torch.optim.lr_scheduler.StepLR(
        network_opt, step_size=conf.lr_decay_every, gamma=conf.lr_decay_by)

    # create logs
    if not conf.no_console_log:
        header = ' Time Epoch Dataset Iteration Progress(%) LR TotalLoss'
    if not conf.no_tb_log:
        # https://github.com/lanpa/tensorboard-pytorch
        from tensorboardX import SummaryWriter
        train_writer = SummaryWriter(os.path.join(conf.exp_dir, 'train'))
        val_writer = SummaryWriter(os.path.join(conf.exp_dir, 'val'))

    # send parameters to device
    network.to(conf.device)
    utils.optimizer_to_device(network_opt, conf.device)

    # load dataset
    # (the training dataset is only constructed here; its data is loaded per epoch)
    train_dataset = SAPIENVisionDataset([conf.primact_type], conf.category_types, data_features, conf.buffer_max_num, \
            abs_thres=conf.abs_thres, rel_thres=conf.rel_thres, dp_thres=conf.dp_thres, img_size=conf.img_size, no_true_false_equal=conf.no_true_false_equal)

    val_dataset = SAPIENVisionDataset([conf.primact_type], conf.category_types, data_features, conf.buffer_max_num, \
            abs_thres=conf.abs_thres, rel_thres=conf.rel_thres, dp_thres=conf.dp_thres, img_size=conf.img_size, no_true_false_equal=conf.no_true_false_equal)
    val_dataset.load_data(val_data_list)
    utils.printout(conf.flog, str(val_dataset))

    val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=conf.batch_size, shuffle=False, pin_memory=True, \
            num_workers=0, drop_last=True, collate_fn=utils.collate_feats, worker_init_fn=utils.worker_init_fn)
    val_num_batch = len(val_dataloader)

    # create a data generator
    datagen = DataGen(conf.num_processes_for_datagen, conf.flog)

    # sample succ
    if conf.sample_succ:
        sample_succ_list = []
        sample_succ_dirs = []

    # start training
    start_time = time.time()

    last_train_console_log_step, last_val_console_log_step = None, None

    # if resume
    start_epoch = 0
    if conf.resume:
        # figure out the latest epoch to resume
        # NOTE(review): this keeps the last matching entry in os.listdir order,
        # which is not necessarily the highest epoch number — confirm.
        for item in os.listdir(os.path.join(conf.exp_dir, 'ckpts')):
            if item.endswith('-train_dataset.pth'):
                start_epoch = int(item.split('-')[0])

        # load states for network, optimizer, lr_scheduler, sample_succ_list
        data_to_restore = torch.load(
            os.path.join(conf.exp_dir, 'ckpts', '%d-network.pth' % start_epoch))
        network.load_state_dict(data_to_restore)
        data_to_restore = torch.load(
            os.path.join(conf.exp_dir, 'ckpts', '%d-optimizer.pth' % start_epoch))
        network_opt.load_state_dict(data_to_restore)
        data_to_restore = torch.load(
            os.path.join(conf.exp_dir, 'ckpts', '%d-lr_scheduler.pth' % start_epoch))
        network_lr_scheduler.load_state_dict(data_to_restore)

        # rmdir and make a new dir for the current sample-succ directory
        old_sample_succ_dir = os.path.join(
            conf.data_dir, 'epoch-%04d_sample-succ' % (start_epoch - 1))
        utils.force_mkdir(old_sample_succ_dir)

    # train for every epoch
    for epoch in range(start_epoch, conf.epochs):
        ### collect data for the current epoch
        # (skipped on the first epoch of this run: no generation jobs have been started yet)
        if epoch > start_epoch:
            utils.printout(
                conf.flog,
                f' [{strftime("%H:%M:%S", time.gmtime(time.time()-start_time)):>9s} Waiting epoch-{epoch} data ]'
            )
            train_data_list = datagen.join_all()
            utils.printout(
                conf.flog,
                f' [{strftime("%H:%M:%S", time.gmtime(time.time()-start_time)):>9s} Gathered epoch-{epoch} data ]'
            )

            # collect the distinct parent folders of the gathered entries and
            # write, per folder, the list of entry names it contains
            cur_data_folders = []
            for item in train_data_list:
                item = '/'.join(item.split('/')[:-1])
                if item not in cur_data_folders:
                    cur_data_folders.append(item)
            for cur_data_folder in cur_data_folders:
                with open(os.path.join(cur_data_folder, 'data_tuple_list.txt'), 'w') as fout:
                    for item in train_data_list:
                        if cur_data_folder == '/'.join(item.split('/')[:-1]):
                            fout.write(item.split('/')[-1] + '\n')

            # load offline-generated sample-random data
            # NOTE(review): placement inside the `epoch > start_epoch` branch is
            # reconstructed from a collapsed source line — confirm against the original.
            for item in all_train_data_list:
                valid_id_l = conf.num_interaction_data_offline + conf.num_interaction_data * (
                    epoch - 1)
                valid_id_r = conf.num_interaction_data_offline + conf.num_interaction_data * epoch
                if valid_id_l <= int(item.split('_')[-1]) < valid_id_r:
                    train_data_list.append(item)

        ### start generating data for the next epoch
        # sample succ
        if conf.sample_succ:
            if conf.resume and epoch == start_epoch:
                sample_succ_list = torch.load(
                    os.path.join(conf.exp_dir, 'ckpts', '%d-sample_succ_list.pth' % start_epoch))
            else:
                torch.save(
                    sample_succ_list,
                    os.path.join(conf.exp_dir, 'ckpts', '%d-sample_succ_list.pth' % epoch))
            # queue one recollect job per entry gathered during the previous epoch
            for item in sample_succ_list:
                datagen.add_one_recollect_job(item[0], item[1], item[2], item[3], item[4], item[5], item[6])
            sample_succ_list = []
            sample_succ_dirs = []
            cur_sample_succ_dir = os.path.join(
                conf.data_dir, 'epoch-%04d_sample-succ' % epoch)
            utils.force_mkdir(cur_sample_succ_dir)

        # start all jobs
        datagen.start_all()
        utils.printout(
            conf.flog,
            f' [ {strftime("%H:%M:%S", time.gmtime(time.time()-start_time)):>9s} Started generating epoch-{epoch+1} data ]'
        )

        ### load data for the current epoch
        if conf.resume and epoch == start_epoch:
            train_dataset = torch.load(
                os.path.join(conf.exp_dir, 'ckpts', '%d-train_dataset.pth' % start_epoch))
        else:
            train_dataset.load_data(train_data_list)
        utils.printout(conf.flog, str(train_dataset))
        train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=conf.batch_size, shuffle=True, pin_memory=True, \
                num_workers=0, drop_last=True, collate_fn=utils.collate_feats, worker_init_fn=utils.worker_init_fn)
        train_num_batch = len(train_dataloader)

        ### print log
        if not conf.no_console_log:
            utils.printout(conf.flog, f'training run {conf.exp_name}')
            utils.printout(conf.flog, header)

        train_batches = enumerate(train_dataloader, 0)
        val_batches = enumerate(val_dataloader, 0)

        train_fraction_done = 0.0
        val_fraction_done = 0.0
        val_batch_ind = -1

        ### train for every batch
        for train_batch_ind, batch in train_batches:
            train_fraction_done = (train_batch_ind + 1) / train_num_batch
            train_step = epoch * train_num_batch + train_batch_ind

            log_console = not conf.no_console_log and (last_train_console_log_step is None or \
                    train_step - last_train_console_log_step >= conf.console_log_interval)
            if log_console:
                last_train_console_log_step = train_step

            # save checkpoint
            # (once per epoch, before the first optimization step of that epoch)
            if train_batch_ind == 0:
                with torch.no_grad():
                    utils.printout(conf.flog, 'Saving checkpoint ...... ')
                    torch.save(
                        network.state_dict(),
                        os.path.join(conf.exp_dir, 'ckpts', '%d-network.pth' % epoch))
                    torch.save(
                        network_opt.state_dict(),
                        os.path.join(conf.exp_dir, 'ckpts', '%d-optimizer.pth' % epoch))
                    torch.save(
                        network_lr_scheduler.state_dict(),
                        os.path.join(conf.exp_dir, 'ckpts', '%d-lr_scheduler.pth' % epoch))
                    torch.save(
                        train_dataset,
                        os.path.join(conf.exp_dir, 'ckpts', '%d-train_dataset.pth' % epoch))
                    utils.printout(conf.flog, 'DONE')

            # set models to training mode
            network.train()

            # forward pass (including logging)
            total_loss, whole_feats, whole_pcs, whole_pxids, whole_movables = forward(batch=batch, data_features=data_features, network=network, conf=conf, is_val=False, \
                    step=train_step, epoch=epoch, batch_ind=train_batch_ind, num_batch=train_num_batch, start_time=start_time, \
                    log_console=log_console, log_tb=not conf.no_tb_log, tb_writer=train_writer, lr=network_opt.param_groups[0]['lr'])

            # optimize one step
            # (the LR scheduler is stepped once per batch, not once per epoch)
            network_opt.zero_grad()
            total_loss.backward()
            network_opt.step()
            network_lr_scheduler.step()

            # sample succ
            if conf.sample_succ:
                network.eval()

                with torch.no_grad():
                    # sample a random EE orientation
                    random_up = torch.randn(conf.batch_size, 3).float().to(conf.device)
                    random_forward = torch.randn(conf.batch_size, 3).float().to(conf.device)
                    random_left = torch.cross(random_up, random_forward)
                    # recompute forward from left x up so it is perpendicular to random_up
                    random_forward = torch.cross(random_left, random_up)
                    random_dirs1 = F.normalize(random_up, dim=1).float()
                    random_dirs2 = F.normalize(random_forward, dim=1).float()

                    # test over the entire image
                    whole_pc_scores1 = network.inference_whole_pc(
                        whole_feats, random_dirs1, random_dirs2)  # B x N
                    whole_pc_scores2 = network.inference_whole_pc(
                        whole_feats, -random_dirs1, random_dirs2)  # B x N

                    # add to the sample_succ_list if wanted
                    ss_cur_dir = batch[data_features.index('cur_dir')]
                    ss_shape_id = batch[data_features.index('shape_id')]
                    ss_trial_id = batch[data_features.index('trial_id')]
                    ss_is_original = batch[data_features.index('is_original')]
                    for i in range(conf.batch_size):
                        valid_id_l = conf.num_interaction_data_offline + conf.num_interaction_data * (
                            epoch - 1)
                        valid_id_r = conf.num_interaction_data_offline + conf.num_interaction_data * epoch

                        # only original, not-yet-used, non-"sample-succ" entries
                        # whose trial id lies in this epoch's id window
                        if ('sample-succ' not in ss_cur_dir[i]) and (ss_is_original[i]) and (ss_cur_dir[i] not in sample_succ_dirs) \
                                and (valid_id_l <= int(ss_trial_id[i]) < valid_id_r):
                            sample_succ_dirs.append(ss_cur_dir[i])

                            # choose one from the two options
                            gt_movable = whole_movables[i].cpu().numpy()

                            # mask scores by gt_movable and zero out scores below 0.5
                            whole_pc_score1 = whole_pc_scores1[i].cpu().numpy() * gt_movable
                            whole_pc_score1[whole_pc_score1 < 0.5] = 0
                            whole_pc_score_sum1 = np.sum(
                                whole_pc_score1) + 1e-12

                            whole_pc_score2 = whole_pc_scores2[i].cpu().numpy() * gt_movable
                            whole_pc_score2[whole_pc_score2 < 0.5] = 0
                            whole_pc_score_sum2 = np.sum(
                                whole_pc_score2) + 1e-12

                            # pick option 1 with probability proportional to its score mass
                            choose1or2_ratio = whole_pc_score_sum1 / (
                                whole_pc_score_sum1 + whole_pc_score_sum2)
                            random_dir1 = random_dirs1[i].cpu().numpy()
                            random_dir2 = random_dirs2[i].cpu().numpy()
                            if np.random.random() < choose1or2_ratio:
                                whole_pc_score = whole_pc_score1
                            else:
                                whole_pc_score = whole_pc_score2
                                # option 2 was scored with the negated first direction
                                random_dir1 = -random_dir1

                            # sample <X, Y> on each img
                            # (1e-12 keeps the distribution valid when all scores are zero)
                            pp = whole_pc_score + 1e-12
                            ptid = np.random.choice(len(whole_pc_score), 1, p=pp / pp.sum())
                            X = whole_pxids[i, ptid, 0].item()
                            Y = whole_pxids[i, ptid, 1].item()

                            # add job to the queue
                            str_cur_dir1 = ',' + ','.join(
                                ['%f' % elem for elem in random_dir1])
                            str_cur_dir2 = ',' + ','.join(
                                ['%f' % elem for elem in random_dir2])
                            sample_succ_list.append((conf.offline_data_dir, str_cur_dir1, str_cur_dir2, \
                                    ss_cur_dir[i].split('/')[-1], cur_sample_succ_dir, X, Y))

            # validate one batch
            # (runs validation batches until validation progress catches up with
            # training progress or the validation set is exhausted)
            while val_fraction_done <= train_fraction_done and val_batch_ind + 1 < val_num_batch:
                val_batch_ind, val_batch = next(val_batches)

                val_fraction_done = (val_batch_ind + 1) / val_num_batch
                val_step = (epoch + val_fraction_done) * train_num_batch - 1

                log_console = not conf.no_console_log and (last_val_console_log_step is None or \
                        val_step - last_val_console_log_step >= conf.console_log_interval)
                if log_console:
                    last_val_console_log_step = val_step

                # set models to evaluation mode
                network.eval()

                with torch.no_grad():
                    # forward pass (including logging)
                    __ = forward(batch=val_batch, data_features=data_features, network=network, conf=conf, is_val=True, \
                            step=val_step, epoch=epoch, batch_ind=val_batch_ind, num_batch=val_num_batch, start_time=start_time, \
                            log_console=log_console, log_tb=not conf.no_tb_log, tb_writer=val_writer, lr=network_opt.param_groups[0]['lr'])
num_block=[3, 6, 6, 3], use_bn=True, use_height=False) TEST_KITTI_PATH_VELODYNE = '/mnt/raid_data/srr7rng/KITTI/data_object_velodyne/validation/velodyne/' TEST_KITTI_PATH_LABELS = '/mnt/raid_data/srr7rng/KITTI/data_object_label_2/validation/label_2/' TEST_KITTI_PATH_CALIBS = '/mnt/raid_data/srr7rng/KITTI/data_object_calib/validation/calib/' log_dir = 'logs/pixor' checkpoint_dir = 'checkpoint' testdatagenerator = DataGen(TEST_KITTI_PATH_VELODYNE, TEST_KITTI_PATH_LABELS, TEST_KITTI_PATH_CALIBS, batch_size=5, type='val', use_cache=True, augmentation=False, use_height=False, raw_lidar=True, norm=FLAGS.norm) cps = tf.train.latest_checkpoint(checkpoint_dir) print(f'latest checkpoint: {cps}') model.load_weights(cps) for i in range(len(testdatagenerator)): ip, true_op, pcd = testdatagenerator.__getitem__(0) op = model.predict(ip) pred_scores_len = []
# Resolve the category list: fall back to the built-in set when none was
# given, otherwise split the comma-separated option.
if conf.category_types is not None:
    conf.category_types = conf.category_types.split(',')
else:
    conf.category_types = [
        'Box', 'Bucket', 'Door', 'Faucet', 'Kettle', 'KitchenPot',
        'Microwave', 'Refrigerator', 'Safe', 'StorageFurniture', 'Switch',
        'Table', 'TrashCan', 'WashingMachine', 'Window',
    ]
print(conf.category_types)

# Read the per-category count: each line must have three whitespace-separated
# fields, of which the first (category) and third (count) are used.
cat2freq = {}
with open(conf.ins_cnt_fn, 'r') as fin:
    for line in fin:
        cat, _, freq = line.rstrip().split()
        cat2freq[cat] = int(freq)
print(cat2freq)

datagen = DataGen(conf.num_processes)

# Queue one collect job per (shape, primact type, epoch, count index) for
# every shape whose category is selected.
epoch_stop = conf.starting_epoch + conf.num_epochs
with open(conf.data_fn, 'r') as fin:
    for line in fin:
        shape_id, cat = line.rstrip().split()
        if cat not in conf.category_types:
            continue
        for primact_type in conf.primact_types:
            for epoch in range(conf.starting_epoch, epoch_stop):
                for cnt_id in range(cat2freq[cat]):
                    datagen.add_one_collect_job(
                        conf.data_dir, shape_id, cat, cnt_id, primact_type, epoch)

datagen.start_all()
def test(save_path, max_sequence_length = 50, max_vocab_size = 5000, data_path = None, weights_only = False):
    """Translate one sentence read from stdin with a saved seq2seq model.

    Loads the model from ``save_path`` (either a whole pickled model or, with
    ``weights_only``, a state dict for a freshly built ``Seq2SeqAttnNet``),
    prompts for a line of text, decodes it one token at a time by feeding each
    top-1 prediction back into the decoder, and prints the decoded text.

    :param save_path: path passed to ``torch.load``
    :param max_sequence_length: forwarded to ``DataGen`` as ``max_seq_len``
    :param max_vocab_size: forwarded to ``DataGen`` as ``max_vocab_size``
    :param data_path: forwarded to ``DataGen`` as ``data_path``
    :param weights_only: if True, ``save_path`` holds a state dict rather than
        a whole model
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # NOTE(review): `batch_size` is neither a parameter nor a local here, so it
    # must exist at module scope — confirm.
    datagen = DataGen(data_path = data_path, batch_size = batch_size, max_seq_len = max_sequence_length, max_vocab_size = max_vocab_size)
    if not weights_only:
        # save_path holds the whole model object
        datagen.init_data(mode = 'test')
        model = torch.load(save_path)
    else:
        # save_path holds only the weights: rebuild the network, then load them
        # NOTE(review): branch extent reconstructed from a collapsed source
        # line. As laid out here `output_length` is bound only in this branch
        # but is read by the decode loop below, so the `weights_only=False`
        # path would raise NameError — confirm against the original layout.
        datagen.init_data()
        input_size = datagen.input_size # Input vocab size
        hidden_size = HIDDEN_SIZE
        output_size = datagen.target_size # Target vocab size
        input_length = datagen.input_length
        output_length = datagen.target_length
        model = Seq2SeqAttnNet(input_size, hidden_size, output_size, input_length, output_length).to(device)
        model.load_state_dict(torch.load(save_path))
    model.set_mode('test')

    # Read, tokenize and encode the source sentence as a batch of one.
    inp_text = input('Enter text in english:')
    inp_text = datagen.tokenize(inp_text)
    inp_text = datagen.encode_source_text(inp_text)
    inp_text = torch.tensor(np.array([inp_text]), dtype = torch.long, device = device)
    encoder_out, hidden = model.encoder(inp_text)

    dec_word_list = []
    attn_wts_list = []
    # Decoding starts from the start-of-sequence token.
    prev_word = datagen.encode_target_text([datagen.SOS])
    prev_word = torch.tensor(np.array([prev_word]), dtype = torch.long, device = device)
    count = 0
    dec_word = None
    # Greedy decoding: stop on the EOS index or after output_length steps.
    while prev_word != datagen.word2idx_target[datagen.EOS] and count < output_length:
        x, hidden, attn_wts = model.decoder(encoder_out, hidden, prev_word, False)
        top_v, top_i = x.squeeze().topk(1)
        prev_word = top_i
        attn_wts_list.append(attn_wts)
        dec_word_list.append(prev_word)
        prev_word = prev_word.unsqueeze(1)
        count += 1

    # Drop index 0 before decoding (presumably the padding index — TODO confirm).
    out_text_enc = np.array([x for x in dec_word_list if x != 0])
    out_text = datagen.decode_target_text(out_text_enc)
    out_text = ' '.join(out_text)
    print(out_text)
def train(batch_size, max_sequence_length, max_vocab_size, data_path, save_path, resume_flag = False):
    """Train the seq2seq attention model and save it to ``save_path``.

    Builds the data generator and model, optionally resumes from
    ``seq2seq_chk.pth``, then alternates one training pass and one validation
    pass per epoch. The state dict is written to ``save_path`` after every
    epoch, and the whole model object is written there once training ends.

    :param batch_size: batch size for the generator and the step counts
    :param max_sequence_length: forwarded to ``DataGen`` as ``max_seq_len``
    :param max_vocab_size: forwarded to ``DataGen`` as ``max_vocab_size``
    :param data_path: forwarded to ``DataGen`` as ``data_path``
    :param save_path: checkpoint / final model destination
    :param resume_flag: load ``seq2seq_chk.pth`` first when it exists
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    datagen = DataGen(
        data_path=data_path,
        batch_size=batch_size,
        max_seq_len=max_sequence_length,
        max_vocab_size=max_vocab_size,
    )
    datagen.init_data()

    input_size = datagen.input_size        # Input vocab size
    hidden_size = HIDDEN_SIZE
    output_size = datagen.target_size      # Target vocab size
    input_length = datagen.input_length
    output_length = datagen.target_length

    # Report the problem dimensions, one per line.
    for label, value in (
        ('Input vocab size: ', input_size),
        ('Target vocab size: ', output_size),
        ('Input text length: ', input_length),
        ('Target text length: ', output_length),
    ):
        print(label, value)

    model = Seq2SeqAttnNet(input_size, hidden_size, output_size, input_length, output_length).to(device)
    criterion = nn.NLLLoss()
    # Encoder and decoder each get their own optimizer.
    encoder_opt = optim.Adamax(model.encoder.parameters())
    decoder_opt = optim.Adamax(model.decoder.parameters())

    def save_state(net, epoch_idx, chk_path='seq2seq_chk.pth'):
        # Persist only the weights; epoch_idx is accepted but unused.
        torch.save(net.state_dict(), chk_path)

    print(model)
    print('Model built successfully...')
    trainable = (p for p in model.parameters() if p.requires_grad)
    param_total = sum([np.prod(p.size()) for p in trainable])
    print('Total params: {}'.format(param_total))

    train_steps = datagen.train_size // batch_size
    val_steps = datagen.val_size // batch_size
    epochs = 1000

    if resume_flag and os.path.exists('seq2seq_chk.pth'):
        model.load_state_dict(torch.load('seq2seq_chk.pth'))

    train_datagen = datagen.get_batch(mode='train')
    val_datagen = datagen.get_batch(mode='val')

    for epoch in range(epochs):
        # ---- training pass ----
        train_loss = 0
        model.set_mode('train')
        for _ in range(train_steps):
            src, decoder_inp, target = next(train_datagen)
            model.set_decoder_inp(decoder_inp)
            pred, _attn = model(src)
            loss = criterion(pred, target)

            encoder_opt.zero_grad()
            decoder_opt.zero_grad()
            train_loss += loss.item()
            loss.backward()
            encoder_opt.step()
            decoder_opt.step()
        print("===> Epoch {} Complete: Avg. Training Loss: {:.4f}".format(epoch, train_loss / train_steps))

        # ---- validation pass (no gradients) ----
        val_loss = 0
        model.set_mode('val')
        with torch.no_grad():
            for _ in range(val_steps):
                src, _unused, target = next(val_datagen)
                pred, _attn = model(src)
                val_loss += criterion(pred, target).item()
        print("===> Epoch {} Complete: Avg. validation Loss: {:.4f}".format(epoch, val_loss / val_steps))

        save_state(model, epoch, save_path)

    torch.save(model, save_path)
if __name__ == "__main__":
    # Script entry point: train on the first 400000 ids of the expanded
    # training set and save the resulting weights.
    logger = get_logger()

    with open("train_ids_expanded", "r") as handle:
        ids_list = [line.strip("\n") for line in handle]

    # Everything from index 400000 onward is held out for validation.
    validation_ids, ids_list = ids_list[400000:], ids_list[:400000]

    epochs = 30
    batch_size = 16
    training_generator = DataGen(ids_list, batch_size)

    model_code = "current_test"
    fp = f"weights.{model_code}.hdf5"
    logger.info(f"training on all ids, expanded training set. {epochs} epochs")

    # Previously swept over dims = [100, 200, 400, 800, 1600, 2000];
    # now fixed to a single width.
    dim = 1600
    logger.info(f"training model with {dim}, {dim} dimension dense")

    mod = get_model(dim)
    mod.fit(
        training_generator,
        epochs=epochs,
        workers=4,
        use_multiprocessing=True,
    )
    mod.save_weights(fp)
if __name__ == '__main__': print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU'))) model = pixor_modified(input_shape=(800, 700, 36), num_block=[3, 6, 6, 3], use_bn=True, use_height=False) PREFIX_PATH = '/home/rash8327/Downloads/kitti' KITTI_PATH_VELODYNE = '{0}/data_object_velodyne/training/velodyne/'.format(PREFIX_PATH) KITTI_PATH_LABELS = '{0}/data_object_label_2/training/label_2/'.format(PREFIX_PATH) KITTI_PATH_CALIBS = '{0}/data_object_calib/training/calib/'.format(PREFIX_PATH) VAL_KITTI_PATH_VELODYNE = '{0}/data_object_velodyne/validation/velodyne/'.format(PREFIX_PATH) VAL_KITTI_PATH_LABELS = '{0}/data_object_label_2/validation/label_2/'.format(PREFIX_PATH) VAL_KITTI_PATH_CALIBS = '{0}/data_object_calib/validation/calib/'.format(PREFIX_PATH) log_dir = 'logs/pixor_modified' checkpoint_dir = 'checkpoint' datagenerator = DataGen(KITTI_PATH_VELODYNE, KITTI_PATH_LABELS, KITTI_PATH_CALIBS, use_cache=True, batch_size=3, type='train', augmentation=True, use_height=False, norm=False) valdatagenerator = DataGen(VAL_KITTI_PATH_VELODYNE, VAL_KITTI_PATH_LABELS, VAL_KITTI_PATH_CALIBS, use_cache=True, batch_size=3, type='val', augmentation=False, use_height=False, norm=False) weight_decay = 0.0001 for layer in model.layers: for attr in ['kernel_regularizer', 'bias_regularizer']: if hasattr(layer, attr): setattr(layer, attr, tf.keras.regularizers.l2(weight_decay)) model = tf.keras.models.model_from_json(model.to_json()) lr = 0.001 model.compile(optimizer=optimizers.Adam(learning_rate=lr), loss=custom_loss(class_weight=1.0, reg_weight=1.0), metrics=[class_loss, reg_loss], run_eagerly=True