def _train_network(opts, network, optimizer, train_data, test_data, val_data,
                   label_weight):
    """Train the network for ``opts["flags"].total_epochs`` epochs.

    Each epoch: run one training pass (``_train_epoch``), evaluate and log
    outputs (``_log_outputs``), then checkpoint the weights to
    ``<out_dir>/networks/<step>/network.pt``.

    Args:
        opts: options dict; ``opts["flags"]`` carries command-line flags
            (``total_epochs``, ``out_dir``).
        network: torch module to train (moved to CPU for saving, back to
            CUDA afterwards, so it is assumed to live on the GPU).
        optimizer: torch optimizer updating ``network``.
        train_data, test_data: data handles forwarded to ``_train_epoch``.
        val_data: NOTE(review): unused in this body — confirm intentional.
        label_weight: per-label weighting forwarded to the helpers.
    """
    print("Beginning training...")
    # train_exps = train_data["experiments"].value
    # train_exps.sort()
    step = 0
    for i in range(opts["flags"].total_epochs):
        print("EPOCH %d, %d" % (i, step))
        network.train()
        # _train_epoch returns the updated global step counter.
        step = _train_epoch(opts, step, network, optimizer, train_data,
                            test_data, label_weight)
        print("\tFinished epoch")

        print("\tProcessing all examples...")
        network.eval()
        _log_outputs(opts, step, network, label_weight)

        # NOTE(review): round_tic is assigned but never read.
        round_tic = time.time()

        # save the network in its own folder in the networks folder
        out_dir = os.path.join(
            opts["flags"].out_dir, "networks", "%d" % step)
        paths.create_dir(out_dir)
        out_name = os.path.join(out_dir, "network.pt")
        # save from CPU so the checkpoint can be loaded without a GPU.
        torch.save(network.cpu().state_dict(), out_name)
        network.cuda()
        # hantman_hungarian_image.save_network(opts, network, out_name)
    print("Finished training.")
def create_train_test(opts):
    """Create the training, testing, and validation splits.

    The output space lives in the same folder as the original ``data.hdf``
    file: one ``<name>_{train,test,valid}.hdf5`` file per split, plus a
    separate ``<name>`` sub-folder holding the setup/provenance information
    (git status, command line).

    Args:
        opts: options dict; uses ``opts["flags"]`` (``data``, ``name``,
            ``split_type``, ``test_mouse``) and ``opts["argv"]``.
    """
    base_out = os.path.dirname(opts["flags"].data)
    log_output_path = os.path.join(base_out, opts["flags"].name)
    base_out_name = os.path.join(base_out, opts["flags"].name)
    exp_path = os.path.join(base_out, "exps")
    paths.create_dir(log_output_path)

    # add the initial logging information to the output path.
    git_helper.log_git_status(
        os.path.join(log_output_path, "00_git_status.txt"))
    paths.save_command2(log_output_path, opts["argv"])

    # now to do the actual splitting; first open the base data.hdf.
    with h5py.File(opts["flags"].data, "r") as org_data:
        # Dataset.value was deprecated and removed in h5py 3.0; indexing
        # with an empty tuple reads the whole dataset into memory.
        exp_list = org_data["exp_names"][()]
        exp_mask = numpy.ones((exp_list.shape[0], ), dtype="bool")

        if opts["flags"].split_type == 2:
            train_idx, test_idx, valid_idx = hantman_mouse.setup_full_split2(
                opts, org_data, mask=exp_mask,
                test_mouse=opts["flags"].test_mouse)
        else:
            # else split type 3... no split type 1
            train_idx, test_idx, valid_idx = hantman_mouse.setup_full_split3(
                opts, org_data, mask=exp_mask)

        split_name = base_out_name + "_train.hdf5"
        save_experiments(opts, org_data, exp_path, train_idx, split_name,
                         mask=exp_mask)

        split_name = base_out_name + "_test.hdf5"
        save_experiments(opts, org_data, exp_path, test_idx, split_name,
                         mask=exp_mask)

        split_name = base_out_name + "_valid.hdf5"
        save_experiments(opts, org_data, exp_path, valid_idx, split_name,
                         mask=exp_mask)
    return
def _save_checkpoint(opts, network, step):
    """Write the network weights to ``<out_dir>/networks/<step>/network.pt``.

    The state dict is saved from CPU so the checkpoint loads on machines
    without a GPU; the network is moved back to CUDA afterwards.
    """
    out_dir = os.path.join(opts["flags"].out_dir, "networks", "%d" % step)
    paths.create_dir(out_dir)
    out_name = os.path.join(out_dir, "network.pt")
    torch.save(network.cpu().state_dict(), out_name)
    network.cuda()


def _train_network(opts, network, optimizer, criterion, sampler, train_eval,
                   test_eval, valid_eval):
    """Train the network with periodic evaluation and checkpointing.

    Every ``update_iterations`` epochs (skipping epoch 0) the network is
    evaluated on the train/test/valid samplers and the costs are logged; on
    ``save_iterations`` boundaries within those, a checkpoint is written.
    A final checkpoint is always written after the last epoch.

    Args:
        opts: options dict; ``opts["flags"]`` carries ``total_epochs``,
            ``update_iterations``, ``save_iterations`` and ``out_dir``.
        network: torch module to train (assumed CUDA-resident).
        optimizer: torch optimizer updating ``network``.
        criterion: loss function passed to the epoch/eval helpers.
        sampler: mini-batch sampler used for training.
        train_eval, test_eval, valid_eval: evaluation samplers per split.
    """
    print("Beginning training...")
    frame_thresh = [10 for _ in g_label_names]

    step = 0
    for i in range(opts["flags"].total_epochs):
        print("EPOCH %d, %d" % (i, step))
        tic = time.time()
        network.train()
        step += _train_epoch(opts, network, optimizer, criterion, sampler)
        print("\t%f" % (time.time() - tic))
        print("\tFinished epoch")

        if i % opts["flags"].update_iterations == 0 and i != 0:
            print("\tProcessing all examples...")
            tic = time.time()
            network.eval()
            train_cost = _eval_network(opts, step, network, train_eval,
                                       criterion, "train")
            if DEBUG:
                # skip the expensive test/valid passes while debugging.
                test_cost = train_cost
                valid_cost = train_cost
            else:
                test_cost = _eval_network(opts, step, network, test_eval,
                                          criterion, "test")
                valid_cost = _eval_network(opts, step, network, valid_eval,
                                           criterion, "valid")
            sequences_helper.log_outputs3(
                opts, step, train_cost, test_cost, valid_cost, g_label_names,
                frame_thresh=frame_thresh)

            if i % opts["flags"].save_iterations == 0:
                print("\tSaving network...")
                _save_checkpoint(opts, network, step)
            print("\tProcessing finished: %f" % (time.time() - tic))

    # always leave a final checkpoint behind.
    _save_checkpoint(opts, network, step)
    print("Finished training.")
def main(argv): opts = _setup_opts(sys.argv) # paths.setup_output_space(opts) out_dir = os.path.join(opts["flags"].out_dir, 'proc_info') paths.create_dir(out_dir) paths.save_command(opts, out_dir) git_helper.log_git_status(os.path.join(out_dir, "git_status.txt")) if opts["flags"].cuda_device != -1: torch.cuda.set_device(opts["flags"].cuda_device) with h5py.File(opts["flags"].train_file, "r") as train_data: with h5py.File(opts["flags"].test_file, "r") as test_data: with h5py.File(opts["flags"].valid_file, "r") as valid_data: sampler = HantmanVideoFrameSampler( opts["rng"], train_data, opts["flags"].video_dir, opts["flags"].hantman_mini_batch, frames=opts["flags"].frames, use_pool=True, gpu_id=opts["flags"].cuda_device) label_weight = _get_label_weight(opts, train_data) # import pdb; pdb.set_trace() train_eval = HantmanVideoSampler( None, train_data, opts["flags"].video_dir, use_pool=True, gpu_id=opts["flags"].cuda_device) test_eval = HantmanVideoSampler( None, test_data, opts["flags"].video_dir, use_pool=True, gpu_id=opts["flags"].cuda_device) valid_eval = HantmanVideoSampler( None, valid_data, opts["flags"].video_dir, use_pool=True, gpu_id=opts["flags"].cuda_device) network, optimizer, criterion = _init_network( opts, label_weight) # import pdb; pdb.set_trace() _proc_network(opts, network, optimizer, criterion, sampler, train_eval, test_eval, valid_eval)
def _eval_and_record(opts, step, network, criterion, train_eval, test_eval,
                     valid_eval, frame_thresh):
    """Evaluate the network on all three splits and log loss/F scores."""
    network.eval()
    (train_loss, train_match, test_loss, test_match,
     valid_loss, valid_match) = _eval_network(
        opts, step, network, criterion, train_eval, test_eval, valid_eval,
        frame_thresh=frame_thresh)
    # write to disk
    _write_loss_scores(opts, step, train_loss, test_loss, valid_loss)
    _write_f_scores(opts, step, train_match, test_match, valid_match)


def _save_checkpoint(opts, network, step):
    """Write the network weights to ``<out_dir>/networks/<step>/network.pt``.

    The state dict is saved from CPU so the checkpoint loads on machines
    without a GPU; the network is moved back to CUDA afterwards.
    """
    out_dir = os.path.join(opts["flags"].out_dir, "networks", "%d" % step)
    paths.create_dir(out_dir)
    out_name = os.path.join(out_dir, "network.pt")
    torch.save(network.cpu().state_dict(), out_name)
    network.cuda()


def _train_network(opts, network, optimizer, criterion, sampler, train_eval,
                   test_eval, valid_eval):
    """Train the network with periodic evaluation and checkpointing.

    Every ``update_iterations`` epochs the network is evaluated on all
    splits, scores are written to disk, and a checkpoint is saved.  The same
    evaluate/record/save sequence runs once more after the final epoch.
    (The in-loop and post-loop copies were previously duplicated inline;
    they are now shared helpers.)

    Args:
        opts: options dict; ``opts["flags"]`` carries ``total_epochs``,
            ``update_iterations`` and ``out_dir``.
        network: torch module to train (assumed CUDA-resident).
        optimizer: torch optimizer updating ``network``.
        criterion: loss function passed to the epoch/eval helpers.
        sampler: mini-batch sampler used for training.
        train_eval, test_eval, valid_eval: evaluation samplers per split.
    """
    print("Beginning training...")
    frame_thresh = [10, 10, 10, 10, 10, 10]

    step = 0
    for i in range(opts["flags"].total_epochs):
        print("EPOCH %d, %d" % (i, step))
        tic = time.time()
        network.train()
        step += _train_epoch(opts, network, optimizer, criterion, sampler)
        print("\t%f" % (time.time() - tic))
        print("\tFinished epoch")

        if i % opts["flags"].update_iterations == 0:
            _eval_and_record(opts, step, network, criterion, train_eval,
                             test_eval, valid_eval, frame_thresh)
            _save_checkpoint(opts, network, step)
            print("\tProcessing finished: %f" % (time.time() - tic))

    # final evaluation + checkpoint after the last epoch.
    _eval_and_record(opts, step, network, criterion, train_eval, test_eval,
                     valid_eval, frame_thresh)
    _save_checkpoint(opts, network, step)
    print("Finished training.")
def write_csvs(out_dir, exp_name, label_names, labels, predict):
    """Write one prediction CSV per behavior label into ``out_dir``.

    Each file ``odas_<label>.csv`` pairs the predicted scores with the
    ground-truth column for that label, indexed by frame number.

    Args:
        out_dir: destination directory (created if missing).
        exp_name: experiment name (currently unused by this function).
        label_names: iterable of behavior label names, one per column.
        labels: 2-D ground-truth array, frames x labels.
        predict: 2-D prediction array, frames x labels.
    """
    # a single frame-index column shared by every label's CSV.
    frame_idx = list(range(labels.shape[0]))
    names = list(label_names)

    target_dir = out_dir  # os.path.join(out_dir, exp_name)
    paths.create_dir(out_dir)
    paths.create_dir(target_dir)

    for col, name in enumerate(names):
        csv_path = os.path.join(target_dir, "odas_%s.csv" % name)
        with open(csv_path, "w") as fid:
            sequences_helper.write_csv(
                fid, name, predict[:, col], labels[:, col], frame_idx)
def main(opts):
    """Set up the output space, log provenance, and run preprocessing.

    Creates ``out_dir`` (plus an ``exps`` sub-folder), records the command
    line and git status, then streams features into ``data.hdf5`` while
    keeping a run log and a skipped-experiments log open.
    """
    out_root = opts["flags"].out_dir

    # create the output directory
    paths.create_dir(out_root)
    paths.save_command2(out_root, opts["argv"])

    # log the git information
    git_helper.log_git_status(
        os.path.join(out_root, "00_git_status.txt"))

    exp_dir = os.path.join(out_root, "exps")
    paths.create_dir(exp_dir)

    outname = os.path.join(out_root, "data.hdf5")
    logname = os.path.join(out_root, "00_log.txt")
    skipname = os.path.join(out_root, "00_skipped.txt")

    # both logs and the output HDF5 stay open for the whole run.
    with open(logname, "w") as log, \
            open(skipname, "w") as skip_log, \
            h5py.File(outname, "w") as out_data:
        preprocess_features(opts, log, skip_log, out_data, exp_dir)
def _train_network(opts, network, optimizer, criterion, sampler, train_eval,
                   test_eval, valid_eval):
    """Run a single evaluation pass and record the scores.

    NOTE(review): despite its name, this variant performs no training —
    there is no epoch loop; it evaluates once at step 0, writes the loss
    and F-score logs, and exits.

    Args:
        opts: options dict; ``opts["flags"].out_dir`` is the output root.
        network: torch module to evaluate.
        optimizer: unused in this body.
        criterion: loss function forwarded to ``_eval_network``.
        sampler: unused in this body.
        train_eval, test_eval, valid_eval: evaluation samplers per split.
    """
    print("Beginning training...")
    frame_thresh = [10, 10, 10, 10, 10, 10]

    network.eval()
    step = 0
    train_loss, train_match, test_loss, test_match, valid_loss, valid_match =\
        _eval_network(opts, step, network, criterion, train_eval, test_eval,
                      valid_eval, frame_thresh=frame_thresh)
    _write_loss_scores(opts, step, train_loss, test_loss, valid_loss)
    _write_f_scores(opts, step, train_match, test_match, valid_match)

    # save the network in its own folder in the networks folder
    out_dir = os.path.join(opts["flags"].out_dir, "networks", "%d" % step)
    paths.create_dir(out_dir)
    out_name = os.path.join(out_dir, "network.pt")
    # torch.save(network.cpu().state_dict(), out_name)
    # NOTE(review): the save above is commented out, so the checkpoint
    # directory is created and out_name computed but no weights are written;
    # network.cuda() below is also a no-op relocation since .cpu() was never
    # called. Confirm this is intentional.
    network.cuda()
    print("Finished training.")
def log_info(opts, train_vids, test_vids):
    """Record the train/test video lists and run settings under out_dir/info.

    Writes three files: ``train_vids.txt`` and ``test_vids.txt`` (one video
    name per line) and ``info.txt`` (split sizes plus the iteration flags).
    """
    info_dir = os.path.join(opts["flags"].out_dir, "info")
    paths.create_dir(info_dir)

    with open(os.path.join(info_dir, "train_vids.txt"), "w") as fid:
        fid.writelines("%s\n" % vid for vid in train_vids)

    with open(os.path.join(info_dir, "test_vids.txt"), "w") as fid:
        fid.writelines("%s\n" % vid for vid in test_vids)

    flags = opts["flags"]
    summary = [
        "Num train: %d\n" % len(train_vids),
        "Num test: %d\n" % len(test_vids),
        "Iters per epoch: %d\n" % flags.iter_per_epoch,
        "Update iterations: %d\n" % flags.update_iterations,
        "Save iterations: %d\n" % flags.save_iterations,
    ]
    with open(os.path.join(info_dir, "info.txt"), "w") as fid:
        fid.writelines(summary)
def _eval_and_record(opts, step, network, criterion, train_eval, test_eval,
                     valid_eval, frame_thresh):
    """Evaluate the network on all three splits and log loss/F scores."""
    network.eval()
    (train_loss, train_match, test_loss, test_match,
     valid_loss, valid_match) = _eval_network(
        opts, step, network, criterion, train_eval, test_eval, valid_eval,
        frame_thresh=frame_thresh)
    # write to disk
    _write_loss_scores(opts, step, train_loss, test_loss, valid_loss)
    _write_f_scores(opts, step, train_match, test_match, valid_match)


def _save_checkpoint(opts, network, step):
    """Write the network weights to ``<out_dir>/networks/<step>/network.pt``.

    The state dict is saved from CPU so the checkpoint loads on machines
    without a GPU; the network is moved back to CUDA afterwards.
    """
    out_dir = os.path.join(opts["flags"].out_dir, "networks", "%d" % step)
    paths.create_dir(out_dir)
    out_name = os.path.join(out_dir, "network.pt")
    torch.save(network.cpu().state_dict(), out_name)
    network.cuda()


def _train_network(opts, network, optimizer, criterion, sampler, train_eval,
                   test_eval, valid_eval):
    """Train the network with periodic evaluation and checkpointing.

    Every ``update_iterations`` epochs (skipping epoch 0) the network is
    evaluated on all splits, scores are written to disk, and a checkpoint
    is saved; the same sequence runs once more after the final epoch.
    (Blocks of commented-out debug code were removed, and the duplicated
    in-loop / post-loop eval-and-save code now shares helpers.)

    Args:
        opts: options dict; ``opts["flags"]`` carries ``total_epochs``,
            ``update_iterations`` and ``out_dir``.
        network: torch module to train (assumed CUDA-resident).
        optimizer: torch optimizer updating ``network``.
        criterion: loss function passed to the epoch/eval helpers.
        sampler: mini-batch sampler used for training.
        train_eval, test_eval, valid_eval: evaluation samplers per split.
    """
    print("Beginning training...")
    frame_thresh = [10 for _ in g_label_names]

    step = 0
    for i in range(opts["flags"].total_epochs):
        print("EPOCH %d, %d" % (i, step))
        tic = time.time()
        network.train()
        step += _train_epoch(opts, network, optimizer, criterion, sampler)
        print("\t%f" % (time.time() - tic))
        print("\tFinished epoch")

        if i % opts["flags"].update_iterations == 0 and i != 0:
            print("\tProcessing all examples...")
            tic = time.time()
            _eval_and_record(opts, step, network, criterion, train_eval,
                             test_eval, valid_eval, frame_thresh)
            _save_checkpoint(opts, network, step)
            print("\tProcessing finished: %f" % (time.time() - tic))

    # final evaluation + checkpoint after the last epoch.
    _eval_and_record(opts, step, network, criterion, train_eval, test_eval,
                     valid_eval, frame_thresh)
    _save_checkpoint(opts, network, step)
    print("Finished training.")
# import pdb; pdb.set_trace() out_data["features"] = all_feat out_data["experiments"] = all_exps out_data["mice"] = all_mice out_data["date"] = all_dates out_data["labels"] = all_labels # import pdb; pdb.set_trace() return if __name__ == "__main__": opts = create_opts() opts = setup_opts(opts) # create the output directory # paths.setup_output_space(opts["out_dir"]) paths.create_dir(opts["out_dir"]) paths.save_command(opts["out_dir"]) # log the git information git_helper.log_git_status( os.path.join(opts["out_dir"], "00_git_status.txt")) outname = os.path.join(opts["out_dir"], "data.hdf5") logname = os.path.join(opts["out_dir"], "00_log.txt") skipname = os.path.join(opts["out_dir"], "00_skipped.txt") with open(logname, "w") as log: with open(skipname, "w") as skip_log: with h5py.File(outname, "w") as out_data: preprocess_features(opts, log, skip_log, out_data)
def create_opts():
    """Create an opts dictionary with default (empty) settings.

    ``filename`` and ``out_dir`` are expected to be filled in later by
    ``setup_opts``; ``exp_dir``/``all_exp`` come from module globals.
    """
    opts = dict()
    opts["filename"] = ""
    opts["out_dir"] = ""
    opts["exp_dir"] = g_exp_dir
    opts["all_exp"] = g_all_exp_dir
    return opts


if __name__ == "__main__":
    opts = create_opts()
    opts = setup_opts(opts)

    # create the output directory
    paths.create_dir(opts["out_dir"])
    paths.create_dir(os.path.join(opts["out_dir"], "exps"))
    paths.save_command(opts["out_dir"])

    # log the git information
    # git_helper.log_git_status(
    #     os.path.join(opts["out_dir"], "00_git_status.txt"))

    # try to load the locations of the original experiments.
    # NOTE(review): opened in append mode and not closed in this chunk —
    # presumably used (and closed) further down the script; confirm.
    h5filename = os.path.join(opts["out_dir"], "00_exp_cache.hdf5")
    h5file = h5py.File(h5filename, "a")

    # load the mat file
    matfile = sio.loadmat(opts["filename"])
    logfilename = os.path.join(opts["out_dir"], "00_log.txt")
def create_train_test(opts):
    """Create the training and testing splits.

    The output space lives in the same folder as the original ``data.hdf``
    file: ``<name>_train.hdf5`` and ``<name>_test.hdf5``, plus a separate
    ``<name>`` sub-folder holding the setup/provenance information.

    Long videos are masked out and the experiment list is pruned before
    splitting; the split strategy depends on the ``one_mouse``/``one_day``
    flags.

    Args:
        opts: options dict; uses ``opts["flags"]`` (``data``, ``name``,
            ``one_mouse``, ``one_day``), ``opts["rng"]`` and ``opts["argv"]``.

    Raises:
        NotImplementedError: for the multiple-mice / single-day combination,
            which has no defined split strategy.
    """
    base_out = os.path.dirname(opts["flags"].data)
    log_output_path = os.path.join(base_out, opts["flags"].name)
    base_out_name = os.path.join(base_out, opts["flags"].name)
    exp_path = os.path.join(base_out, "exps")
    paths.create_dir(log_output_path)

    # add the initial logging information to the output path.
    git_helper.log_git_status(
        os.path.join(log_output_path, "00_git_status.txt"))
    paths.save_command2(log_output_path, opts["argv"])

    # now to do the actual splitting; first open the base data.hdf.
    with h5py.File(opts["flags"].data, "a") as org_data:
        # Dataset.value was deprecated and removed in h5py 3.0; [()] reads
        # the whole dataset. (A stray pdb.set_trace() breakpoint that halted
        # every run was removed here.)
        exp_list = org_data["experiments"][()]

        # get rid of long videos.
        exp_mask = hantman_mouse.mask_long_vids(org_data, exp_list)
        # prune lists further to make an easier dataset.
        exp_mask = prune_mice_dates(opts, org_data, mask=exp_mask)

        if opts["flags"].one_mouse is True and opts["flags"].one_day is True:
            # If one mouse and one date, then just split randomly.
            num_vids = exp_mask.sum()
            rand_idx = opts["rng"].permutation(num_vids)
            # split percentage is 80% (should this be changeable?)
            split_idx = int(np.floor(num_vids * 0.8))
            train_idx = rand_idx[:split_idx]
            test_idx = rand_idx[split_idx:]
        elif opts["flags"].one_mouse is False and opts["flags"].one_day is True:
            # previously dropped into pdb; fail loudly instead so
            # non-interactive runs don't hang on a breakpoint.
            raise NotImplementedError(
                "Split for multiple mice on a single day is not defined.")
        else:
            train_idx, test_idx = hantman_mouse.setup_train_test_samples(
                opts, org_data, mask=exp_mask)

        split_name = base_out_name + "_train.hdf5"
        save_experiments(opts, org_data, exp_path, train_idx, split_name,
                         mask=exp_mask)

        split_name = base_out_name + "_test.hdf5"
        save_experiments(opts, org_data, exp_path, test_idx, split_name,
                         mask=exp_mask)
    return
    # NOTE(review): this chunk begins mid-function — the enclosing ``def``
    # (apparently ``parse_matfile``) is outside this view. The lines below
    # pack the parsed arrays into the result dict and return it.
    data['label_names'] = label_names
    data['features'] = all_feats
    data['labels'] = all_labels
    # data['crop_first'] = all_crop_first
    # data['crop_last'] = all_crop_last
    data['crops'] = all_crops
    data['num_frames'] = all_num_frames
    data['frame_idx'] = all_frame_idx
    # data['org_frames'] = all_org_frames
    return data


if __name__ == "__main__":
    opts = create_opts()
    opts = setup_opts(opts)

    # create the output directory
    # paths.setup_output_space(opts['out_dir'])
    paths.create_dir(opts['out_dir'])
    paths.save_command(opts['out_dir'])

    # log the git information
    git_helper.log_git_status(
        os.path.join(opts['out_dir'], '00_git_status.txt'))

    data = parse_matfile(opts)
    outname = os.path.join(opts['out_dir'], 'data.npy')
    joblib.dump(data, outname)

    # create the hdf5 version of the data
# all_feat = numpy.concatenate(all_feat) # true_mean = all_feat.mean(axis=0) # true_std = all_feat.std(axis=0) # print numpy.any(numpy.abs(true_mean - data_mean) > 0.001) # print numpy.any(numpy.abs(true_std - data_std) > 0.001) return data_mean, data_std if __name__ == "__main__": used_exp_filename = "/localhome/kwaki/data/hantman/used_exps.txt" data_dir = "/media/drive1/data/hantman/" out_dir = "/media/drive1/data/hantman_processed/hoghof/" # out_dir =\ # "/media/drive1/data/hantman_processed/hoghof_single_mouse_test/" paths.create_dir(out_dir) filename = os.path.join(out_dir, "data.hdf5") # to help make the conversion easier, load up a previously created # data file. all_data = joblib.load(("/media/drive1/data/hantman_processed/" "joblib/test/data.npy")) # "joblib/relative_39window/data.npy")) lines = [] with open(used_exp_filename, "r") as exp_file: lines = exp_file.readlines() for i in range(len(lines)): lines[i] = lines[i].rstrip() with h5py.File(filename, "w") as hdf5_file: