def setup_output_space(opts):
    """Create the output directory tree and record how the run was launched.

    Creates ``flags.out_dir`` together with its ``predictions``, ``plots``,
    ``opts`` and ``networks`` sub-directories, dumps ``opts`` (without the
    flags object) to ``<out_dir>/opts/opts.npy`` via joblib, saves the command
    line and logs the git status to ``<out_dir>/git_status.txt``.

    Args:
        opts: options dict. Must provide ``opts["flags"]`` (an object with an
            ``out_dir`` attribute) and ``opts["argv"]``.

    Returns:
        The same ``opts`` dict, with ``opts["flags"]`` pointing at the
        original flags object again.
    """
    flags = opts["flags"]
    out_dir = flags.out_dir

    # create the output directories
    create_dir(out_dir)
    for sub_dir in ("predictions", "plots", "opts", "networks"):
        create_dir(os.path.join(out_dir, sub_dir))

    # The flags object could not be pickled easily, so it is detached while
    # the dict is dumped; on re-load the flags are regenerated. The
    # try/finally guarantees the caller's dict gets its flags back even when
    # the dump fails.
    opts["flags"] = None
    try:
        joblib.dump(opts, os.path.join(out_dir, "opts", "opts.npy"))
    finally:
        opts["flags"] = flags

    # save the command and the state of the repository
    save_command2(out_dir, opts["argv"])
    git_helper.log_git_status(os.path.join(out_dir, "git_status.txt"))

    return opts
def create_train_test(opts):
    """Create the training, testing and validation splits.

    The output space lives in the same folder as the original data file
    (``opts["flags"].data``): a sub folder named ``opts["flags"].name`` holds
    the setup/logging information, and the split names handed to
    ``save_experiments`` are ``<folder>/<name>_train.hdf5``,
    ``<folder>/<name>_test.hdf5`` and ``<folder>/<name>_valid.hdf5``.

    Args:
        opts: options dict. Reads ``opts["flags"]`` (``data``, ``name``,
            ``split_type`` and, for split type 2, ``test_mouse``) and
            ``opts["argv"]``.

    Returns:
        None.
    """
    flags = opts["flags"]

    # first setup the output space, next to the original data file, with a
    # separate sub folder for the setup information.
    base_out = os.path.dirname(flags.data)
    base_out_name = os.path.join(base_out, flags.name)
    # The logging folder and the split-file prefix are the same path.
    log_output_path = base_out_name
    exp_path = os.path.join(base_out, "exps")

    paths.create_dir(log_output_path)

    # add the initial logging information to the output path.
    git_helper.log_git_status(
        os.path.join(log_output_path, "00_git_status.txt"))
    paths.save_command2(log_output_path, opts["argv"])

    # now to do the actual splitting: first open the base data file.
    with h5py.File(flags.data, "r") as org_data:
        # ``Dataset.value`` was deprecated and later removed from h5py;
        # indexing with an empty tuple reads the whole dataset on every
        # h5py version.
        exp_list = org_data["exp_names"][()]
        # Every experiment is kept: the mask is all True.
        exp_mask = numpy.ones((exp_list.shape[0], ), dtype="bool")

        if flags.split_type == 2:
            train_idx, test_idx, valid_idx = hantman_mouse.setup_full_split2(
                opts, org_data, mask=exp_mask,
                test_mouse=flags.test_mouse)
        else:
            # else split type 3... there is no split type 1
            train_idx, test_idx, valid_idx = hantman_mouse.setup_full_split3(
                opts, org_data, mask=exp_mask)

        # Same order as before: train, then test, then valid.
        splits = (
            ("_train.hdf5", train_idx),
            ("_test.hdf5", test_idx),
            ("_valid.hdf5", valid_idx),
        )
        for suffix, split_idx in splits:
            save_experiments(opts, org_data, exp_path, split_idx,
                             base_out_name + suffix, mask=exp_mask)
def main(argv):
    """Set up the run, open the three data files and process the network.

    Logs the command and git status under ``<out_dir>/proc_info``, selects the
    CUDA device when ``cuda_device`` is not -1, builds the training frame
    sampler and the train/test/valid evaluation samplers, initialises the
    network and hands everything to ``_proc_network``.

    Args:
        argv: accepted for the entry-point signature; the options are parsed
            from ``sys.argv`` and this parameter is not read.
    """
    opts = _setup_opts(sys.argv)

    # Record how this process was launched.
    proc_info_dir = os.path.join(opts["flags"].out_dir, 'proc_info')
    paths.create_dir(proc_info_dir)
    paths.save_command(opts, proc_info_dir)
    git_helper.log_git_status(os.path.join(proc_info_dir, "git_status.txt"))

    flags = opts["flags"]
    if flags.cuda_device != -1:
        torch.cuda.set_device(flags.cuda_device)

    # The files are opened in the order train, test, valid and stay open for
    # the whole run.
    with h5py.File(flags.train_file, "r") as train_data, \
            h5py.File(flags.test_file, "r") as test_data, \
            h5py.File(flags.valid_file, "r") as valid_data:
        sampler = HantmanVideoFrameSampler(
            opts["rng"], train_data, flags.video_dir,
            flags.hantman_mini_batch,
            frames=flags.frames,
            use_pool=True, gpu_id=flags.cuda_device)

        label_weight = _get_label_weight(opts, train_data)

        # One evaluation sampler per split, built in train/test/valid order.
        train_eval, test_eval, valid_eval = [
            HantmanVideoSampler(
                None, split_data, flags.video_dir,
                use_pool=True, gpu_id=flags.cuda_device)
            for split_data in (train_data, test_data, valid_data)
        ]

        network, optimizer, criterion = _init_network(opts, label_weight)

        _proc_network(opts, network, optimizer, criterion,
                      sampler, train_eval, test_eval, valid_eval)
def main(opts):
    """Prepare the output directory and run feature preprocessing.

    Creates ``opts["flags"].out_dir`` and its ``exps`` sub-directory, saves
    the command line and git status there, then calls
    ``preprocess_features`` with open handles for the log file, the
    skipped-items log and the output HDF5 file.

    Args:
        opts: options dict providing ``opts["flags"].out_dir`` and
            ``opts["argv"]``.
    """
    out_dir = opts["flags"].out_dir

    # create the output directory and record the command
    paths.create_dir(out_dir)
    paths.save_command2(out_dir, opts["argv"])

    # log the git information
    git_status_path = os.path.join(out_dir, "00_git_status.txt")
    git_helper.log_git_status(git_status_path)

    exp_dir = os.path.join(out_dir, "exps")
    paths.create_dir(exp_dir)

    outname = os.path.join(out_dir, "data.hdf5")
    logname = os.path.join(out_dir, "00_log.txt")
    skipname = os.path.join(out_dir, "00_skipped.txt")

    # All three outputs are opened for writing (log, skip log, then the HDF5
    # file) and closed together once preprocessing returns.
    with open(logname, "w") as log, \
            open(skipname, "w") as skip_log, \
            h5py.File(outname, "w") as out_data:
        preprocess_features(opts, log, skip_log, out_data, exp_dir)
# import pdb; pdb.set_trace() out_data["features"] = all_feat out_data["experiments"] = all_exps out_data["mice"] = all_mice out_data["date"] = all_dates out_data["labels"] = all_labels # import pdb; pdb.set_trace() return if __name__ == "__main__": opts = create_opts() opts = setup_opts(opts) # create the output directory # paths.setup_output_space(opts["out_dir"]) paths.create_dir(opts["out_dir"]) paths.save_command(opts["out_dir"]) # log the git information git_helper.log_git_status( os.path.join(opts["out_dir"], "00_git_status.txt")) outname = os.path.join(opts["out_dir"], "data.hdf5") logname = os.path.join(opts["out_dir"], "00_log.txt") skipname = os.path.join(opts["out_dir"], "00_skipped.txt") with open(logname, "w") as log: with open(skipname, "w") as skip_log: with h5py.File(outname, "w") as out_data: preprocess_features(opts, log, skip_log, out_data)
def create_train_test(opts):
    """Create the training and testing splits.

    The output space lives in the same folder as the original data file
    (``opts["flags"].data``): a sub folder named ``opts["flags"].name`` holds
    the setup/logging information, and the split names handed to
    ``save_experiments`` are ``<folder>/<name>_train.hdf5`` and
    ``<folder>/<name>_test.hdf5``.

    Args:
        opts: options dict. Reads ``opts["flags"]`` (``data``, ``name``,
            ``one_mouse``, ``one_day``), ``opts["argv"]`` and, for the random
            split, ``opts["rng"]``.

    Returns:
        None.

    Raises:
        NotImplementedError: when ``one_mouse`` is False and ``one_day`` is
            True; no split is defined for that combination.
    """
    flags = opts["flags"]

    # first setup the output space, next to the original data file, with a
    # separate sub folder for the setup information.
    base_out = os.path.dirname(flags.data)
    base_out_name = os.path.join(base_out, flags.name)
    # The logging folder and the split-file prefix are the same path.
    log_output_path = base_out_name
    exp_path = os.path.join(base_out, "exps")

    paths.create_dir(log_output_path)

    # add the initial logging information to the output path.
    git_helper.log_git_status(
        os.path.join(log_output_path, "00_git_status.txt"))
    paths.save_command2(log_output_path, opts["argv"])

    # now to do the actual splitting: first open the base data file.
    # NOTE(review): the file is opened in append mode; nothing in this
    # function writes to it directly -- confirm whether a helper needs write
    # access, otherwise "r" would be safer.
    with h5py.File(flags.data, "a") as org_data:
        # ``Dataset.value`` was deprecated and later removed from h5py;
        # indexing with an empty tuple reads the whole dataset on every
        # h5py version.
        exp_list = org_data["experiments"][()]

        # get rid of long videos.
        exp_mask = hantman_mouse.mask_long_vids(org_data, exp_list)
        # prune lists further to make an easier dataset.
        exp_mask = prune_mice_dates(opts, org_data, mask=exp_mask)

        if flags.one_mouse is True and flags.one_day is True:
            # If one mouse and one date, then just split randomly.
            num_vids = exp_mask.sum()
            rand_idx = opts["rng"].permutation(num_vids)
            # split percentage is 80% (should this be changeable?)
            split_idx = int(np.floor(num_vids * 0.8))
            train_idx = rand_idx[:split_idx]
            test_idx = rand_idx[split_idx:]
        elif flags.one_mouse is False and flags.one_day is True:
            # Fail explicitly: there are no train/test indices to save for
            # this combination.
            raise NotImplementedError(
                "Not defined: splitting with one_day=True and "
                "one_mouse=False.")
        else:
            train_idx, test_idx = hantman_mouse.setup_train_test_samples(
                opts, org_data, mask=exp_mask)

        # Same order as before: train, then test.
        splits = (
            ("_train.hdf5", train_idx),
            ("_test.hdf5", test_idx),
        )
        for suffix, split_samples in splits:
            save_experiments(opts, org_data, exp_path, split_samples,
                             base_out_name + suffix, mask=exp_mask)
# Store the collected values on ``data`` under fixed keys and return it.
    # NOTE(review): label_names, all_feats, all_labels, all_crops,
    # all_num_frames and all_frame_idx are assigned earlier in this function;
    # their contents are not visible from here.
    data['label_names'] = label_names
    data['features'] = all_feats
    data['labels'] = all_labels
    # data['crop_first'] = all_crop_first
    # data['crop_last'] = all_crop_last
    data['crops'] = all_crops
    data['num_frames'] = all_num_frames
    data['frame_idx'] = all_frame_idx
    # data['org_frames'] = all_org_frames
    return data


if __name__ == "__main__":
    # Script entry point: build the options, prepare the output directory,
    # record the command and git status, parse the mat file and dump the
    # result with joblib.
    opts = create_opts()
    opts = setup_opts(opts)
    # create the output directory
    # paths.setup_output_space(opts['out_dir'])
    paths.create_dir(opts['out_dir'])
    paths.save_command(opts['out_dir'])
    # log the git information
    git_helper.log_git_status(
        os.path.join(opts['out_dir'], '00_git_status.txt'))
    data = parse_matfile(opts)
    # The parsed data is written to <out_dir>/data.npy via joblib.dump.
    outname = os.path.join(opts['out_dir'], 'data.npy')
    joblib.dump(data, outname)
    # create the hdf5 version of the data