Esempio n. 1
0
def setup_output_space(opts):
    """Create the output directory tree and record the run's metadata.

    The following is created under ``opts["flags"].out_dir``: the directory
    itself plus the ``predictions``, ``plots``, ``opts`` and ``networks``
    sub-directories. The ``opts`` dict is then dumped to ``opts/opts.npy``,
    the command line is saved, and the git status is logged to
    ``git_status.txt``.

    Args:
        opts: options dict. Must contain ``"flags"`` (an object exposing
            ``out_dir``) and ``"argv"``.

    Returns:
        The same ``opts`` object, with ``opts["flags"]`` left as it was on
        entry.
    """
    flags = opts["flags"]
    out_dir = flags.out_dir

    # create the output directories
    create_dir(out_dir)
    for sub_dir in ("predictions", "plots", "opts", "networks"):
        create_dir(os.path.join(out_dir, sub_dir))

    # The GFLAGS object could not easily be pickled, so the dict is saved
    # without it; on re-load the flags have to be regenerated. The flags are
    # put back in a ``finally`` so that a failed dump does not leave the
    # caller's dict with ``opts["flags"] is None``.
    opts["flags"] = None
    try:
        joblib.dump(opts, os.path.join(out_dir, "opts", "opts.npy"))
    finally:
        opts["flags"] = flags

    # save the command
    save_command2(out_dir, opts["argv"])

    git_helper.log_git_status(os.path.join(out_dir, "git_status.txt"))

    return opts
def create_train_test(opts):
    """Create the training, testing and validation splits.

    The outputs are placed in the same folder as the original data file
    (``opts["flags"].data``): one ``<name>_train.hdf5``, ``<name>_test.hdf5``
    and ``<name>_valid.hdf5`` file, plus a separate ``<name>`` sub-folder that
    receives the git status and the saved command line.

    Args:
        opts: options dict. Reads ``opts["flags"].data``, ``.name``,
            ``.split_type`` and (for split type 2) ``.test_mouse``, as well
            as ``opts["argv"]``.

    Returns:
        None.
    """
    flags = opts["flags"]

    # The output space lives next to the original data file, with a separate
    # sub folder for the setup information.
    base_out = os.path.dirname(flags.data)
    log_output_path = os.path.join(base_out, flags.name)
    base_out_name = os.path.join(base_out, flags.name)
    exp_path = os.path.join(base_out, "exps")

    paths.create_dir(log_output_path)

    # add the initial logging information to the output path.
    git_helper.log_git_status(
        os.path.join(log_output_path, "00_git_status.txt"))
    paths.save_command2(log_output_path, opts["argv"])

    # now to do the actual splitting.
    # first open the base data file
    with h5py.File(flags.data, "r") as org_data:
        # ``Dataset.value`` was removed in h5py 3.0; ``[()]`` reads the whole
        # dataset on old and new versions alike.
        exp_list = org_data["exp_names"][()]

        # No experiment is masked out here: every entry is kept.
        exp_mask = numpy.ones((exp_list.shape[0], ), dtype="bool")

        if flags.split_type == 2:
            train_idx, test_idx, valid_idx = hantman_mouse.setup_full_split2(
                opts,
                org_data,
                mask=exp_mask,
                test_mouse=flags.test_mouse)
        else:
            # else split type 3... no split type 1
            train_idx, test_idx, valid_idx = hantman_mouse.setup_full_split3(
                opts, org_data, mask=exp_mask)

        # Write one output file per split, in train / test / valid order.
        splits = (
            ("train", train_idx),
            ("test", test_idx),
            ("valid", valid_idx),
        )
        for split_label, split_idx in splits:
            split_name = base_out_name + "_" + split_label + ".hdf5"
            save_experiments(opts,
                             org_data,
                             exp_path,
                             split_idx,
                             split_name,
                             mask=exp_mask)

    return
Esempio n. 3
0
def main(argv):
    """Set up the run, build the samplers and network, and process them.

    Options are parsed from ``sys.argv``; the ``argv`` parameter itself is
    not read. A ``proc_info`` directory is created under the output
    directory to hold the saved command and the git status. If the
    ``cuda_device`` flag is not -1 that device is selected. The train, test
    and valid HDF5 files are then opened read-only and handed to the
    samplers, and everything is passed on to ``_proc_network``.

    Args:
        argv: unused.
    """
    opts = _setup_opts(sys.argv)
    flags = opts["flags"]

    # paths.setup_output_space(opts)
    info_dir = os.path.join(flags.out_dir, 'proc_info')
    paths.create_dir(info_dir)
    paths.save_command(opts, info_dir)
    git_helper.log_git_status(os.path.join(info_dir, "git_status.txt"))

    if flags.cuda_device != -1:
        torch.cuda.set_device(flags.cuda_device)

    # Keyword arguments shared by every sampler built below.
    pool_kwargs = {"use_pool": True, "gpu_id": flags.cuda_device}

    with h5py.File(flags.train_file, "r") as train_data, \
            h5py.File(flags.test_file, "r") as test_data, \
            h5py.File(flags.valid_file, "r") as valid_data:

        # Frame sampler over the training data, used for training.
        sampler = HantmanVideoFrameSampler(
            opts["rng"],
            train_data,
            flags.video_dir,
            flags.hantman_mini_batch,
            frames=flags.frames,
            **pool_kwargs)

        label_weight = _get_label_weight(opts, train_data)

        # One video sampler per split, used for evaluation.
        train_eval = HantmanVideoSampler(
            None, train_data, flags.video_dir, **pool_kwargs)
        test_eval = HantmanVideoSampler(
            None, test_data, flags.video_dir, **pool_kwargs)
        valid_eval = HantmanVideoSampler(
            None, valid_data, flags.video_dir, **pool_kwargs)

        network, optimizer, criterion = _init_network(opts, label_weight)

        _proc_network(opts, network, optimizer, criterion, sampler,
                      train_eval, test_eval, valid_eval)
Esempio n. 4
0
def main(opts):
    """Preprocess the features into ``data.hdf5`` inside the output directory.

    Creates ``opts["flags"].out_dir`` and an ``exps`` sub-directory, saves
    the command line, logs the git status, and then calls
    ``preprocess_features`` with an open log file (``00_log.txt``), an open
    skip log (``00_skipped.txt``) and the open output HDF5 file.

    Args:
        opts: options dict. Reads ``opts["flags"].out_dir`` and
            ``opts["argv"]``.

    Returns:
        None.
    """
    out_dir = opts["flags"].out_dir

    # create the output directory
    paths.create_dir(out_dir)
    paths.save_command2(out_dir, opts["argv"])

    # log the git information
    git_helper.log_git_status(os.path.join(out_dir, "00_git_status.txt"))

    exp_dir = os.path.join(out_dir, "exps")
    paths.create_dir(exp_dir)

    outname = os.path.join(out_dir, "data.hdf5")
    logname = os.path.join(out_dir, "00_log.txt")
    skipname = os.path.join(out_dir, "00_skipped.txt")
    with open(logname, "w") as log:
        with open(skipname, "w") as skip_log:
            with h5py.File(outname, "w") as out_data:
                preprocess_features(opts, log, skip_log, out_data, exp_dir)

    # The earlier version went on to assign ``all_feat``, ``all_exps`` etc.
    # into ``out_data`` at this point. Those names are never defined in this
    # function and the file is already closed here, so the assignments could
    # only raise; all writing is left to ``preprocess_features``.
    return


if __name__ == "__main__":
    # Build the options and pass them through the setup step.
    opts = setup_opts(create_opts())

    # Make sure the output directory exists, then save the command there.
    # paths.setup_output_space(opts["out_dir"])
    paths.create_dir(opts["out_dir"])
    paths.save_command(opts["out_dir"])

    # Log the git information alongside the outputs.
    git_helper.log_git_status(
        os.path.join(opts["out_dir"], "00_git_status.txt"))

    # Paths of the two text logs and of the HDF5 output file.
    logname = os.path.join(opts["out_dir"], "00_log.txt")
    skipname = os.path.join(opts["out_dir"], "00_skipped.txt")
    outname = os.path.join(opts["out_dir"], "data.hdf5")

    # All three files stay open for the duration of the preprocessing.
    with open(logname, "w") as log, \
            open(skipname, "w") as skip_log, \
            h5py.File(outname, "w") as out_data:
        preprocess_features(opts, log, skip_log, out_data)
Esempio n. 6
0
def create_train_test(opts):
    """Create the training and testing splits.

    The outputs are placed in the same folder as the original data file
    (``opts["flags"].data``): one ``<name>_train.hdf5`` and one
    ``<name>_test.hdf5`` file, plus a separate ``<name>`` sub-folder that
    receives the git status and the saved command line.

    Args:
        opts: options dict. Reads ``opts["flags"].data``, ``.name``,
            ``.one_mouse`` and ``.one_day``, as well as ``opts["rng"]`` and
            ``opts["argv"]``.

    Returns:
        None.

    Raises:
        NotImplementedError: if ``one_mouse`` is False while ``one_day`` is
            True; no split is defined for that combination.
    """
    flags = opts["flags"]

    # The output space lives next to the original data file, with a separate
    # sub folder for the setup information.
    base_out = os.path.dirname(flags.data)
    log_output_path = os.path.join(base_out, flags.name)
    base_out_name = os.path.join(base_out, flags.name)
    exp_path = os.path.join(base_out, "exps")

    paths.create_dir(log_output_path)

    # add the initial logging information to the output path.
    git_helper.log_git_status(
        os.path.join(log_output_path, "00_git_status.txt"))
    paths.save_command2(log_output_path, opts["argv"])

    # now to do the actual splitting.
    # first open the base data file
    # NOTE(review): the file is opened in append mode although nothing
    # visible in this function writes to it -- confirm whether the helpers
    # called below need write access.
    with h5py.File(flags.data, "a") as org_data:
        # ``Dataset.value`` was removed in h5py 3.0; ``[()]`` reads the whole
        # dataset on old and new versions alike.
        exp_list = org_data["experiments"][()]

        # get rid of long videos.
        exp_mask = hantman_mouse.mask_long_vids(org_data, exp_list)
        # prune lists further to make an easier dataset.
        exp_mask = prune_mice_dates(opts, org_data, mask=exp_mask)

        if flags.one_mouse is True and flags.one_day is True:
            # If one mouse and one date, then just split randomly.
            num_vids = exp_mask.sum()
            rand_idx = opts["rng"].permutation(num_vids)
            # split percentage is 80% (should this be changeable?)
            split_idx = int(np.floor(num_vids * 0.8))
            train_idx = rand_idx[:split_idx]
            test_idx = rand_idx[split_idx:]
        elif flags.one_mouse is False and flags.one_day is True:
            # No split exists for this combination. Fail explicitly rather
            # than carrying on with train_idx/test_idx unassigned.
            raise NotImplementedError(
                "Not defined: one_mouse=False with one_day=True.")
        else:
            train_idx, test_idx = hantman_mouse.setup_train_test_samples(
                opts, org_data, mask=exp_mask)

        # Write one output file per split, in train / test order.
        for split_label, split_indices in (("train", train_idx),
                                           ("test", test_idx)):
            split_name = base_out_name + "_" + split_label + ".hdf5"
            save_experiments(opts,
                             org_data,
                             exp_path,
                             split_indices,
                             split_name,
                             mask=exp_mask)

    return
Esempio n. 7
0
    data['label_names'] = label_names
    data['features'] = all_feats
    data['labels'] = all_labels
    # data['crop_first'] = all_crop_first
    # data['crop_last'] = all_crop_last
    data['crops'] = all_crops
    data['num_frames'] = all_num_frames
    data['frame_idx'] = all_frame_idx
    # data['org_frames'] = all_org_frames

    return data


if __name__ == "__main__":
    opts = create_opts()
    opts = setup_opts(opts)

    # create the output directory
    # paths.setup_output_space(opts['out_dir'])
    paths.create_dir(opts['out_dir'])
    paths.save_command(opts['out_dir'])

    # log the git information
    git_helper.log_git_status(
        os.path.join(opts['out_dir'], '00_git_status.txt'))
    data = parse_matfile(opts)

    outname = os.path.join(opts['out_dir'], 'data.npy')
    joblib.dump(data, outname)

    # create the hdf5 version of the data