def _train_network(opts, network, optimizer, train_data, test_data, val_data, label_weight):
    """Train the network, logging outputs and checkpointing every epoch.

    Args:
        opts: Options dict; reads opts["flags"].total_epochs and
            opts["flags"].out_dir.
        network: torch.nn.Module being trained (moved to CPU to save,
            then back to the GPU).
        optimizer: Optimizer forwarded to _train_epoch.
        train_data, test_data: Datasets forwarded to _train_epoch.
        val_data: Unused in this variant -- TODO confirm whether validation
            was meant to be evaluated each epoch.
        label_weight: Per-label weights forwarded to _train_epoch and
            _log_outputs.
    """
    print("Beginning training...")
    step = 0
    for i in range(opts["flags"].total_epochs):
        print("EPOCH %d, %d" % (i, step))
        network.train()
        # _train_epoch returns the updated global step.
        step = _train_epoch(opts, step, network, optimizer, train_data, test_data, label_weight)
        print("\tFinished epoch")
        print("\tProcessing all examples...")
        network.eval()
        _log_outputs(opts, step, network, label_weight)

        # Save the network in its own folder (named by global step) inside
        # the networks folder.  The state_dict is moved to CPU so the
        # snapshot loads on CPU-only machines; the network is then pushed
        # back to the GPU for the next epoch.
        out_dir = os.path.join(
            opts["flags"].out_dir, "networks", "%d" % step)
        paths.create_dir(out_dir)
        out_name = os.path.join(out_dir, "network.pt")
        torch.save(network.cpu().state_dict(), out_name)
        network.cuda()
    print("Finished training.")
def create_train_test(opts):
    """Create the training, testing and validation splits.

    Reads the HDF5 file at opts["flags"].data, splits the experiments
    according to opts["flags"].split_type, and writes <name>_train.hdf5,
    <name>_test.hdf5 and <name>_valid.hdf5 next to the source file.
    Setup information (git status, command line) is logged into a sub
    folder named after opts["flags"].name.
    """
    # first setup the output space. The output space will be in the same folder
    # as the original data.hdf file, but with different names and a separate
    # sub folder for the setup information.
    base_out = os.path.dirname(opts["flags"].data)
    log_output_path = os.path.join(base_out, opts["flags"].name)
    base_out_name = os.path.join(base_out, opts["flags"].name)
    exp_path = os.path.join(base_out, "exps")

    paths.create_dir(log_output_path)

    # add the initial logging information to the output path.
    git_helper.log_git_status(
        os.path.join(log_output_path, "00_git_status.txt"))
    paths.save_command2(log_output_path, opts["argv"])

    # now to do the actual splitting.
    # first open the base data.hdf
    with h5py.File(opts["flags"].data, "r") as org_data:
        # NOTE: Dataset.value was removed in h5py 3.0; [()] is the
        # supported way to read a whole dataset into memory.
        exp_list = org_data["exp_names"][()]

        # start with every experiment enabled.
        exp_mask = numpy.ones((exp_list.shape[0], ), dtype="bool")

        if opts["flags"].split_type == 2:
            train_idx, test_idx, valid_idx = hantman_mouse.setup_full_split2(
                opts,
                org_data,
                mask=exp_mask,
                test_mouse=opts["flags"].test_mouse)
        else:
            # else split type 3... no split type 1
            train_idx, test_idx, valid_idx = hantman_mouse.setup_full_split3(
                opts, org_data, mask=exp_mask)

        # write each split out through the shared helper.
        for suffix, split_idx in (("_train.hdf5", train_idx),
                                  ("_test.hdf5", test_idx),
                                  ("_valid.hdf5", valid_idx)):
            save_experiments(opts,
                             org_data,
                             exp_path,
                             split_idx,
                             base_out_name + suffix,
                             mask=exp_mask)

    return
def _train_network(opts, network, optimizer, criterion, sampler, train_eval,
                   test_eval, valid_eval):
    """Train the network, periodically evaluating and checkpointing.

    Every update_iterations epochs the train/test/valid splits are scored
    (in DEBUG mode the train cost is reused for test/valid to save time)
    and logged via sequences_helper.log_outputs3; every save_iterations
    epochs the weights are snapshotted.  A final snapshot is always
    written after the last epoch.
    """
    print("Beginning training...")
    # one detection threshold (in frames) per behavior label.
    frame_thresh = [10 for _ in g_label_names]
    step = 0
    for i in range(opts["flags"].total_epochs):
        print("EPOCH %d, %d" % (i, step))
        tic = time.time()
        network.train()
        # _train_epoch returns the number of steps taken this epoch.
        step += _train_epoch(opts, network, optimizer, criterion, sampler)
        print("\t%f" % (time.time() - tic))
        print("\tFinished epoch")

        if i % opts['flags'].update_iterations == 0 and i != 0:
            print("\tProcessing all examples...")
            tic = time.time()
            network.eval()
            train_cost = _eval_network(opts, step, network, train_eval,
                                       criterion, "train")
            if DEBUG:
                # skip the expensive test/valid passes while debugging.
                test_cost = train_cost
                valid_cost = train_cost
            else:
                test_cost = _eval_network(opts, step, network, test_eval,
                                          criterion, "test")
                valid_cost = _eval_network(opts, step, network, valid_eval,
                                           criterion, "valid")

            sequences_helper.log_outputs3(opts,
                                          step,
                                          train_cost,
                                          test_cost,
                                          valid_cost,
                                          g_label_names,
                                          frame_thresh=frame_thresh)
        if i % opts['flags'].save_iterations == 0:
            # save the network in its own folder in the networks folder
            print("\tSaving network...")
            out_dir = os.path.join(opts["flags"].out_dir, "networks",
                                   "%d" % step)
            paths.create_dir(out_dir)
            out_name = os.path.join(out_dir, "network.pt")
            # snapshot on CPU for portability, then return to the GPU.
            torch.save(network.cpu().state_dict(), out_name)
            network.cuda()
        print("\tProcessing finished: %f" % (time.time() - tic))

    # final snapshot after training completes.
    out_dir = os.path.join(opts["flags"].out_dir, "networks", "%d" % step)
    paths.create_dir(out_dir)
    out_name = os.path.join(out_dir, "network.pt")
    torch.save(network.cpu().state_dict(), out_name)
    network.cuda()
    print("Finished training.")
# Example #4
# 0
def main(argv):
    """Entry point: set up options, samplers and the network, then train.

    Args:
        argv: Command line arguments forwarded to _setup_opts.
    """
    # BUG FIX: previously ignored the argv parameter and read sys.argv
    # directly, which broke programmatic callers passing their own args.
    opts = _setup_opts(argv)
    out_dir = os.path.join(opts["flags"].out_dir, 'proc_info')
    paths.create_dir(out_dir)
    paths.save_command(opts, out_dir)
    git_helper.log_git_status(os.path.join(out_dir, "git_status.txt"))

    # -1 means "leave the default CUDA device alone".
    if opts["flags"].cuda_device != -1:
        torch.cuda.set_device(opts["flags"].cuda_device)

    with h5py.File(opts["flags"].train_file, "r") as train_data:
        with h5py.File(opts["flags"].test_file, "r") as test_data:
            with h5py.File(opts["flags"].valid_file, "r") as valid_data:

                # mini-batch sampler over individual frames for training.
                sampler = HantmanVideoFrameSampler(
                    opts["rng"],
                    train_data,
                    opts["flags"].video_dir,
                    opts["flags"].hantman_mini_batch,
                    frames=opts["flags"].frames,
                    use_pool=True,
                    gpu_id=opts["flags"].cuda_device)

                label_weight = _get_label_weight(opts, train_data)
                # whole-video samplers for evaluation on each split; rng is
                # None here -- presumably a deterministic order, confirm in
                # HantmanVideoSampler.
                train_eval = HantmanVideoSampler(
                    None,
                    train_data,
                    opts["flags"].video_dir,
                    use_pool=True,
                    gpu_id=opts["flags"].cuda_device)
                test_eval = HantmanVideoSampler(
                    None,
                    test_data,
                    opts["flags"].video_dir,
                    use_pool=True,
                    gpu_id=opts["flags"].cuda_device)
                valid_eval = HantmanVideoSampler(
                    None,
                    valid_data,
                    opts["flags"].video_dir,
                    use_pool=True,
                    gpu_id=opts["flags"].cuda_device)

                network, optimizer, criterion = _init_network(
                    opts, label_weight)
                _proc_network(opts, network, optimizer, criterion, sampler,
                              train_eval, test_eval, valid_eval)
def _train_network(opts, network, optimizer, criterion,
                   sampler, train_eval, test_eval, valid_eval):
    """Run the training loop, scoring and checkpointing periodically.

    Every update_iterations epochs (and once more after the final epoch)
    the train/test/valid splits are evaluated, losses and f-scores are
    written to disk, and the weights are snapshotted under
    out_dir/networks/<step>/network.pt.
    """
    print("Beginning training...")
    # one detection threshold (in frames) for each of the six labels.
    frame_thresh = [10] * 6

    step = 0

    def score_and_checkpoint():
        # Evaluate all three splits at the current step, persist the
        # scores, and snapshot the network weights.
        network.eval()
        (train_loss, train_match, test_loss, test_match,
         valid_loss, valid_match) = _eval_network(
            opts, step, network, criterion, train_eval,
            test_eval, valid_eval, frame_thresh=frame_thresh)
        # write to disk
        _write_loss_scores(opts, step, train_loss, test_loss, valid_loss)
        _write_f_scores(opts, step, train_match, test_match, valid_match)
        # save the network in its own folder in the networks folder
        snap_dir = os.path.join(
            opts["flags"].out_dir, "networks", "%d" % step)
        paths.create_dir(snap_dir)
        snap_name = os.path.join(snap_dir, "network.pt")
        # snapshot on CPU for portability, then return to the GPU.
        torch.save(network.cpu().state_dict(), snap_name)
        network.cuda()

    for epoch in range(opts["flags"].total_epochs):
        print("EPOCH %d, %d" % (epoch, step))
        epoch_tic = time.time()
        network.train()
        step += _train_epoch(opts, network, optimizer, criterion, sampler)
        print("\t%f" % (time.time() - epoch_tic))
        print("\tFinished epoch")
        if epoch % opts["flags"].update_iterations == 0:
            score_and_checkpoint()

        print("\tProcessing finished: %f" % (time.time() - epoch_tic))

    score_and_checkpoint()
    print("Finished training.")
def write_csvs(out_dir, exp_name, label_names, labels, predict):
    """Write one CSV per behavior label with predictions and ground truth.

    Args:
        out_dir: Directory the per-label "odas_<label>.csv" files go into.
        exp_name: Unused in this variant -- TODO confirm whether per-
            experiment sub folders were intended.
        label_names: Iterable of behavior names, one per label column.
        labels: Ground-truth array indexed as labels[frame, label].
        predict: Prediction array indexed as predict[frame, label].
    """
    # frame, behavior, behavior ground truth, image
    frames = list(range(labels.shape[0]))
    names = list(label_names)
    paths.create_dir(out_dir)

    # for each label, write a prediction-vs-truth csv file.
    for j, name in enumerate(names):
        filename = "odas_%s.csv" % name
        current_exp_file = os.path.join(out_dir, filename)
        with open(current_exp_file, "w") as outfile:
            sequences_helper.write_csv(outfile, name, predict[:, j],
                                       labels[:, j], frames)
# Example #7
# 0
def main(opts):
    """Set up the output space and run feature preprocessing."""
    # create the output directory and record the invoking command line.
    paths.create_dir(opts["flags"].out_dir)
    paths.save_command2(opts["flags"].out_dir, opts["argv"])

    # log the git information
    git_helper.log_git_status(
        os.path.join(opts["flags"].out_dir, "00_git_status.txt"))

    # per-experiment output folder.
    exp_dir = os.path.join(opts["flags"].out_dir, "exps")
    paths.create_dir(exp_dir)

    base = opts["flags"].out_dir
    outname = os.path.join(base, "data.hdf5")
    logname = os.path.join(base, "00_log.txt")
    skipname = os.path.join(base, "00_skipped.txt")
    # a single with-statement replaces the original nested blocks.
    with open(logname, "w") as log, \
            open(skipname, "w") as skip_log, \
            h5py.File(outname, "w") as out_data:
        preprocess_features(opts, log, skip_log, out_data, exp_dir)
def _train_network(opts, network, optimizer, criterion, sampler, train_eval,
                   test_eval, valid_eval):
    """Train the network."""
    # NOTE(review): despite the name, this variant has no training loop --
    # it evaluates the network once at step 0 and the checkpoint write is
    # commented out.  optimizer and sampler are never used here; confirm
    # whether this is an intentional eval-only stub.
    print("Beginning training...")
    # one detection threshold (in frames) per behavior label.
    frame_thresh = [10, 10, 10, 10, 10, 10]

    network.eval()
    step = 0
    # score all three splits once and persist losses/f-scores.
    train_loss, train_match, test_loss, test_match, valid_loss, valid_match =\
        _eval_network(opts, step, network, criterion, train_eval,
                      test_eval, valid_eval, frame_thresh=frame_thresh)
    _write_loss_scores(opts, step, train_loss, test_loss, valid_loss)
    _write_f_scores(opts, step, train_match, test_match, valid_match)
    # save the network in its own folder in the networks folder
    out_dir = os.path.join(opts["flags"].out_dir, "networks", "%d" % step)
    paths.create_dir(out_dir)
    out_name = os.path.join(out_dir, "network.pt")
    # NOTE(review): the save itself is disabled, so out_name is unused.
    # torch.save(network.cpu().state_dict(), out_name)
    network.cuda()
    print("Finished training.")
def log_info(opts, train_vids, test_vids):
    """Log some other settings for the training setup."""
    info_dir = os.path.join(opts["flags"].out_dir, "info")
    paths.create_dir(info_dir)

    # one video name per line for each split.
    with open(os.path.join(info_dir, "train_vids.txt"), "w") as f:
        f.writelines("%s\n" % vid for vid in train_vids)

    with open(os.path.join(info_dir, "test_vids.txt"), "w") as f:
        f.writelines("%s\n" % vid for vid in test_vids)

    # summary of split sizes and the iteration schedule.
    flags = opts["flags"]
    with open(os.path.join(info_dir, "info.txt"), "w") as f:
        f.writelines([
            "Num train: %d\n" % len(train_vids),
            "Num test: %d\n" % len(test_vids),
            "Iters per epoch: %d\n" % flags.iter_per_epoch,
            "Update iterations: %d\n" % flags.update_iterations,
            "Save iterations: %d\n" % flags.save_iterations,
        ])
# Example #10
# 0
def _train_network(opts, network, optimizer, criterion, sampler, train_eval,
                   test_eval, valid_eval):
    """Train the network."""
    print("Beginning training...")
    # train_exps = train_data["experiments"].value
    # train_exps.sort()
    # one detection threshold (in frames) per behavior label.
    frame_thresh = [10 for label in g_label_names]
    step = 0

    for i in range(opts["flags"].total_epochs):
        print("EPOCH %d, %d" % (i, step))
        tic = time.time()
        network.train()
        # _train_epoch returns the number of steps taken this epoch.
        step += _train_epoch(opts, network, optimizer, criterion, sampler)
        print("\t%f" % (time.time() - tic))
        print("\tFinished epoch")

        # if DEBUG and i % 20 == 0 and i != 0:
        # if i % 20 == 0 and i != 0:
        # if i % 20 == 0 and i != 0:
        if i % opts["flags"].update_iterations == 0 and i != 0:
            print("\tProcessing all examples...")
            tic = time.time()
            network.eval()
            # train_loss, test_loss, valid_loss = _eval_network(
            #     opts, step, network, criterion, train_eval, test_eval, valid_eval)
            train_loss, train_match, test_loss, test_match, valid_loss, valid_match =\
                _eval_network(opts, step, network, criterion, train_eval,
                            test_eval, valid_eval, frame_thresh=frame_thresh)

            _write_loss_scores(opts, step, train_loss, test_loss, valid_loss)
            _write_f_scores(opts, step, train_match, test_match, valid_match)
            # sequences_helper.log_outputs3(
            #     opts, step, train_cost, test_cost, valid_cost, g_label_names,
            #     frame_thresh=frame_thresh)
            # import pdb; pdb.set_trace()

            # save the network in its own folder in the networks folder
            out_dir = os.path.join(opts["flags"].out_dir, "networks",
                                   "%d" % step)
            paths.create_dir(out_dir)
            out_name = os.path.join(out_dir, "network.pt")
            # state_dict is moved to CPU for the snapshot, then the
            # network goes back to the GPU for the next epoch.
            torch.save(network.cpu().state_dict(), out_name)
            network.cuda()
            print("\tProcessing finished: %f" % (time.time() - tic))

        # NOTE(review): this second evaluate-and-save runs on EVERY epoch,
        # which makes the update_iterations-gated block above redundant.
        # It looks like it was meant to sit after the loop (compare the
        # sibling _train_network variants in this file) -- confirm before
        # relying on the checkpoint cadence.
        network.eval()
        # train_loss, test_loss, valid_loss = _eval_network(
        #     opts, step, network, criterion, train_eval, test_eval, valid_eval)
        train_loss, train_match, test_loss, test_match, valid_loss, valid_match =\
            _eval_network(opts, step, network, criterion, train_eval,
                        test_eval, valid_eval, frame_thresh=frame_thresh)

        _write_loss_scores(opts, step, train_loss, test_loss, valid_loss)
        _write_f_scores(opts, step, train_match, test_match, valid_match)
        # save the network in its own folder in the networks folder
        out_dir = os.path.join(opts["flags"].out_dir, "networks", "%d" % step)
        paths.create_dir(out_dir)
        out_name = os.path.join(out_dir, "network.pt")
        torch.save(network.cpu().state_dict(), out_name)
        network.cuda()

    print("Finished training.")
    # import pdb; pdb.set_trace()
    out_data["features"] = all_feat
    out_data["experiments"] = all_exps
    out_data["mice"] = all_mice
    out_data["date"] = all_dates
    out_data["labels"] = all_labels
    # import pdb; pdb.set_trace()
    return


if __name__ == "__main__":
    # build default options and fill them in from the environment/args.
    opts = setup_opts(create_opts())

    # create the output directory and record the invoking command line.
    paths.create_dir(opts["out_dir"])
    paths.save_command(opts["out_dir"])

    # log the git information
    git_helper.log_git_status(
        os.path.join(opts["out_dir"], "00_git_status.txt"))

    out_base = opts["out_dir"]
    outname = os.path.join(out_base, "data.hdf5")
    logname = os.path.join(out_base, "00_log.txt")
    skipname = os.path.join(out_base, "00_skipped.txt")
    # a single with-statement replaces the original nested blocks.
    with open(logname, "w") as log, \
            open(skipname, "w") as skip_log, \
            h5py.File(outname, "w") as out_data:
        preprocess_features(opts, log, skip_log, out_data)
def create_opts():
    """Create an opts dictionary."""
    # defaults: empty paths, experiment dirs from the module globals.
    return {
        "filename": "",
        "out_dir": "",
        "exp_dir": g_exp_dir,
        "all_exp": g_all_exp_dir,
    }


if __name__ == "__main__":
    opts = create_opts()
    opts = setup_opts(opts)

    # create the output directory
    paths.create_dir(opts["out_dir"])
    paths.create_dir(os.path.join(opts["out_dir"], "exps"))
    paths.save_command(opts["out_dir"])

    # log the git information
    # git_helper.log_git_status(
    #     os.path.join(opts["out_dir"], "00_git_status.txt"))

    # try to load the locations of the original experiments.
    # mode "a" keeps any previously cached entries and allows appending.
    h5filename = os.path.join(opts["out_dir"], "00_exp_cache.hdf5")
    # NOTE(review): h5file is never closed in the visible portion of this
    # script -- confirm it is closed (or wrapped in a with-block) in the
    # code that follows.
    h5file = h5py.File(h5filename, "a")

    # load the mat file
    matfile = sio.loadmat(opts["filename"])

    logfilename = os.path.join(opts["out_dir"], "00_log.txt")
# Example #13
# 0
def create_train_test(opts):
    """Create the training and testing splits.

    Filters the experiments in opts["flags"].data (dropping long videos
    and pruned mice/dates), splits them into train/test sets, and writes
    <name>_train.hdf5 and <name>_test.hdf5 next to the source file.
    Setup information (git status, command line) is logged into a sub
    folder named after opts["flags"].name.
    """
    # first setup the output space. The output space will be in the same folder
    # as the original data.hdf file, but with different names and a separate
    # sub folder for the setup information.
    base_out = os.path.dirname(opts["flags"].data)
    log_output_path = os.path.join(base_out, opts["flags"].name)
    base_out_name = os.path.join(base_out, opts["flags"].name)
    exp_path = os.path.join(base_out, "exps")

    paths.create_dir(log_output_path)

    # add the initial logging information to the output path.
    git_helper.log_git_status(
        os.path.join(log_output_path, "00_git_status.txt"))
    paths.save_command2(log_output_path, opts["argv"])

    # now to do the actual splitting.
    # first open the base data.hdf
    with h5py.File(opts["flags"].data, "a") as org_data:
        # NOTE: Dataset.value was removed in h5py 3.0; [()] reads the
        # whole dataset.  (Leftover pdb.set_trace() calls removed.)
        exp_list = org_data["experiments"][()]
        # get rid of long videos.
        exp_mask = hantman_mouse.mask_long_vids(org_data, exp_list)
        # prune lists further to make an easier dataset.
        exp_mask = prune_mice_dates(opts, org_data, mask=exp_mask)
        if opts["flags"].one_mouse is True and opts["flags"].one_day is True:
            # If one mouse and one date, then just split randomly.
            num_vids = exp_mask.sum()
            rand_idx = opts["rng"].permutation(num_vids)
            # split percentage is 80% (should this be changeable?)
            split_idx = int(np.floor(num_vids * 0.8))
            train_idx = rand_idx[:split_idx]
            test_idx = rand_idx[split_idx:]
        elif opts["flags"].one_mouse is False and opts["flags"].one_day is True:
            # This combination was never implemented; fail loudly instead
            # of dropping into the debugger.
            raise NotImplementedError(
                "split for multiple mice on a single day is not defined")
        else:
            train_idx, test_idx = hantman_mouse.setup_train_test_samples(
                opts, org_data, mask=exp_mask)

        # write each split out through the shared helper.
        split_name = base_out_name + "_train.hdf5"
        save_experiments(opts,
                         org_data,
                         exp_path,
                         train_idx,
                         split_name,
                         mask=exp_mask)
        split_name = base_out_name + "_test.hdf5"
        save_experiments(opts,
                         org_data,
                         exp_path,
                         test_idx,
                         split_name,
                         mask=exp_mask)

    return
# Example #14
# 0
    data['label_names'] = label_names
    data['features'] = all_feats
    data['labels'] = all_labels
    # data['crop_first'] = all_crop_first
    # data['crop_last'] = all_crop_last
    data['crops'] = all_crops
    data['num_frames'] = all_num_frames
    data['frame_idx'] = all_frame_idx
    # data['org_frames'] = all_org_frames

    return data


if __name__ == "__main__":
    opts = create_opts()
    opts = setup_opts(opts)

    # create the output directory
    # paths.setup_output_space(opts['out_dir'])
    paths.create_dir(opts['out_dir'])
    paths.save_command(opts['out_dir'])

    # log the git information
    git_helper.log_git_status(
        os.path.join(opts['out_dir'], '00_git_status.txt'))
    # parse the experiment .mat file into an in-memory data structure.
    data = parse_matfile(opts)

    # dump the parsed data with joblib for fast reloading later.
    outname = os.path.join(opts['out_dir'], 'data.npy')
    joblib.dump(data, outname)

    # create the hdf5 version of the data
# Example #15
# 0
    # all_feat = numpy.concatenate(all_feat)
    # true_mean = all_feat.mean(axis=0)
    # true_std = all_feat.std(axis=0)
    # print numpy.any(numpy.abs(true_mean - data_mean) > 0.001)
    # print numpy.any(numpy.abs(true_std - data_std) > 0.001)
    return data_mean, data_std


if __name__ == "__main__":
    used_exp_filename = "/localhome/kwaki/data/hantman/used_exps.txt"
    data_dir = "/media/drive1/data/hantman/"
    out_dir = "/media/drive1/data/hantman_processed/hoghof/"
    # out_dir =\
    #     "/media/drive1/data/hantman_processed/hoghof_single_mouse_test/"

    paths.create_dir(out_dir)
    filename = os.path.join(out_dir, "data.hdf5")

    # to help make the conversion easier, load up a previously created
    # data file.
    all_data = joblib.load(("/media/drive1/data/hantman_processed/"
                            "joblib/test/data.npy"))
    # "joblib/relative_39window/data.npy"))

    lines = []
    with open(used_exp_filename, "r") as exp_file:
        lines = exp_file.readlines()
        for i in range(len(lines)):
            lines[i] = lines[i].rstrip()

    with h5py.File(filename, "w") as hdf5_file: