def evaluate_fsrcnn(cp=12):
    resultlist = []
    for d in [240, 280]:
        for s in [48, 64]:
            for m in [2, 3, 4]:
                name = 'FSRCNN_d{0:}_s{1:}_m{2:}'.format(d, s, m)
                for run in range(2):
                    resultlist.append([d, s, m, run])
                    for mode in ['validation', 'test']:
                        savep = utils.get_save_path(name,
                                                    exp_no=run,
                                                    ep_no=cp,
                                                    mode=mode)
                        resultlist[-1] += list(main(savep, mode))
    as_str = tabulate.tabulate(resultlist,
                               headers=[
                                   'd', 's', 'm', 'run', 'mse valid',
                                   'psnr valid', 'bc_mse valid',
                                   'bc_psnr valid', 'mse test', 'psnr test',
                                   'bc_mse test', 'bc_psnr test'
                               ])

    file = open('../results_keras/summaries/FSRCNN_eval.txt', 'w')
    file.write(as_str)
    file.close()
 def __init__(self, dataset, ds_metric, algo):
     if ds_metric == 'ged':
         self.dist_sim_func = ged
         ds = 'dist'
     elif ds_metric == 'glet':  # graphlet similarity metric
         self.dist_sim_func = None  # should be pre-computed and loaded
         ds = 'sim'
     elif ds_metric == 'mcs':
         self.dist_sim_func = mcs
         ds = 'dist'
     else:
         raise RuntimeError(
             'Unknown distance/similarity metric {}'.format(ds_metric))
     self.sfn = '{}/{}_{}_{}{}_gidpair_{}_map'.format(
         get_save_path(), dataset, ds_metric, algo, '' if algo == 'astar'
         or algo == 'graphlet' or algo == 'mccreesh2017' else '_revtakemin',
         ds)
     self.algo = algo
     self.gidpair_ds_map = load(self.sfn)
     if not self.gidpair_ds_map:
         self.gidpair_ds_map = OrderedDict()
         save(self.sfn, self.gidpair_ds_map)
         print('Saved dist/sim map to {} with {} entries'.format(
             self.sfn, len(self.gidpair_ds_map)))
     else:
         print('Loaded dist/sim map from {} with {} entries'.format(
             self.sfn, len(self.gidpair_ds_map)))
def get_gs_ds_mat(gs1, gs2, dist_sim_calculator, tvt1, tvt2,
                  dataset, dist_metric, dist_algo, norm, dec_gsize, return_neg1=False):
    mat_str = '{}({})_{}({})'.format(tvt1, len(gs1), tvt2, len(gs2))
    dir = '{}/ds_mat'.format(get_save_path())
    create_dir_if_not_exists(dir)
    sfn = '{}/{}_{}_ds_mat_{}{}_{}'.format(
        dir, dataset, mat_str, dist_metric,
        get_norm_str(norm), dist_algo)
    l = load(sfn)
    if l is not None:
        print('Loaded from {}'.format(sfn))
        return l
    m = len(gs1)
    n = len(gs2)
    dist_mat = np.zeros((m, n))
    for i in range(m):
        for j in range(n):
            g1 = gs1[i]
            g2 = gs2[j]
            d, normed_d = dist_sim_calculator.calculate_dist_sim(
                g1, g2, dec_gsize=dec_gsize, return_neg1=return_neg1)
            if norm:
                dist_mat[i][j] = normed_d
            else:
                dist_mat[i][j] = d
    save(sfn, dist_mat)
    print('Saved to {}'.format(sfn))
    return dist_mat
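A minimal usage sketch (an illustration, not from the original source): it assumes gs1 and gs2 are already-loaded lists of graphs, that DistSimCalculator is the class whose __init__ appears above, and that the dataset/metric/algorithm names are placeholder values.

# Hypothetical usage sketch: placeholder names, and gs1/gs2 are assumed to be
# lists of graphs prepared elsewhere.
calc = DistSimCalculator('aids700nef', 'ged', 'astar')
mat = get_gs_ds_mat(gs1, gs2, calc, 'train', 'test',
                    'aids700nef', 'ged', 'astar',
                    norm=True, dec_gsize=True)
print(mat.shape)  # (len(gs1), len(gs2))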
Example #4
def main_func():
    # Select input file
    file_in = utils.get_open_path('Select input file')
    if not file_in:
        raise Exception('Input file selection aborted')

    # Select output file
    file_out = utils.get_save_path('Select output file')
    if not file_out:
        raise Exception('Output file selection aborted')

    # Set xarray to keep attributes for DataArrays and Datasets
    xr.set_options(keep_attrs=True)

    # Open file into a Dataset
    ds = xr.open_dataset(file_in, engine='netcdf4', mask_and_scale=False)

    # Convert calendar to standard one
    utils.convert_calendar(ds)

    # Add to file history
    utils.add_to_history(ds=ds,
                         txt='Drozdowski: set calendar to standard',
                         prepend=True)

    # No encodings because they interfere with calendar setting
    encodings = {}

    # Save Dataset to file with encodings
    ds.to_netcdf(path=file_out, engine='netcdf4', encoding=encodings)

    # Close Dataset file
    ds.close()

    print('Done!!!')
    def __init__(self, dataset, ds_metric, algo):
        if logging_enabled:
            print(
                "- Entered dist_sim_calculator::DistSimCalculator::__init__ Constructor Method"
            )

        if ds_metric == 'ged':
            self.dist_sim_func = None
            ds = 'dist'
        else:
            raise RuntimeError(
                'Unknown distance/similarity metric {}'.format(ds_metric))

        self.sfn = '{}\\{}_{}_{}{}_gidpair_{}_map'.format(
            get_save_path(), dataset, ds_metric, algo, '' if algo == 'astar'
            or algo == 'graphlet' or algo == 'mccreesh2017' else '_revtakemin',
            ds)

        self.algo = algo
        self.gidpair_ds_map = load(self.sfn)

        if not self.gidpair_ds_map:
            self.gidpair_ds_map = OrderedDict()
            save(self.sfn, self.gidpair_ds_map)
            print('info: Saved dist/sim map to {} with {} entries'.format(
                self.sfn, len(self.gidpair_ds_map)))
        else:
            print('info: Loaded dist/sim map from {} with {} entries'.format(
                self.sfn, len(self.gidpair_ds_map)))
def main_evaluate_unets(cp=49):
    resultlist = []
    for n_l in [4, 3, 2]:
        for n_f in [64, 32]:
            for n_c in [3, 2]:
                deconv = True
                name = 'Unet_nl{0:}_nc{1:}_nf{2:}_dc{3:}'.format(
                    n_l, n_c, n_f, int(deconv))
                run = 0
                resultlist.append([n_l, n_f, n_c, run])
                for mode in ['validation', 'test']:
                    savep = utils.get_save_path(name,
                                                exp_no=run,
                                                ep_no=cp,
                                                mode=mode)
                    resultlist[-1] += list(main(savep, mode))

    as_str = tabulate.tabulate(resultlist,
                               headers=[
                                   'num_levels', 'start_num_filters',
                                   'num_convs', 'run', 'mse valid',
                                   'psnr valid', 'bc_mse valid',
                                   'bc_psnr valid', 'mse test', 'psnr test',
                                   'bc_mse test', 'bc_psnr test'
                               ])
    file = open('../results_keras/summaries/Unet_eval.txt', 'w')
    file.write(as_str)
    file.close()
Example #7
def load_data():
    dir = join(get_save_path(), 'split')
    dataset_name = FLAGS.dataset
    train_ratio = int(FLAGS.tvt_ratio[0] * 100)
    val_ratio = int(FLAGS.tvt_ratio[1] * 100)
    test_ratio = 100 - train_ratio - val_ratio
    if 'presplit' not in dataset_name:
        save_fn = '{}_train_{}_val_{}_test_{}_seed_{}_window_size_{}'.format(dataset_name, train_ratio,
                                                              val_ratio, test_ratio,
                                                              FLAGS.random_seed, FLAGS.word_window_size)
    else:
        save_fn = '{}_train_val_test_{}_window_size_{}'.format(dataset_name, FLAGS.random_seed, FLAGS.word_window_size)
    path = join(dir, save_fn)
    rtn = load(path)
    if rtn:
        train_data, val_data, test_data = rtn['train_data'], rtn['val_data'], rtn['test_data']
    else:
        train_data, val_data, test_data = _load_tvt_data_helper()
        save({'train_data': train_data, 'val_data': val_data, 'test_data': test_data}, path)
    dataset = FLAGS.dataset
    if "small" in dataset or "presplit" in dataset or 'sentiment' in dataset:
        dataset_name = "_".join(dataset.split("_")[:-1])
    else:
        dataset_name = dataset

    orig_text_path = join(get_corpus_path(), dataset_name + "_sentences.txt")
    raw_doc_list = []
    f = open(orig_text_path, 'rb')
    for line in f.readlines():
        raw_doc_list.append(line.strip().decode())
    f.close()

    return train_data, val_data, test_data, raw_doc_list
def main_func():
    # Select input file
    file_in = utils.get_open_path('Select input file')
    if not file_in:
        raise Exception('Input file selection aborted')

    # Select output file
    file_out = utils.get_save_path('Select output file')
    if not file_out:
        raise Exception('Output file selection aborted')

    # Set xarray to keep attributes for DataArrays and Datasets
    xr.set_options(keep_attrs=True)

    # Open file into a Dataset
    ds = xr.open_dataset(file_in, engine='netcdf4', mask_and_scale=False)

    # Iterate each variable, looking for those with 4 dimensions
    for var_name in ds.data_vars:
        da = ds[var_name]
        dims = da.dims
        if len(dims) != 4:
            continue
        # Rearrange data so that 2nd dimension becomes 1st dimension
        da = da.transpose(dims[1], dims[0], dims[2], dims[3])
        n = 0
        # Get dimension data for 2nd dimension
        dims_data = da[dims[1]].data
        # Iterate each sub-DataArray in rearranged data
        for da_sub in da:
            # Assemble a name for the sub-DataArray
            name = da_sub.name + '_' + dims[1] + '_' + str(dims_data[n])
            # Assign sub-DataArray to a new variable in Dataset
            ds[name] = da_sub
            n += 1

    # Convert calendar to standard one
    utils.convert_calendar(ds)

    # Add to file history
    utils.add_to_history(
        ds=ds,
        txt='Drozdowski: explode 4D variables into multiple 3D variables',
        prepend=True)
    utils.add_to_history(ds=ds,
                         txt='Drozdowski: set calendar to standard',
                         prepend=True)

    # Get default encodings for use with Dataset::to_netcdf() method
    encodings = utils.get_to_netcdf_encodings(ds=ds, comp_level=4)

    # Save file with above encoding
    ds.to_netcdf(path=file_out, encoding=encodings)

    # Close Dataset file
    ds.close()

    print('Done!!!')
def main_evaluate_checkpoints(
        name='FSRCNN_d{0:}_s{1:}_m{2:}'.format(240, 64, 3), run=2):
    resultlist = []
    for cp in range(1, 50):
        resultlist.append([cp])
        for mode in ['validation', 'test']:
            savep = utils.get_save_path(name, exp_no=run, ep_no=cp)
            resultlist[-1] += main(savep, mode)
    as_str = tabulate.tabulate(resultlist,
                               headers=[
                                   'it', 'mse valid', 'psnr valid',
                                   'bc_mse valid', 'bc_psnr valid', 'mse test',
                                   'psnr test', 'bc_mse test', 'bc_psnr_test'
                               ])

    summary_file = open(
        os.path.dirname(utils.get_save_path(name, run, cp)) +
        '/checkpoint_eval.txt', 'w')
    summary_file.write(as_str)
    summary_file.close()
def get_gs_ds_mat(gs1,
                  gs2,
                  dist_sim_calculator,
                  tvt1,
                  tvt2,
                  dataset,
                  dist_metric,
                  dist_algo,
                  norm,
                  dec_gsize,
                  return_neg1=False):

    if logging_enabled:
        print("- Entered dist_sim_calculator::get_gs_ds_mat Global Method")

    mat_str = '{}({})_{}({})'.format(tvt1, len(gs1), tvt2, len(gs2))
    dir = '{}\\ds_mat'.format(get_save_path())
    create_dir_if_not_exists(dir)
    sfn = '{}\\{}_{}_ds_mat_{}{}_{}'.format(dir, dataset, mat_str, dist_metric,
                                            get_norm_str(norm), dist_algo)

    l = load(sfn)

    if l is not None:
        print('Loaded from {}'.format(sfn))
        return l

    if not dist_sim_calculator.gidpair_ds_map:
        # dist_sim_calculator.initial_calculate_dist_sim(gs1, gs2)
        dist_sim_calculator.initial_dist_sim_pairs_with_netcomp(gs1, gs2)

    m = len(gs1)
    n = len(gs2)

    dist_mat = np.zeros((m, n))
    for i in range(m):
        for j in range(n):
            g1 = gs1[i]
            g2 = gs2[j]
            d, normed_d = dist_sim_calculator.calculate_dist_sim(
                g1, g2, dec_gsize=dec_gsize, return_neg1=return_neg1)
            if norm:
                dist_mat[i][j] = normed_d
                print("i: ", i, ", j: ", j, ", d: ", d, ", normed_d: ",
                      normed_d)

            else:
                dist_mat[i][j] = d

    save(sfn, dist_mat)
    print('Saved to {}'.format(sfn))

    return dist_mat
Example #11
    def _load_train_triples(self, data, dist_calculator):
        gs = [g.nxgraph for g in data.train_gs]
        dist_mat = get_gs_dist_mat(gs, gs, dist_calculator, 'train', 'train',
                                   FLAGS.dataset, FLAGS.dist_metric,
                                   FLAGS.dist_algo, FLAGS.dist_norm)
        m, n = dist_mat.shape
        triples = []

        generate_flag = FLAGS.fake_generation is not None
        repeat_flag = FLAGS.top_repeater is not None
        if generate_flag:
            assert ('fake_' in FLAGS.fake_generation)
            assert (not FLAGS.top_repeater)
            fake_num = int(FLAGS.fake_generation.split('_')[1])
            filepath = get_save_path() + '/{}_fake_{}'.format(
                FLAGS.dataset, fake_num)
            load_data = load(filepath)
            if load_data:
                print('Loaded from {} with {} triples'.format(
                    filepath, len(load_data.li)))
                return load_data
            node_feat_encoder = data.node_feat_encoder
        elif repeat_flag:
            assert ('_repeat_' in FLAGS.top_repeater)
            assert (not FLAGS.fake_generation)
            top_num = int(FLAGS.top_repeater.split('_')[0])
            repeat_num = int(FLAGS.top_repeater.split('_')[2])

        dist_mat_idx = np.argsort(dist_mat, axis=1)
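        # dist_mat_idx[i] holds the column indices of row i sorted by
        # increasing distance, so the closest training graphs come first.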
        for i in range(m):
            g1 = data.train_gs[i]
            if generate_flag:
                sample_graphs, sample_geds = graph_generator(
                    g1.nxgraph, fake_num)
                print(i, m, sample_geds)
                for sample_g, sample_ged in zip(sample_graphs, sample_geds):
                    triples.append(
                        (ModelGraph(g1.nxgraph, node_feat_encoder),
                         ModelGraph(sample_g, node_feat_encoder),
                         self.sim_kernel.dist_to_sim_np(sample_ged)))
            for j in range(n):
                col = dist_mat_idx[i][j]
                g2, ged = data.train_gs[col], dist_mat[i][col]
                triples.append((g1, g2, self.sim_kernel.dist_to_sim_np(ged)))
                if repeat_flag and j <= top_num:
                    for _ in range(repeat_num):
                        triples.append(
                            (g1, g2, self.sim_kernel.dist_to_sim_np(ged)))
        rtn = SelfShuffleList(triples)
        if generate_flag:
            save(filepath, rtn)
            print('Saved to {} with {} triples'.format(filepath, len(rtn.li)))
        return rtn
def get_raw_data(filename):
    csv_filename, np_filename = utils.get_save_path(filename)
    position_df = pd.read_csv(csv_filename+"_worldpos.csv", usecols=['Hips.X','Hips.Y','Hips.Z'])
    rotation_df = pd.read_csv(csv_filename+"_rotations.csv")
    
    #data = utils.get_processed_data(csv_filename, np_filename, training_split, processes, processes)
    data = rotation_df.copy()
    # Add the root (hip) data for spatial movement
    data['Hips.Pos.X'] = position_df.copy().pop('Hips.X')
    data['Hips.Pos.Y'] = position_df.copy().pop('Hips.Y')
    data['Hips.Pos.Z'] = position_df.copy().pop('Hips.Z')
    return data
Example #13
def _load_tvt_data_helper():
    dir = join(get_save_path(), 'all')
    path = join(dir, FLAGS.dataset + '_all_window_' + str(FLAGS.word_window_size))
    rtn = load(path)
    if rtn:
        dataset = TextDataset(None, None, None, None, None, None, rtn)
    else:
        dataset = build_text_graph_dataset(FLAGS.dataset, FLAGS.word_window_size)
        gc.collect()
        save(dataset.__dict__, path)

    train_dataset, val_dataset, test_dataset = dataset.tvt_split(FLAGS.tvt_ratio[:2], FLAGS.tvt_list, FLAGS.random_seed)
    return train_dataset, val_dataset, test_dataset
Example #14
def run_evaluation(exp_name,
                   run,
                   ep_no,
                   inner_cube=(24, 48, 48),
                   bs=6,
                   resolution=16):
    for mode in ['validation', 'test']:
        modelp = utils.get_model_path(exp_name, exp_no=run, ep_no=ep_no)
        savep = utils.get_save_path(exp_name,
                                    exp_no=run,
                                    ep_no=ep_no,
                                    mode=mode)
        simple_evaluator = Evaluator(modelp, savep,
                                     utils.get_data_path(mode, resolution))
        simple_evaluator.run_full_evaluation(inner_cube=inner_cube, bs=bs)
def get_data(filename, process, train_split):
    csv_filename, np_filename = utils.get_save_path(filename)
    position_df = pd.read_csv(csv_filename+"_worldpos.csv", usecols=['Hips.X','Hips.Y','Hips.Z'])
    rotation_df = pd.read_csv(csv_filename+"_rotations.csv")
    
    #print(position_df.head())
    position_df = _pre_process_pos_data(position_df, process, train_split)    
    rotation_df = _pre_process_rot_data(rotation_df, process, train_split)
    #print(position_df.head())
    
    data = rotation_df.copy()
    # Add the root (hip) data for spatial movement
    data['Hips.Pos.X'] = position_df.copy().pop('Hips.X')
    data['Hips.Pos.Y'] = position_df.copy().pop('Hips.Y')
    data['Hips.Pos.Z'] = position_df.copy().pop('Hips.Z')
    return data
Example #16
def shifted_evaluation(exp_name, run, cp, resolution=16):
    for mode in ['validation', 'test']:
        modelp = utils.get_model_path(exp_name, exp_no=run, ep_no=cp)
        shifted_evaluator = Evaluator(modelp, '',
                                      utils.get_data_path(mode, resolution))
        for shift in range(int(shifted_evaluator.sc)):
            savep = utils.get_save_path(exp_name,
                                        exp_no=run,
                                        ep_no=cp,
                                        mode=mode,
                                        add='_shift' + str(shift))
            shifted_evaluator.reset_save_path(savep)
            shifted_evaluator.run_full_evaluation(inner_cube=(24, 48, 48),
                                                  bs=6,
                                                  safety_margin=(shift,
                                                                 -shift))
def main_func():
    # Select input file
    file_in = utils.get_open_path('Select input file')
    if not file_in:
        raise Exception('Input file selection aborted')

    # Select output file
    file_out = utils.get_save_path('Select output file')
    if not file_out:
        raise Exception('Output file selection aborted')

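    # A date is a leap day when it is February 29 of a leap year: the year is
    # divisible by 4 and, for century years, also divisible by 400.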
    def is_leap_day(dt):
        return (dt.year % 4 == 0) & (
            (dt.year % 100 != 0) |
            (dt.year % 400 == 0)) & (dt.month == 2) & (dt.day == 29)

    # Set xarray to keep attributes for DataArrays and Datasets
    xr.set_options(keep_attrs=True)

    # Open file into a Dataset
    ds = xr.open_dataset(file_in, engine='netcdf4', mask_and_scale=False)

    # Select records that aren't leap days into a Dataset
    mask = is_leap_day(ds.time.dt)
    ds = ds.sel(time=~(mask))

    # Convert calendar to noleap
    utils.convert_calendar(ds, 'noleap')

    # Add to file history
    utils.add_to_history(ds=ds,
                         txt='Drozdowski: remove leap days',
                         prepend=True)
    utils.add_to_history(ds=ds,
                         txt='Drozdowski: set calendar to noleap',
                         prepend=True)

    # Get default encodings for use with Dataset::to_netcdf() method
    encodings = utils.get_to_netcdf_encodings(ds=ds, comp_level=4)

    # Save Dataset to file with encodings
    ds.to_netcdf(path=file_out, engine='netcdf4', encoding=encodings)

    # Close Dataset file
    ds.close()

    print('Done!!!')
Example #18
 def _load_json_emb(self):
     fn = get_save_path() + '/{}_graph2vec_json_dict.pkl'.format(
         self.dataset)
     if isfile(fn):
         with open(fn, 'rb') as handle:
             d = load_pkl(handle)
             print('Loaded json dict from {}'.format(fn))
             return d
     dfn = get_model_path(
     ) + '/graph2vec_tf/embeddings/{}_train_test_dims_{}_epochs_1000_lr_0.3_embeddings.txt'.format(
         self.dataset, self.dim)
     with open(dfn) as json_data:
         d = json.load(json_data)
     with open(fn, 'wb') as handle:
         save_pkl(d, handle)
         print('Loaded json dict from {}\nSaved to {}'.format(dfn, fn))
     return d
Example #19
 def __init__(self, dataset, dist_metric, algo):
     self.sfn = '{}/{}_{}_{}{}_gidpair_dist_map'.format(
         get_save_path(), dataset, dist_metric, algo,
         '' if algo == 'astar' else '_revtakemin')
     self.algo = algo
     self.gidpair_dist_map = load(self.sfn)
     if not self.gidpair_dist_map:
         self.gidpair_dist_map = OrderedDict()
         save(self.sfn, self.gidpair_dist_map)
         print('Saved dist map to {} with {} entries'.format( \
             self.sfn, len(self.gidpair_dist_map)))
     else:
         print('Loaded dist map from {} with {} entries'.format( \
             self.sfn, len(self.gidpair_dist_map)))
     if dist_metric == 'ged':
         self.dist_func = ged
     else:
         raise RuntimeError('Unknown distance metric {}'.format(dist_metric))
def main_evaluate_fsrcnn_longrun(run=2, cp=49):
    resultlist = []
    for d, s, m in zip([240, 240, 280], [64, 64, 64], [3, 2, 2]):
        name = 'FSRCNN_d{0:}_s{1:}_m{2:}'.format(d, s, m)
        resultlist.append([d, s, m, run])
        for mode in ['validation', 'test']:
            savep = utils.get_save_path(name, exp_no=run, ep_no=cp, mode=mode)
            resultlist[-1] += list(main(savep, mode))
    as_str = tabulate.tabulate(resultlist,
                               headers=[
                                   'd', 's', 'm', 'run', 'mse valid',
                                   'psnr valid', 'bc_mse valid',
                                   'bc_psnr valid', 'mse test', 'psnr test',
                                   'bc_mse test', 'bc_psnr test'
                               ])
    file = open('../results_keras/summaries/FSRCNN_eval_longrun.txt', 'w')
    file.write(as_str)
    file.close()
Example #21
def compare_processed_error(vert_axis=None):
    dance_index = 92
    dances = utils.get_unique_dance_names(csv_data_dir)
    dances.sort()
    csv_filename, np_filename = utils.get_save_path(dances[dance_index])
    print(csv_filename)
    hierarchy_file = os.path.join(hierarchy_dir, "AI_hierarchy.csv")
    hierarchy_df = pd.read_csv(hierarchy_file)
    raw_position, raw_rotation = get_raw_data(dances[dance_index])
    c_headers = [
        c for c in raw_rotation.columns if 'End' not in c and 'Time' not in c
    ]
    full_headers = c_headers.copy()  # same filter as c_headers; hip position columns appended below
    full_headers.append('Hips.Pos.X')
    full_headers.append('Hips.Pos.Y')
    full_headers.append('Hips.Pos.Z')
    raw_position.columns = full_headers[-3:]

    print_header("Vertical Axis: {}".format(vert_axis))

    rel = _pre_process_pos_data(raw_position.copy(), True, False,
                                training_split, vert_axis)
    position_df = _post_process_pos_data(rel, hierarchy_df, True, False,
                                         training_split, vert_axis)
    print(position_df.head())
    compute_differences(position_df, raw_position, "Relativized")

    rel = _pre_process_pos_data(raw_position.copy(), False, True,
                                training_split, vert_axis)
    position_df = _post_process_pos_data(rel, hierarchy_df, False, True,
                                         training_split, vert_axis)
    print(position_df.head())
    compute_differences(position_df, raw_position, "Standardized")

    rel = _pre_process_pos_data(raw_position.copy(), True, True,
                                training_split, vert_axis)
    position_df = _post_process_pos_data(rel, hierarchy_df, True, True,
                                         training_split, vert_axis)
    print(position_df.head())
    compute_differences(position_df, raw_position,
                        "Relativized + Standardized")
def bicubic_main(mode='validation', sc=4.):
    filename = utils.get_save_path('FSRCNN_d{0:}_s{1:}_m{2:}'.format(
        240, 64, 2),
                                   exp_no=2,
                                   ep_no=49,
                                   mode=mode)
    prediction = np.array(h5py.File(filename, 'r')['raw'])
    gt = np.array(
        h5py.File(
            '/nrs/saalfeld/heinrichl/SR-data/FIBSEM/downscaled/bigh5-16iso/' +
            mode + '.h5', 'r')['raw']) / 255.
    gt = np.squeeze(gt)
    downscaled = utils.downscale_manually(gt, sc)

    bicubic = utils.bicubic_up(downscaled, sc, 0)
    prediction, [bicubic] = utils.cut_to_same_size(prediction, [bicubic])
    mse, psnr, bicubic_weighted_mse, bicubic_weighted_psnr = run_eval(
        gt, bicubic)
    return mse, psnr, bicubic_weighted_mse, bicubic_weighted_psnr
def load_dataset(name, tvt, align_metric, node_ordering):
    name_list = [name]
    if not name or type(name) is not str:
        raise ValueError('name must be a non-empty string')
    check_tvt(tvt)
    name_list.append(tvt)
    check_align(align_metric)
    name_list.append(align_metric)
    if node_ordering is None:
        node_ordering = 'noordering'
    elif node_ordering == 'bfs':
        pass
    else:
        raise ValueError('Unknown node ordering {}'.format(node_ordering))
    name_list.append(node_ordering)
    full_name = '_'.join(name_list)
    p = join(get_save_path(), 'dataset', full_name)
    ld = load(p)
    '''
    ######### this is solely for running locally lol #########
    ld['pairs'] = {(1022,1023):ld['pairs'][(1022,1023)],\
                   (1036,1037):ld['pairs'][(1036,1037)], \
                   (104,105):ld['pairs'][(104,105)],\
                   (1042,1043):ld['pairs'][(1042,1043)],\
                   (1048,1049):ld['pairs'][(1048,1049)],\
                   }
    '''
    if ld:
        _, _, _, _, _, dataset_type = get_dataset_conf(name)
        if dataset_type == 'OurDataset':
            rtn = OurDataset(None, None, None, None, None, None, None, None, ld)
        elif dataset_type == 'OurOldDataset':
            rtn = OurOldDataset(None, None, None, None, None, None, None, None,
                          None, None, ld)
        else:
            raise NotImplementedError()
    else:
        rtn = _load_dataset_helper(name, tvt, align_metric, node_ordering)
        save(rtn.__dict__, p)
    if rtn.num_graphs() == 0:
        raise ValueError('{} has 0 graphs'.format(name))
    return rtn
Example #24
 def _load_create_fake_pairs_if_needed(self, data):
     if FLAGS.fake_generation:
         assert ('fake_' in FLAGS.fake_generation)
         fake_num = int(FLAGS.fake_generation.split('_')[1])
         dir = get_save_path() + '/siamese_regession_fake_pairs'
         if FLAGS.ds_metric == 'mcs':
             dir += '_mcs'
         create_dir_if_not_exists(dir)
         filepath = dir + '/{}_fake_{}'.format(FLAGS.dataset_train,
                                               fake_num)
         ld = load(filepath)
         if ld:
             print('Loaded from {} with {} fake triples'.format(
                 filepath, len(ld)))
             return ld
         rtn = self._create_fake_pairs(data, fake_num)
         save(filepath, rtn)
         return rtn
     else:
         return []
def main_evaluate_shift(exp_name, run, cp, sc=4):
    resultlist = []
    for shift in range(sc):
        resultlist.append([shift])
        for mode in ['validation', 'test']:
            savep = utils.get_save_path(exp_name,
                                        exp_no=run,
                                        ep_no=cp,
                                        mode=mode,
                                        add='_shift' + str(shift))
            resultlist[-1] += list(main(savep, mode))
    as_str = tabulate.tabulate(resultlist,
                               headers=[
                                   'shift', 'mse valid', 'psnr valid',
                                   'bc_mse valid', 'bc_psnr valid', 'mse test',
                                   'psnr test', 'bc_mse test', 'bc_psnr test'
                               ])
    shift_file = open(
        os.path.dirname(savep) + '/shift_evaluation_' + str(cp) + '.txt', 'w')
    shift_file.write(as_str)
    shift_file.close()
def main_func():
    # Select input folder
    fldr_in = utils.get_folder_path('Select input folder')
    if not fldr_in:
        raise Exception('Input folder selection aborted')
    fldr_in += r'*.nc'

    # Select output file
    file_out = utils.get_save_path('Select output file')
    if not file_out:
        raise Exception('Output file selection aborted')

    # Set xarray to keep attributes for DataArrays and Datasets
    xr.set_options(keep_attrs=True)

    # This concatenates the files into a Dataset
    ds = xr.open_mfdataset(fldr_in, engine='netcdf4', mask_and_scale=False)

    # Convert calendar to standard one
    utils.convert_calendar(ds)

    # Add to file history
    utils.add_to_history(ds=ds,
                         txt='Drozdowski concatenation of multiple files',
                         prepend=True)
    utils.add_to_history(ds=ds,
                         txt='Drozdowski: set calendar to standard',
                         prepend=True)

    # Get default encodings for use with Dataset::to_netcdf() method
    encodings = utils.get_to_netcdf_encodings(ds=ds, comp_level=4)

    # Save Dataset to file with encodings
    ds.to_netcdf(path=file_out, engine='netcdf4', encoding=encodings)

    # No need to close files!

    print('Done!!!')
Example #27
def gen_aids_small(name, additional=False):
    datadir = get_root_path() + '/data'
    dirin = datadir + '/AIDS40k_orig'
    sfn = get_save_path() + '/aids40k_orig'
    loaded = load_as_dict(sfn)
    if not loaded:
        graphs = {}
        nodes_graphs = defaultdict(list)
        lesseq30 = set()
        lesseq10 = set()
        disconnects = set()
        # Iterate through all 40k graphs.
        for file in glob(dirin + '/*.gexf'):
            gid = int(file.split('/')[-1].split('.')[0])
            g = nx.read_gexf(file)
            if not nx.is_connected(g):
                print('{} not connected'.format(gid))
                disconnects.add(gid)
                continue
            graphs[gid] = g
            nodes_graphs[g.number_of_nodes()].append(gid)
            if g.number_of_nodes() <= 30:
                lesseq30.add(gid)
            if g.number_of_nodes() <= 10:
                lesseq10.add(gid)
        save_as_dict(sfn, graphs, nodes_graphs, lesseq30, lesseq10,
                     disconnects)
    else:
        graphs = loaded['graphs']
        nodes_graphs = loaded['nodes_graphs']
        lesseq30 = loaded['lesseq30']
        lesseq10 = loaded['lesseq10']
        disconnects = loaded['disconnects']
    print(len(disconnects), 'disconnected graphs out of', len(graphs))
    print(len(lesseq30), 'with <= 30 nodes')
    print(len(lesseq10), 'with <= 10 nodes')
    # exit(1)
    train_dir = '{}/{}/train'.format(datadir, name)
    if additional:
        train_data = load_data(name.lower(), train=True)
        test_dir_str = 'test2'
    else:
        exec_cmd('mkdir -p {}'.format(train_dir))
        test_dir_str = 'test'
    test_dir = '{}/{}/{}'.format(datadir, name, test_dir_str)
    exec_cmd('mkdir -p {}'.format(test_dir))
    if not additional:
        if name == 'AIDS10k':
            for num_node in range(5, 23):
                choose = random.Random(123).sample(nodes_graphs[num_node],
                                                   1)[0]
                print('choose {} with {} nodes'.format(choose, num_node))
                nx.write_gexf(graphs[choose],
                              test_dir + '/{}.gexf'.format(choose))
                lesseq30.remove(choose)
            for tid in random.Random(123).sample(lesseq30, 10000):
                nx.write_gexf(graphs[tid], train_dir + '/{}.gexf'.format(tid))
        elif name == 'AIDS700nef':
            lesseq10 = sample_from_lessthan10eq(train_dir, lesseq10, 560,
                                                graphs, 'train')
            sample_from_lessthan10eq(test_dir, lesseq10, 140, graphs, 'test')
    else:
        assert (name == 'AIDS10k')
        for num_node in range(5, 30):
            k = 4
            from_li = nodes_graphs[num_node]
            print('sampling {} from {} (size={})'.format(
                k, len(from_li), num_node))
            choose = random.Random(123).sample_exclude(from_li, k,
                                                       train_data.get_gids())
            print('choose {} with {} nodes'.format(choose, num_node))
            for c in choose:
                nx.write_gexf(graphs[c], test_dir + '/{}.gexf'.format(c))
    print('Done')
Example #28
def aggregate_data(out_file=sys.stdout):
    dances = get_unique_dance_names(csv_data_dir)
    comprehensive_train_X = np.array([])
    comprehensive_train_Y = np.array([])
    comprehensive_validate_X = np.array([])
    comprehensive_validate_Y = np.array([])
    comprehensive_evaluation_X = np.array([])
    comprehensive_evaluation_Y = np.array([])

    comprehensive_train_Class_Y = np.array([])
    comprehensive_validate_Class_Y = np.array([])
    comprehensive_evaluation_Class_Y = np.array([])

    start_time = time.time()
    for dance in progressbar(dances, "Progress: "):
        csv_filename, np_filename = get_save_path(dance)
        train_X, train_Y, validate_X, validate_Y, evaluation_X, evaluation_Y = get_sample_data(
            csv_filename, np_filename, look_back, offset, forecast,
            sample_increment, training_split, validation_split,
            pos_pre_processes, rot_pre_processes)

        sentiment = dance.split('_')[-1]
        train_Class_Y = np.full((train_X.shape[0], 1), int(sentiment))
        validate_Class_Y = np.full((validate_X.shape[0], 1), int(sentiment))
        evaluation_Class_Y = np.full((evaluation_X.shape[0], 1),
                                     int(sentiment))

        if (len(comprehensive_train_X) == 0):
            comprehensive_train_X = train_X
            comprehensive_train_Y = train_Y
            comprehensive_validate_X = validate_X
            comprehensive_validate_Y = validate_Y
            comprehensive_evaluation_X = evaluation_X
            comprehensive_evaluation_Y = evaluation_Y

            comprehensive_train_Class_Y = train_Class_Y
            comprehensive_validate_Class_Y = validate_Class_Y
            comprehensive_evaluation_Class_Y = evaluation_Class_Y
        else:
            comprehensive_train_X = np.vstack((comprehensive_train_X, train_X))
            comprehensive_train_Y = np.vstack((comprehensive_train_Y, train_Y))
            comprehensive_validate_X = np.vstack(
                (comprehensive_validate_X, validate_X))
            comprehensive_validate_Y = np.vstack(
                (comprehensive_validate_Y, validate_Y))
            comprehensive_evaluation_X = np.vstack(
                (comprehensive_evaluation_X, evaluation_X))
            comprehensive_evaluation_Y = np.vstack(
                (comprehensive_evaluation_Y, evaluation_Y))

            comprehensive_train_Class_Y = np.vstack(
                (comprehensive_train_Class_Y, train_Class_Y))
            comprehensive_validate_Class_Y = np.vstack(
                (comprehensive_validate_Class_Y, validate_Class_Y))
            comprehensive_evaluation_Class_Y = np.vstack(
                (comprehensive_evaluation_Class_Y, evaluation_Class_Y))

    write(
        "Fetching and Aggregating Training Data --- {} seconds ---".format(
            time.time() - start_time), out_file)

    np.save(training_filepath + "_X", comprehensive_train_X)
    np.save(training_filepath + "_Y", comprehensive_train_Y)
    np.save(validation_filepath + "_X", comprehensive_validate_X)
    np.save(validation_filepath + "_Y", comprehensive_validate_Y)
    np.save(evaluation_filepath + "_X", comprehensive_evaluation_X)
    np.save(evaluation_filepath + "_Y", comprehensive_evaluation_Y)

    np.save(training_filepath + "_Class_Y", comprehensive_train_Class_Y)
    np.save(validation_filepath + "_Class_Y", comprehensive_validate_Class_Y)
    np.save(evaluation_filepath + "_Class_Y", comprehensive_evaluation_Class_Y)

    print("Saved to", training_filepath + "_Class_Y")
Example #29
def train_model(model, out_file=sys.stdout):
    """ Trains the model with the dance data.
        The History object's History.history attribute is a record of training loss values and metrics values at successive epochs, 
            as well as cooresponding validation values (if applicable).  

    :param model: the model to train
    :type keras.Model
    :param out_file: what to display/write the status information to
    :type output stream
    :return: the class containing the training metric information, the trained model, and the comprehensive evaluation data
    :type tuple
    """

    dances = get_unique_dance_names(csv_data_dir)
    checkpoint = keras.callbacks.ModelCheckpoint(filepath=weights_file,
                                                 monitor='val_loss',
                                                 mode='auto',
                                                 save_weights_only=True,
                                                 save_best_only=True)
    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        patience=stopping_patience,
        verbose=2,
        mode='auto',
        restore_best_weights=True)
    callbacks_list = [
        keras.callbacks.TerminateOnNaN(), checkpoint, early_stopping,
        CustomCallback(out_file)
    ]

    comprehensive_train_X = np.array([])
    comprehensive_train_Y = np.array([])
    comprehensive_validate_X = np.array([])
    comprehensive_validate_Y = np.array([])
    comprehensive_evaluation_X = np.array([])
    comprehensive_evaluation_Y = np.array([])

    write("Fetching and Agregating Training Data ...")  #sys.stdout
    start_time = time.time()
    for dance in progressbar(dances, "Progress: "):
        csv_filename, np_filename = get_save_path(dance)
        train_X, train_Y, validate_X, validate_Y, evaluation_X, evaluation_Y = get_sample_data(
            csv_filename, np_filename, look_back, offset, forecast,
            sample_increment, training_split, validation_split,
            convensional_method)
        if (len(comprehensive_train_X) == 0):
            comprehensive_train_X = train_X
            comprehensive_train_Y = train_Y
            comprehensive_validate_X = validate_X
            comprehensive_validate_Y = validate_Y
            comprehensive_evaluation_X = evaluation_X
            comprehensive_evaluation_Y = evaluation_Y
        else:
            comprehensive_train_X = np.vstack((comprehensive_train_X, train_X))
            comprehensive_train_Y = np.vstack((comprehensive_train_Y, train_Y))
            comprehensive_validate_X = np.vstack(
                (comprehensive_validate_X, validate_X))
            comprehensive_validate_Y = np.vstack(
                (comprehensive_validate_Y, validate_Y))
            comprehensive_evaluation_X = np.vstack(
                (comprehensive_evaluation_X, evaluation_X))
            comprehensive_evaluation_Y = np.vstack(
                (comprehensive_evaluation_Y, evaluation_Y))
    write(
        "Fetching and Aggregating Training Data --- {} seconds ---".format(
            time.time() - start_time), out_file)
    start_time = time.time()
    history = model.fit(comprehensive_train_X,
                        comprehensive_train_Y,
                        batch_size=batch_size,
                        callbacks=callbacks_list,
                        validation_data=(comprehensive_validate_X,
                                         comprehensive_validate_Y),
                        epochs=epochs,
                        shuffle=shuffle_data,
                        verbose=1)

    save_model_checkpoint(model, model_file)
    np.save(evaluation_filepath + "_X", comprehensive_evaluation_X)
    np.save(evaluation_filepath + "_Y", comprehensive_evaluation_Y)
    with open(history_train_file, "w") as history_file:
        json.dump(
            pd.DataFrame.from_dict(history.history).to_dict(), history_file)
    write("Saved training metric history to json file:\n\t" +
          history_train_file)  #sys.stdout
    write(
        "Saved training metric history to json file:\n\t" + history_train_file,
        out_file)
    return history, model, comprehensive_evaluation_X, comprehensive_evaluation_Y
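A minimal follow-up sketch (an assumption, not part of the original code): it presumes a compiled Keras model and matplotlib are available, and shows how the returned History and evaluation arrays might be inspected.

# Hypothetical usage: `model` is assumed to be an already-built, compiled
# Keras model; file names here are placeholders.
import matplotlib.pyplot as plt

history, model, eval_X, eval_Y = train_model(model, out_file=sys.stdout)
plt.plot(history.history['loss'], label='train loss')
plt.plot(history.history['val_loss'], label='validation loss')
plt.legend()
plt.savefig('training_loss.png')
print('evaluation loss:', model.evaluate(eval_X, eval_Y, verbose=0))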
def load_train_test_data():
    # tvt = 'train'
    dir = join(get_save_path(), 'OurModelData')
    sfn = '{}_train_test_{}_{}_{}'.format(
        FLAGS.dataset, FLAGS.align_metric, FLAGS.node_ordering,
        '_'.join(get_flags_with_prefix_as_list('node_fe')))
    '''
    sfn = '{}_train_test_{}_{}_{}{}{}'.format(
        FLAGS.dataset, FLAGS.align_metric, FLAGS.node_ordering,
        '_'.join(get_flags_with_prefix_as_list('node_fe')),
        _none_empty_else_underscore(FLAGS.filter_large_size),
        _none_empty_else_underscore(FLAGS.select_node_pair))
    '''
    tp = join(dir, sfn)
    rtn = load(tp)
    if rtn:
        train_data, test_data = rtn['train_data'], rtn['test_data']
    else:
        train_data, test_data = _load_train_test_data_helper()
        save({'train_data': train_data, 'test_data': test_data}, tp)
    if FLAGS.validation:
        all_spare_ratio = 1 - FLAGS.throw_away
        train_val_ratio = 0.6 * all_spare_ratio
        dataset = train_data.dataset
        dataset.tvt = 'all'
        if all_spare_ratio != 1:
            dataset_train, dataset_test, _ = dataset.tvt_split(
                [train_val_ratio, all_spare_ratio],
                ['train', 'validation', 'spare'])
        else:
            dataset_train, dataset_test = dataset.tvt_split(
                [train_val_ratio], ['train', 'validation'])
        assert train_data.num_node_feat == test_data.num_node_feat
        train_data = OurModelData(dataset_train, train_data.num_node_feat)
        test_data = OurModelData(dataset_test, test_data.num_node_feat)

    if FLAGS.filter_large_size is not None:
        print('truncating graphs...')
        train_data.truncate_large_graphs()
        test_data.truncate_large_graphs()

    if FLAGS.select_node_pair is not None:
        print('selecting node pair...')
        train_data.select_specific_for_debugging()
        test_data.select_specific_for_debugging()

    train_data.dataset.print_stats()
    test_data.dataset.print_stats()

    dir = join(get_save_path(), 'anchor_data')
    sfn = '{}_{}_{}_{}'.format(
        FLAGS.dataset, FLAGS.align_metric, FLAGS.node_ordering,
        '_'.join(get_flags_with_prefix_as_list('node_fe')))
    tp = join(dir, sfn)
    rtn = load(tp)
    # if rtn:
    #     train_anchor, test_anchor = rtn['train_anchor'], rtn['test_anchor']
    #     train_data.dataset.generate_anchors(train_anchor)
    #     test_data.dataset.generate_anchors(test_anchor)
    # else:
    #     train_anchor = train_data.dataset.generate_anchors(None)
    #     test_anchor = test_data.dataset.generate_anchors(None)
    #     save({'train_anchor': train_anchor, 'test_anchor': test_anchor}, tp)
    #
    # # load to device
    # def load_to_device(dataset, device = FLAGS.device):
    #     for i, g in enumerate(dataset.dataset.gs):
    #         dataset.dataset.gs[i].nxgraph.graph['dists_max'] = g.nxgraph.graph['dists_max'].to(device)
    #         dataset.dataset.gs[i].nxgraph.graph['dists_argmax'] = g.nxgraph.graph['dists_argmax'].to(
    #             device)
    # load_to_device(train_data)
    # load_to_device(test_data)

    return train_data, test_data