def evaluate_fsrcnn(cp=12):
    resultlist = []
    for d in [240, 280]:
        for s in [48, 64]:
            for m in [2, 3, 4]:
                name = 'FSRCNN_d{0:}_s{1:}_m{2:}'.format(d, s, m)
                for run in range(2):
                    resultlist.append([d, s, m, run])
                    for mode in ['validation', 'test']:
                        savep = utils.get_save_path(name, exp_no=run, ep_no=cp,
                                                    mode=mode)
                        resultlist[-1] += list(main(savep, mode))
    as_str = tabulate.tabulate(resultlist, headers=[
        'd', 's', 'm', 'run',
        'mse valid', 'psnr valid', 'bc_mse valid', 'bc_psnr valid',
        'mse test', 'psnr test', 'bc_mse test', 'bc_psnr test'
    ])
    with open('../results_keras/summaries/FSRCNN_eval.txt', 'w') as file:
        file.write(as_str)
def __init__(self, dataset, ds_metric, algo):
    if ds_metric == 'ged':
        self.dist_sim_func = ged
        ds = 'dist'
    elif ds_metric == 'glet':  # graphlet similarity metric
        self.dist_sim_func = None  # should be pre-computed and loaded
        ds = 'sim'
    elif ds_metric == 'mcs':
        self.dist_sim_func = mcs
        ds = 'dist'
    else:
        raise RuntimeError(
            'Unknown distance/similarity metric {}'.format(ds_metric))
    self.sfn = '{}/{}_{}_{}{}_gidpair_{}_map'.format(
        get_save_path(), dataset, ds_metric, algo,
        '' if algo == 'astar' or algo == 'graphlet' or algo == 'mccreesh2017'
        else '_revtakemin', ds)
    self.algo = algo
    self.gidpair_ds_map = load(self.sfn)
    if not self.gidpair_ds_map:
        self.gidpair_ds_map = OrderedDict()
        save(self.sfn, self.gidpair_ds_map)
        print('Saved dist/sim map to {} with {} entries'.format(
            self.sfn, len(self.gidpair_ds_map)))
    else:
        print('Loaded dist/sim map from {} with {} entries'.format(
            self.sfn, len(self.gidpair_ds_map)))
def get_gs_ds_mat(gs1, gs2, dist_sim_calculator, tvt1, tvt2,
                  dataset, dist_metric, dist_algo, norm, dec_gsize,
                  return_neg1=False):
    mat_str = '{}({})_{}({})'.format(tvt1, len(gs1), tvt2, len(gs2))
    dir = '{}/ds_mat'.format(get_save_path())
    create_dir_if_not_exists(dir)
    sfn = '{}/{}_{}_ds_mat_{}{}_{}'.format(
        dir, dataset, mat_str, dist_metric, get_norm_str(norm), dist_algo)
    l = load(sfn)
    if l is not None:
        print('Loaded from {}'.format(sfn))
        return l
    m = len(gs1)
    n = len(gs2)
    dist_mat = np.zeros((m, n))
    for i in range(m):
        for j in range(n):
            g1 = gs1[i]
            g2 = gs2[j]
            d, normed_d = dist_sim_calculator.calculate_dist_sim(
                g1, g2, dec_gsize=dec_gsize, return_neg1=return_neg1)
            if norm:
                dist_mat[i][j] = normed_d
            else:
                dist_mat[i][j] = d
    save(sfn, dist_mat)
    print('Saved to {}'.format(sfn))
    return dist_mat
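# Usage sketch for get_gs_ds_mat (illustrative only, kept as comments): the dataset
# name, metric, algorithm, and the `data` object holding train/test graphs are
# assumptions borrowed from the surrounding pipeline, not definitions made here.
# calc = DistSimCalculator('aids700nef', 'ged', 'astar')
# train_gs = [g.nxgraph for g in data.train_gs]
# test_gs = [g.nxgraph for g in data.test_gs]
# ds_mat = get_gs_ds_mat(test_gs, train_gs, calc, 'test', 'train',
#                        'aids700nef', 'ged', 'astar',
#                        norm=True, dec_gsize=False)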
def main_func():
    # Select input file
    file_in = utils.get_open_path('Select input file')
    if not file_in:
        raise Exception('Input file selection aborted')

    # Select output file
    file_out = utils.get_save_path('Select output file')
    if not file_out:
        raise Exception('Output file selection aborted')

    # Set xarray to keep attributes for DataArrays and Datasets
    xr.set_options(keep_attrs=True)

    # Open file into a Dataset
    ds = xr.open_dataset(file_in, engine='netcdf4', mask_and_scale=False)

    # Convert calendar to standard one
    utils.convert_calendar(ds)

    # Add to file history
    utils.add_to_history(ds=ds, txt='Drozdowski: set calendar to standard',
                         prepend=True)

    # No encodings because they interfere with calendar setting
    encodings = {}

    # Save Dataset to file with encodings
    ds.to_netcdf(path=file_out, engine='netcdf4', encoding=encodings)

    # Close Dataset file
    ds.close()

    print('Done!!!')
def __init__(self, dataset, ds_metric, algo):
    if logging_enabled:
        print("- Entered dist_sim_calculator::DistSimCalculator::__init__ Constructor Method")

    if ds_metric == 'ged':
        self.dist_sim_func = None
        ds = 'dist'
    else:
        raise RuntimeError(
            'Unknown distance/similarity metric {}'.format(ds_metric))

    self.sfn = '{}\\{}_{}_{}{}_gidpair_{}_map'.format(
        get_save_path(), dataset, ds_metric, algo,
        '' if algo == 'astar' or algo == 'graphlet' or algo == 'mccreesh2017'
        else '_revtakemin', ds)
    self.algo = algo
    self.gidpair_ds_map = load(self.sfn)

    if not self.gidpair_ds_map:
        self.gidpair_ds_map = OrderedDict()
        save(self.sfn, self.gidpair_ds_map)
        print('info: Saved dist/sim map to {} with {} entries'.format(
            self.sfn, len(self.gidpair_ds_map)))
    else:
        print('info: Loaded dist/sim map from {} with {} entries'.format(
            self.sfn, len(self.gidpair_ds_map)))
def main_evaluate_unets(cp=49):
    resultlist = []
    for n_l in [4, 3, 2]:
        for n_f in [64, 32]:
            for n_c in [3, 2]:
                deconv = True
                name = 'Unet_nl{0:}_nc{1:}_nf{2:}_dc{3:}'.format(
                    n_l, n_c, n_f, int(deconv))
                run = 0
                resultlist.append([n_l, n_f, n_c, run])
                for mode in ['validation', 'test']:
                    savep = utils.get_save_path(name, exp_no=run, ep_no=cp,
                                                mode=mode)
                    resultlist[-1] += list(main(savep, mode))
    as_str = tabulate.tabulate(resultlist, headers=[
        'num_levels', 'start_num_filters', 'num_convs', 'run',
        'mse valid', 'psnr valid', 'bc_mse valid', 'bc_psnr valid',
        'mse test', 'psnr test', 'bc_mse test', 'bc_psnr test'
    ])
    with open('../results_keras/summaries/Unet_eval.txt', 'w') as file:
        file.write(as_str)
def load_data():
    dir = join(get_save_path(), 'split')
    dataset_name = FLAGS.dataset
    train_ratio = int(FLAGS.tvt_ratio[0] * 100)
    val_ratio = int(FLAGS.tvt_ratio[1] * 100)
    test_ratio = 100 - train_ratio - val_ratio
    if 'presplit' not in dataset_name:
        save_fn = '{}_train_{}_val_{}_test_{}_seed_{}_window_size_{}'.format(
            dataset_name, train_ratio, val_ratio, test_ratio,
            FLAGS.random_seed, FLAGS.word_window_size)
    else:
        save_fn = '{}_train_val_test_{}_window_size_{}'.format(
            dataset_name, FLAGS.random_seed, FLAGS.word_window_size)
    path = join(dir, save_fn)
    rtn = load(path)
    if rtn:
        train_data, val_data, test_data = \
            rtn['train_data'], rtn['val_data'], rtn['test_data']
    else:
        train_data, val_data, test_data = _load_tvt_data_helper()
        save({'train_data': train_data, 'val_data': val_data,
              'test_data': test_data}, path)
    dataset = FLAGS.dataset
    if "small" in dataset or "presplit" in dataset or 'sentiment' in dataset:
        dataset_name = "_".join(dataset.split("_")[:-1])
    else:
        dataset_name = dataset
    orig_text_path = join(get_corpus_path(), dataset_name + "_sentences.txt")
    raw_doc_list = []
    with open(orig_text_path, 'rb') as f:
        for line in f.readlines():
            raw_doc_list.append(line.strip().decode())
    return train_data, val_data, test_data, raw_doc_list
def main_func():
    # Select input file
    file_in = utils.get_open_path('Select input file')
    if not file_in:
        raise Exception('Input file selection aborted')

    # Select output file
    file_out = utils.get_save_path('Select output file')
    if not file_out:
        raise Exception('Output file selection aborted')

    # Set xarray to keep attributes for DataArrays and Datasets
    xr.set_options(keep_attrs=True)

    # Open file into a Dataset
    ds = xr.open_dataset(file_in, engine='netcdf4', mask_and_scale=False)

    # Iterate each variable, looking for those with 4 dimensions
    for var_name in ds.data_vars:
        da = ds[var_name]
        dims = da.dims
        if len(dims) != 4:
            continue
        # Rearrange data so that 2nd dimension becomes 1st dimension
        da = da.transpose(dims[1], dims[0], dims[2], dims[3])
        n = 0
        # Get dimension data for 2nd dimension
        dims_data = da[dims[1]].data
        # Iterate each sub-DataArray in rearranged data
        for da_sub in da:
            # Assemble a name for the sub-DataArray
            name = da_sub.name + '_' + dims[1] + '_' + str(dims_data[n])
            # Assign sub-DataArray to a new variable in Dataset
            ds[name] = da_sub
            n += 1

    # Convert calendar to standard one
    utils.convert_calendar(ds)

    # Add to file history
    utils.add_to_history(
        ds=ds,
        txt='Drozdowski: explode 4D variables into multiple 3D variables',
        prepend=True)
    utils.add_to_history(ds=ds, txt='Drozdowski: set calendar to standard',
                         prepend=True)

    # Get default encodings for use with Dataset::to_netcdf() method
    encodings = utils.get_to_netcdf_encodings(ds=ds, comp_level=4)

    # Save file with above encoding
    ds.to_netcdf(path=file_out, encoding=encodings)

    # Close Dataset file
    ds.close()

    print('Done!!!')
def main_evaluate_checkpoints(
        name='FSRCNN_d{0:}_s{1:}_m{2:}'.format(240, 64, 3), run=2):
    resultlist = []
    for cp in range(1, 50):
        resultlist.append([cp])
        for mode in ['validation', 'test']:
            savep = utils.get_save_path(name, exp_no=run, ep_no=cp)
            resultlist[-1] += main(savep, mode)
    as_str = tabulate.tabulate(resultlist, headers=[
        'it',
        'mse valid', 'psnr valid', 'bc_mse valid', 'bc_psnr valid',
        'mse test', 'psnr test', 'bc_mse test', 'bc_psnr test'
    ])
    with open(os.path.dirname(utils.get_save_path(name, run, cp)) +
              '/checkpoint_eval.txt', 'w') as summary_file:
        summary_file.write(as_str)
def get_gs_ds_mat(gs1, gs2, dist_sim_calculator, tvt1, tvt2,
                  dataset, dist_metric, dist_algo, norm, dec_gsize,
                  return_neg1=False):
    if logging_enabled:
        print("- Entered dist_sim_calculator::get_gs_ds_mat Global Method")

    mat_str = '{}({})_{}({})'.format(tvt1, len(gs1), tvt2, len(gs2))
    dir = '{}\\ds_mat'.format(get_save_path())
    create_dir_if_not_exists(dir)
    sfn = '{}\\{}_{}_ds_mat_{}{}_{}'.format(
        dir, dataset, mat_str, dist_metric, get_norm_str(norm), dist_algo)
    l = load(sfn)
    if l is not None:
        print('Loaded from {}'.format(sfn))
        return l

    if not dist_sim_calculator.gidpair_ds_map:
        # dist_sim_calculator.initial_calculate_dist_sim(gs1, gs2)
        dist_sim_calculator.initial_dist_sim_pairs_with_netcomp(gs1, gs2)

    m = len(gs1)
    n = len(gs2)
    dist_mat = np.zeros((m, n))
    for i in range(m):
        for j in range(n):
            g1 = gs1[i]
            g2 = gs2[j]
            d, normed_d = dist_sim_calculator.calculate_dist_sim(
                g1, g2, dec_gsize=dec_gsize, return_neg1=return_neg1)
            if norm:
                dist_mat[i][j] = normed_d
                print("i: ", i, ", j: ", j, ", d: ", d,
                      ", normed_d: ", normed_d)
            else:
                dist_mat[i][j] = d
    save(sfn, dist_mat)
    print('Saved to {}'.format(sfn))
    return dist_mat
def _load_train_triples(self, data, dist_calculator):
    gs = [g.nxgraph for g in data.train_gs]
    dist_mat = get_gs_dist_mat(
        gs, gs, dist_calculator, 'train', 'train',
        FLAGS.dataset, FLAGS.dist_metric, FLAGS.dist_algo, FLAGS.dist_norm)
    m, n = dist_mat.shape
    triples = []
    generate_flag = FLAGS.fake_generation is not None
    repeat_flag = FLAGS.top_repeater is not None
    if generate_flag:
        assert ('fake_' in FLAGS.fake_generation)
        assert (not FLAGS.top_repeater)
        fake_num = int(FLAGS.fake_generation.split('_')[1])
        filepath = get_save_path() + '/{}_fake_{}'.format(
            FLAGS.dataset, fake_num)
        load_data = load(filepath)
        if load_data:
            print('Loaded from {} with {} triples'.format(
                filepath, len(load_data.li)))
            return load_data
        node_feat_encoder = data.node_feat_encoder
    elif repeat_flag:
        assert ('_repeat_' in FLAGS.top_repeater)
        assert (not FLAGS.fake_generation)
        top_num = int(FLAGS.top_repeater.split('_')[0])
        repeat_num = int(FLAGS.top_repeater.split('_')[2])
    dist_mat_idx = np.argsort(dist_mat, axis=1)
    for i in range(m):
        g1 = data.train_gs[i]
        if generate_flag:
            sample_graphs, sample_geds = graph_generator(g1.nxgraph, fake_num)
            print(i, m, sample_geds)
            for sample_g, sample_ged in zip(sample_graphs, sample_geds):
                triples.append(
                    (ModelGraph(g1.nxgraph, node_feat_encoder),
                     ModelGraph(sample_g, node_feat_encoder),
                     self.sim_kernel.dist_to_sim_np(sample_ged)))
        for j in range(n):
            col = dist_mat_idx[i][j]
            g2, ged = data.train_gs[col], dist_mat[i][col]
            triples.append((g1, g2, self.sim_kernel.dist_to_sim_np(ged)))
            if repeat_flag and j <= top_num:
                for _ in range(repeat_num):
                    triples.append(
                        (g1, g2, self.sim_kernel.dist_to_sim_np(ged)))
    rtn = SelfShuffleList(triples)
    if generate_flag:
        save(filepath, rtn)
        print('Saved to {} with {} triples'.format(filepath, len(rtn.li)))
    return rtn
def get_raw_data(filename):
    csv_filename, np_filename = utils.get_save_path(filename)
    position_df = pd.read_csv(csv_filename + "_worldpos.csv",
                              usecols=['Hips.X', 'Hips.Y', 'Hips.Z'])
    rotation_df = pd.read_csv(csv_filename + "_rotations.csv")
    # data = utils.get_processed_data(csv_filename, np_filename, training_split, processes, processes)
    data = rotation_df.copy()
    # Add the root (hip) data for spatial movement
    data['Hips.Pos.X'] = position_df.copy().pop('Hips.X')
    data['Hips.Pos.Y'] = position_df.copy().pop('Hips.Y')
    data['Hips.Pos.Z'] = position_df.copy().pop('Hips.Z')
    return data
def _load_tvt_data_helper():
    dir = join(get_save_path(), 'all')
    path = join(dir,
                FLAGS.dataset + '_all_window_' + str(FLAGS.word_window_size))
    rtn = load(path)
    if rtn:
        dataset = TextDataset(None, None, None, None, None, None, rtn)
    else:
        dataset = build_text_graph_dataset(FLAGS.dataset,
                                           FLAGS.word_window_size)
        gc.collect()
        save(dataset.__dict__, path)
    train_dataset, val_dataset, test_dataset = dataset.tvt_split(
        FLAGS.tvt_ratio[:2], FLAGS.tvt_list, FLAGS.random_seed)
    return train_dataset, val_dataset, test_dataset
def run_evaluation(exp_name, run, ep_no, inner_cube=(24, 48, 48), bs=6,
                   resolution=16):
    for mode in ['validation', 'test']:
        modelp = utils.get_model_path(exp_name, exp_no=run, ep_no=ep_no)
        savep = utils.get_save_path(exp_name, exp_no=run, ep_no=ep_no,
                                    mode=mode)
        simple_evaluator = Evaluator(modelp, savep,
                                     utils.get_data_path(mode, resolution))
        simple_evaluator.run_full_evaluation(inner_cube=inner_cube, bs=bs)
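# Usage sketch for run_evaluation (illustrative only, kept as comments): the
# experiment name, run index, and epoch number below are hypothetical values;
# Evaluator and the utils helpers are assumed to come from this project.
# run_evaluation('FSRCNN_d240_s64_m3', run=0, ep_no=49,
#                inner_cube=(24, 48, 48), bs=6, resolution=16)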
def get_data(filename, process, train_split):
    csv_filename, np_filename = utils.get_save_path(filename)
    position_df = pd.read_csv(csv_filename + "_worldpos.csv",
                              usecols=['Hips.X', 'Hips.Y', 'Hips.Z'])
    rotation_df = pd.read_csv(csv_filename + "_rotations.csv")
    # print(position_df.head())
    position_df = _pre_process_pos_data(position_df, process, train_split)
    rotation_df = _pre_process_rot_data(rotation_df, process, train_split)
    # print(position_df.head())
    data = rotation_df.copy()
    # Add the root (hip) data for spatial movement
    data['Hips.Pos.X'] = position_df.copy().pop('Hips.X')
    data['Hips.Pos.Y'] = position_df.copy().pop('Hips.Y')
    data['Hips.Pos.Z'] = position_df.copy().pop('Hips.Z')
    return data
def shifted_evaluation(exp_name, run, cp, resolution=16):
    for mode in ['validation', 'test']:
        modelp = utils.get_model_path(exp_name, exp_no=run, ep_no=cp)
        shifted_evaluator = Evaluator(modelp, '',
                                      utils.get_data_path(mode, resolution))
        for shift in range(int(shifted_evaluator.sc)):
            savep = utils.get_save_path(exp_name, exp_no=run, ep_no=cp,
                                        mode=mode, add='_shift' + str(shift))
            shifted_evaluator.reset_save_path(savep)
            shifted_evaluator.run_full_evaluation(
                inner_cube=(24, 48, 48), bs=6, safety_margin=(shift, -shift))
def main_func():
    # Select input file
    file_in = utils.get_open_path('Select input file')
    if not file_in:
        raise Exception('Input file selection aborted')

    # Select output file
    file_out = utils.get_save_path('Select output file')
    if not file_out:
        raise Exception('Output file selection aborted')

    def is_leap_day(dt):
        return ((dt.year % 4 == 0)
                & ((dt.year % 100 != 0) | (dt.year % 400 == 0))
                & (dt.month == 2) & (dt.day == 29))

    # Set xarray to keep attributes for DataArrays and Datasets
    xr.set_options(keep_attrs=True)

    # Open file into a Dataset
    ds = xr.open_dataset(file_in, engine='netcdf4', mask_and_scale=False)

    # Select records that aren't leap days into a Dataset
    mask = is_leap_day(ds.time.dt)
    ds = ds.sel(time=~mask)

    # Convert calendar to noleap
    utils.convert_calendar(ds, 'noleap')

    # Add to file history
    utils.add_to_history(ds=ds, txt='Drozdowski: remove leap days',
                         prepend=True)
    utils.add_to_history(ds=ds, txt='Drozdowski: set calendar to noleap',
                         prepend=True)

    # Get default encodings for use with Dataset::to_netcdf() method
    encodings = utils.get_to_netcdf_encodings(ds=ds, comp_level=4)

    # Save Dataset to file with encodings
    ds.to_netcdf(path=file_out, engine='netcdf4', encoding=encodings)

    # Close Dataset file
    ds.close()

    print('Done!!!')
def _load_json_emb(self):
    fn = get_save_path() + '/{}_graph2vec_json_dict.pkl'.format(self.dataset)
    if isfile(fn):
        with open(fn, 'rb') as handle:
            d = load_pkl(handle)
            print('Loaded json dict from {}'.format(fn))
            return d
    dfn = get_model_path() + \
        '/graph2vec_tf/embeddings/{}_train_test_dims_{}_epochs_1000_lr_0.3_embeddings.txt'.format(
            self.dataset, self.dim)
    with open(dfn) as json_data:
        d = json.load(json_data)
    with open(fn, 'wb') as handle:
        save_pkl(d, handle)
    print('Loaded json dict from {}\nSaved to {}'.format(dfn, fn))
    return d
def __init__(self, dataset, dist_metric, algo):
    self.sfn = '{}/{}_{}_{}{}_gidpair_dist_map'.format(
        get_save_path(), dataset, dist_metric, algo,
        '' if algo == 'astar' else '_revtakemin')
    self.algo = algo
    self.gidpair_dist_map = load(self.sfn)
    if not self.gidpair_dist_map:
        self.gidpair_dist_map = OrderedDict()
        save(self.sfn, self.gidpair_dist_map)
        print('Saved dist map to {} with {} entries'.format(
            self.sfn, len(self.gidpair_dist_map)))
    else:
        print('Loaded dist map from {} with {} entries'.format(
            self.sfn, len(self.gidpair_dist_map)))
    if dist_metric == 'ged':
        self.dist_func = ged
    else:
        raise RuntimeError('Unknown distance metric {}'.format(dist_metric))
def main_evaluate_fsrcnn_longrun(run=2, cp=49):
    resultlist = []
    for d, s, m in zip([240, 240, 280], [64, 64, 64], [3, 2, 2]):
        name = 'FSRCNN_d{0:}_s{1:}_m{2:}'.format(d, s, m)
        resultlist.append([d, s, m, run])
        for mode in ['validation', 'test']:
            savep = utils.get_save_path(name, exp_no=run, ep_no=cp, mode=mode)
            resultlist[-1] += list(main(savep, mode))
    as_str = tabulate.tabulate(resultlist, headers=[
        'd', 's', 'm', 'run',
        'mse valid', 'psnr valid', 'bc_mse valid', 'bc_psnr valid',
        'mse test', 'psnr test', 'bc_mse test', 'bc_psnr test'
    ])
    with open('../results_keras/summaries/FSRCNN_eval_longrun.txt', 'w') as file:
        file.write(as_str)
def compare_processed_error(vert_axis=None):
    dance_index = 92
    dances = utils.get_unique_dance_names(csv_data_dir)
    dances.sort()
    csv_filename, np_filename = utils.get_save_path(dances[dance_index])
    print(csv_filename)
    hierarchy_file = os.path.join(hierarchy_dir, "AI_hierarchy.csv")
    hierarchy_df = pd.read_csv(hierarchy_file)
    raw_position, raw_rotation = get_raw_data(dances[dance_index])
    c_headers = [c for c in raw_rotation.columns
                 if 'End' not in c and 'Time' not in c]
    full_headers = [c for c in raw_rotation.columns
                    if 'End' not in c and 'Time' not in c]
    full_headers.append('Hips.Pos.X')
    full_headers.append('Hips.Pos.Y')
    full_headers.append('Hips.Pos.Z')
    raw_position.columns = full_headers[-3:]
    print_header("Vertical Axis: {}".format(vert_axis))

    rel = _pre_process_pos_data(raw_position.copy(), True, False,
                                training_split, vert_axis)
    position_df = _post_process_pos_data(rel, hierarchy_df, True, False,
                                         training_split, vert_axis)
    print(position_df.head())
    compute_differences(position_df, raw_position, "Relativized")

    rel = _pre_process_pos_data(raw_position.copy(), False, True,
                                training_split, vert_axis)
    position_df = _post_process_pos_data(rel, hierarchy_df, False, True,
                                         training_split, vert_axis)
    print(position_df.head())
    compute_differences(position_df, raw_position, "Standardized")

    rel = _pre_process_pos_data(raw_position.copy(), True, True,
                                training_split, vert_axis)
    position_df = _post_process_pos_data(rel, hierarchy_df, True, True,
                                         training_split, vert_axis)
    print(position_df.head())
    compute_differences(position_df, raw_position, "Relativized + Standardized")
def bicubic_main(mode='validation', sc=4.):
    filename = utils.get_save_path(
        'FSRCNN_d{0:}_s{1:}_m{2:}'.format(240, 64, 2),
        exp_no=2, ep_no=49, mode=mode)
    prediction = np.array(h5py.File(filename, 'r')['raw'])
    gt = np.array(
        h5py.File(
            '/nrs/saalfeld/heinrichl/SR-data/FIBSEM/downscaled/bigh5-16iso/' +
            mode + '.h5', 'r')['raw']) / 255.
    gt = np.squeeze(gt)
    downscaled = utils.downscale_manually(gt, sc)
    bicubic = utils.bicubic_up(downscaled, sc, 0)
    prediction, [bicubic] = utils.cut_to_same_size(prediction, [bicubic])
    mse, psnr, bicubic_weighted_mse, bicubic_weighted_psnr = run_eval(
        gt, bicubic)
    return mse, psnr, bicubic_weighted_mse, bicubic_weighted_psnr
def load_dataset(name, tvt, align_metric, node_ordering):
    name_list = [name]
    if not name or type(name) is not str:
        raise ValueError('name must be a non-empty string')
    check_tvt(tvt)
    name_list.append(tvt)
    check_align(align_metric)
    name_list.append(align_metric)
    if node_ordering is None:
        node_ordering = 'noordering'
    elif node_ordering == 'bfs':
        pass
    else:
        raise ValueError('Unknown node ordering {}'.format(node_ordering))
    name_list.append(node_ordering)
    full_name = '_'.join(name_list)
    p = join(get_save_path(), 'dataset', full_name)
    ld = load(p)
    '''
    ######### this is solely for running locally lol #########
    ld['pairs'] = {(1022, 1023): ld['pairs'][(1022, 1023)],
                   (1036, 1037): ld['pairs'][(1036, 1037)],
                   (104, 105): ld['pairs'][(104, 105)],
                   (1042, 1043): ld['pairs'][(1042, 1043)],
                   (1048, 1049): ld['pairs'][(1048, 1049)],
                   }
    '''
    if ld:
        _, _, _, _, _, dataset_type = get_dataset_conf(name)
        if dataset_type == 'OurDataset':
            rtn = OurDataset(None, None, None, None, None, None, None, None,
                             ld)
        elif dataset_type == 'OurOldDataset':
            rtn = OurOldDataset(None, None, None, None, None, None, None,
                                None, None, None, ld)
        else:
            raise NotImplementedError()
    else:
        rtn = _load_dataset_helper(name, tvt, align_metric, node_ordering)
        save(rtn.__dict__, p)
    if rtn.num_graphs() == 0:
        raise ValueError('{} has 0 graphs'.format(name))
    return rtn
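# Usage sketch for load_dataset (illustrative only, kept as comments): the dataset
# name, tvt tag, and alignment metric are hypothetical values that check_tvt and
# check_align are assumed to accept; 'bfs' and None are the two node orderings the
# function itself handles.
# dataset = load_dataset('aids700nef', 'all', 'mcs', 'bfs')
# print(dataset.num_graphs())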
def _load_create_fake_pairs_if_needed(self, data):
    if FLAGS.fake_generation:
        assert ('fake_' in FLAGS.fake_generation)
        fake_num = int(FLAGS.fake_generation.split('_')[1])
        dir = get_save_path() + '/siamese_regession_fake_pairs'
        if FLAGS.ds_metric == 'mcs':
            dir += '_mcs'
        create_dir_if_not_exists(dir)
        filepath = dir + '/{}_fake_{}'.format(FLAGS.dataset_train, fake_num)
        ld = load(filepath)
        if ld:
            print('Loaded from {} with {} fake triples'.format(
                filepath, len(ld)))
            return ld
        rtn = self._create_fake_pairs(data, fake_num)
        save(filepath, rtn)
        return rtn
    else:
        return []
def main_evaluate_shift(exp_name, run, cp, sc=4):
    resultlist = []
    for shift in range(sc):
        resultlist.append([shift])
        for mode in ['validation', 'test']:
            savep = utils.get_save_path(exp_name, exp_no=run, ep_no=cp,
                                        mode=mode, add='_shift' + str(shift))
            resultlist[-1] += list(main(savep, mode))
    as_str = tabulate.tabulate(resultlist, headers=[
        'shift',
        'mse valid', 'psnr valid', 'bc_mse valid', 'bc_psnr valid',
        'mse test', 'psnr test', 'bc_mse test', 'bc_psnr test'
    ])
    with open(os.path.dirname(savep) + '/shift_evaluation_' + str(cp) + '.txt',
              'w') as shift_file:
        shift_file.write(as_str)
def main_func():
    # Select input folder
    fldr_in = utils.get_folder_path('Select input folder')
    if not fldr_in:
        raise Exception('Input folder selection aborted')
    fldr_in += r'*.nc'

    # Select output file
    file_out = utils.get_save_path('Select output file')
    if not file_out:
        raise Exception('Output file selection aborted')

    # Set xarray to keep attributes for DataArrays and Datasets
    xr.set_options(keep_attrs=True)

    # This concatenates the files into a Dataset
    ds = xr.open_mfdataset(fldr_in, engine='netcdf4', mask_and_scale=False)

    # Convert calendar to standard one
    utils.convert_calendar(ds)

    # Add to file history
    utils.add_to_history(ds=ds,
                         txt='Drozdowski concatenation of multiple files',
                         prepend=True)
    utils.add_to_history(ds=ds, txt='Drozdowski: set calendar to standard',
                         prepend=True)

    # Get default encodings for use with Dataset::to_netcdf() method
    encodings = utils.get_to_netcdf_encodings(ds=ds, comp_level=4)

    # Save Dataset to file with encodings
    ds.to_netcdf(path=file_out, engine='netcdf4', encoding=encodings)

    # No need to close files!
    print('Done!!!')
def gen_aids_small(name, additional=False):
    datadir = get_root_path() + '/data'
    dirin = datadir + '/AIDS40k_orig'
    sfn = get_save_path() + '/aids40k_orig'
    loaded = load_as_dict(sfn)
    if not loaded:
        graphs = {}
        nodes_graphs = defaultdict(list)
        lesseq30 = set()
        lesseq10 = set()
        disconnects = set()
        # Iterate through all 40k graphs.
        for file in glob(dirin + '/*.gexf'):
            gid = int(file.split('/')[-1].split('.')[0])
            g = nx.read_gexf(file)
            if not nx.is_connected(g):
                print('{} not connected'.format(gid))
                disconnects.add(gid)
                continue
            graphs[gid] = g
            nodes_graphs[g.number_of_nodes()].append(gid)
            if g.number_of_nodes() <= 30:
                lesseq30.add(gid)
            if g.number_of_nodes() <= 10:
                lesseq10.add(gid)
        save_as_dict(sfn, graphs, nodes_graphs, lesseq30, lesseq10,
                     disconnects)
    else:
        graphs = loaded['graphs']
        nodes_graphs = loaded['nodes_graphs']
        lesseq30 = loaded['lesseq30']
        lesseq10 = loaded['lesseq10']
        disconnects = loaded['disconnects']
    print(len(disconnects), 'disconnected graphs out of', len(graphs))
    print(len(lesseq30), 'with <= 30 nodes')
    print(len(lesseq10), 'with <= 10 nodes')
    # exit(1)
    train_dir = '{}/{}/train'.format(datadir, name)
    if additional:
        train_data = load_data(name.lower(), train=True)
        test_dir_str = 'test2'
    else:
        exec_cmd('mkdir -p {}'.format(train_dir))
        test_dir_str = 'test'
    test_dir = '{}/{}/{}'.format(datadir, name, test_dir_str)
    exec_cmd('mkdir -p {}'.format(test_dir))
    if not additional:
        if name == 'AIDS10k':
            for num_node in range(5, 23):
                choose = random.Random(123).sample(nodes_graphs[num_node], 1)[0]
                print('choose {} with {} nodes'.format(choose, num_node))
                nx.write_gexf(graphs[choose],
                              test_dir + '/{}.gexf'.format(choose))
                lesseq30.remove(choose)
            for tid in random.Random(123).sample(lesseq30, 10000):
                nx.write_gexf(graphs[tid], train_dir + '/{}.gexf'.format(tid))
        elif name == 'AIDS700nef':
            lesseq10 = sample_from_lessthan10eq(train_dir, lesseq10, 560,
                                                graphs, 'train')
            sample_from_lessthan10eq(test_dir, lesseq10, 140, graphs, 'test')
    else:
        assert (name == 'AIDS10k')
        for num_node in range(5, 30):
            k = 4
            from_li = nodes_graphs[num_node]
            print('sampling {} from {} (size={})'.format(
                k, len(from_li), num_node))
            choose = random.Random(123).sample_exclude(
                from_li, k, train_data.get_gids())
            print('choose {} with {} nodes'.format(choose, num_node))
            for c in choose:
                nx.write_gexf(graphs[c], test_dir + '/{}.gexf'.format(c))
    print('Done')
def aggregate_data(out_file=sys.stdout):
    dances = get_unique_dance_names(csv_data_dir)
    comprehensive_train_X = np.array([])
    comprehensive_train_Y = np.array([])
    comprehensive_validate_X = np.array([])
    comprehensive_validate_Y = np.array([])
    comprehensive_evaluation_X = np.array([])
    comprehensive_evaluation_Y = np.array([])
    comprehensive_train_Class_Y = np.array([])
    comprehensive_validate_Class_Y = np.array([])
    comprehensive_evaluation_Class_Y = np.array([])
    start_time = time.time()
    for dance in progressbar(dances, "Progress: "):
        csv_filename, np_filename = get_save_path(dance)
        train_X, train_Y, validate_X, validate_Y, evaluation_X, evaluation_Y = \
            get_sample_data(csv_filename, np_filename, look_back, offset,
                            forecast, sample_increment, training_split,
                            validation_split, pos_pre_processes,
                            rot_pre_processes)
        sentiment = dance.split('_')[-1]
        train_Class_Y = np.full((train_X.shape[0], 1), int(sentiment))
        validate_Class_Y = np.full((validate_X.shape[0], 1), int(sentiment))
        evaluation_Class_Y = np.full((evaluation_X.shape[0], 1), int(sentiment))
        if len(comprehensive_train_X) == 0:
            comprehensive_train_X = train_X
            comprehensive_train_Y = train_Y
            comprehensive_validate_X = validate_X
            comprehensive_validate_Y = validate_Y
            comprehensive_evaluation_X = evaluation_X
            comprehensive_evaluation_Y = evaluation_Y
            comprehensive_train_Class_Y = train_Class_Y
            comprehensive_validate_Class_Y = validate_Class_Y
            comprehensive_evaluation_Class_Y = evaluation_Class_Y
        else:
            comprehensive_train_X = np.vstack((comprehensive_train_X, train_X))
            comprehensive_train_Y = np.vstack((comprehensive_train_Y, train_Y))
            comprehensive_validate_X = np.vstack(
                (comprehensive_validate_X, validate_X))
            comprehensive_validate_Y = np.vstack(
                (comprehensive_validate_Y, validate_Y))
            comprehensive_evaluation_X = np.vstack(
                (comprehensive_evaluation_X, evaluation_X))
            comprehensive_evaluation_Y = np.vstack(
                (comprehensive_evaluation_Y, evaluation_Y))
            comprehensive_train_Class_Y = np.vstack(
                (comprehensive_train_Class_Y, train_Class_Y))
            comprehensive_validate_Class_Y = np.vstack(
                (comprehensive_validate_Class_Y, validate_Class_Y))
            comprehensive_evaluation_Class_Y = np.vstack(
                (comprehensive_evaluation_Class_Y, evaluation_Class_Y))
    # Report elapsed time (original computed start_time - time.time(), which is negative).
    write("Fetching and Aggregating Training Data --- {} seconds ---".format(
        time.time() - start_time), out_file)
    np.save(training_filepath + "_X", comprehensive_train_X)
    np.save(training_filepath + "_Y", comprehensive_train_Y)
    np.save(validation_filepath + "_X", comprehensive_validate_X)
    np.save(validation_filepath + "_Y", comprehensive_validate_Y)
    np.save(evaluation_filepath + "_X", comprehensive_evaluation_X)
    np.save(evaluation_filepath + "_Y", comprehensive_evaluation_Y)
    np.save(training_filepath + "_Class_Y", comprehensive_train_Class_Y)
    np.save(validation_filepath + "_Class_Y", comprehensive_validate_Class_Y)
    np.save(evaluation_filepath + "_Class_Y", comprehensive_evaluation_Class_Y)
    print("Saved to", training_filepath + "_Class_Y")
def train_model(model, out_file=sys.stdout):
    """
    Trains the model with the dance data.

    The History object's History.history attribute is a record of training
    loss values and metrics values at successive epochs, as well as
    corresponding validation values (if applicable).

    :param model: the model to train
    :type keras.Model
    :param out_file: what to display/write the status information to
    :type output stream
    :return: the class containing the training metric information, the trained
        model, and the comprehensive evaluation data
    :type tuple
    """
    dances = get_unique_dance_names(csv_data_dir)
    checkpoint = keras.callbacks.ModelCheckpoint(filepath=weights_file,
                                                 monitor='val_loss',
                                                 mode='auto',
                                                 save_weights_only=True,
                                                 save_best_only=True)
    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss', patience=stopping_patience, verbose=2,
        mode='auto', restore_best_weights=True)
    callbacks_list = [
        keras.callbacks.TerminateOnNaN(), checkpoint, early_stopping,
        CustomCallback(out_file)
    ]
    comprehensive_train_X = np.array([])
    comprehensive_train_Y = np.array([])
    comprehensive_validate_X = np.array([])
    comprehensive_validate_Y = np.array([])
    comprehensive_evaluation_X = np.array([])
    comprehensive_evaluation_Y = np.array([])
    write("Fetching and Aggregating Training Data ...")  # sys.stdout
    start_time = time.time()
    for dance in progressbar(dances, "Progress: "):
        csv_filename, np_filename = get_save_path(dance)
        train_X, train_Y, validate_X, validate_Y, evaluation_X, evaluation_Y = \
            get_sample_data(csv_filename, np_filename, look_back, offset,
                            forecast, sample_increment, training_split,
                            validation_split, convensional_method)
        if len(comprehensive_train_X) == 0:
            comprehensive_train_X = train_X
            comprehensive_train_Y = train_Y
            comprehensive_validate_X = validate_X
            comprehensive_validate_Y = validate_Y
            comprehensive_evaluation_X = evaluation_X
            comprehensive_evaluation_Y = evaluation_Y
        else:
            comprehensive_train_X = np.vstack((comprehensive_train_X, train_X))
            comprehensive_train_Y = np.vstack((comprehensive_train_Y, train_Y))
            comprehensive_validate_X = np.vstack(
                (comprehensive_validate_X, validate_X))
            comprehensive_validate_Y = np.vstack(
                (comprehensive_validate_Y, validate_Y))
            comprehensive_evaluation_X = np.vstack(
                (comprehensive_evaluation_X, evaluation_X))
            comprehensive_evaluation_Y = np.vstack(
                (comprehensive_evaluation_Y, evaluation_Y))
    # Report elapsed time (original computed start_time - time.time(), which is negative).
    write("Fetching and Aggregating Training Data --- {} seconds ---".format(
        time.time() - start_time), out_file)
    start_time = time.time()
    history = model.fit(comprehensive_train_X, comprehensive_train_Y,
                        batch_size=batch_size,
                        callbacks=callbacks_list,
                        validation_data=(comprehensive_validate_X,
                                         comprehensive_validate_Y),
                        epochs=epochs,
                        shuffle=shuffle_data,
                        verbose=1)
    save_model_checkpoint(model, model_file)
    np.save(evaluation_filepath + "_X", comprehensive_evaluation_X)
    np.save(evaluation_filepath + "_Y", comprehensive_evaluation_Y)
    with open(history_train_file, "w") as history_file:
        json.dump(pd.DataFrame.from_dict(history.history).to_dict(),
                  history_file)
    write("Saved training metric history to json file:\n\t" +
          history_train_file)  # sys.stdout
    write("Saved training metric history to json file:\n\t" +
          history_train_file, out_file)
    return history, model, comprehensive_evaluation_X, comprehensive_evaluation_Y
def load_train_test_data():
    # tvt = 'train'
    dir = join(get_save_path(), 'OurModelData')
    sfn = '{}_train_test_{}_{}_{}'.format(
        FLAGS.dataset, FLAGS.align_metric, FLAGS.node_ordering,
        '_'.join(get_flags_with_prefix_as_list('node_fe')))
    '''
    sfn = '{}_train_test_{}_{}_{}{}{}'.format(
        FLAGS.dataset, FLAGS.align_metric, FLAGS.node_ordering,
        '_'.join(get_flags_with_prefix_as_list('node_fe')),
        _none_empty_else_underscore(FLAGS.filter_large_size),
        _none_empty_else_underscore(FLAGS.select_node_pair))
    '''
    tp = join(dir, sfn)
    rtn = load(tp)
    if rtn:
        train_data, test_data = rtn['train_data'], rtn['test_data']
    else:
        train_data, test_data = _load_train_test_data_helper()
        save({'train_data': train_data, 'test_data': test_data}, tp)
    if FLAGS.validation:
        all_spare_ratio = 1 - FLAGS.throw_away
        train_val_ratio = 0.6 * all_spare_ratio
        dataset = train_data.dataset
        dataset.tvt = 'all'
        if all_spare_ratio != 1:
            dataset_train, dataset_test, _ = dataset.tvt_split(
                [train_val_ratio, all_spare_ratio],
                ['train', 'validation', 'spare'])
        else:
            dataset_train, dataset_test = dataset.tvt_split(
                [train_val_ratio], ['train', 'validation'])
        assert train_data.num_node_feat == test_data.num_node_feat
        train_data = OurModelData(dataset_train, train_data.num_node_feat)
        test_data = OurModelData(dataset_test, test_data.num_node_feat)
    if FLAGS.filter_large_size is not None:
        print('truncating graphs...')
        train_data.truncate_large_graphs()
        test_data.truncate_large_graphs()
    if FLAGS.select_node_pair is not None:
        print('selecting node pair...')
        train_data.select_specific_for_debugging()
        test_data.select_specific_for_debugging()
    train_data.dataset.print_stats()
    test_data.dataset.print_stats()
    dir = join(get_save_path(), 'anchor_data')
    sfn = '{}_{}_{}_{}'.format(
        FLAGS.dataset, FLAGS.align_metric, FLAGS.node_ordering,
        '_'.join(get_flags_with_prefix_as_list('node_fe')))
    tp = join(dir, sfn)
    rtn = load(tp)
    # if rtn:
    #     train_anchor, test_anchor = rtn['train_anchor'], rtn['test_anchor']
    #     train_data.dataset.generate_anchors(train_anchor)
    #     test_data.dataset.generate_anchors(test_anchor)
    # else:
    #     train_anchor = train_data.dataset.generate_anchors(None)
    #     test_anchor = test_data.dataset.generate_anchors(None)
    #     save({'train_anchor': train_anchor, 'test_anchor': test_anchor}, tp)
    #
    # # load to device
    # def load_to_device(dataset, device=FLAGS.device):
    #     for i, g in enumerate(dataset.dataset.gs):
    #         dataset.dataset.gs[i].nxgraph.graph['dists_max'] = \
    #             g.nxgraph.graph['dists_max'].to(device)
    #         dataset.dataset.gs[i].nxgraph.graph['dists_argmax'] = \
    #             g.nxgraph.graph['dists_argmax'].to(device)
    # load_to_device(train_data)
    # load_to_device(test_data)
    return train_data, test_data