def write_timing_csv(self, iter_: int, fit_time: float, gen_time: float) -> None:
    """ Writes the timing stats for each iteration """
    fieldnames = ['date', 'gname', 'model', 'trial', 'iter', 'gens', 'fit_time', 'gen_time']
    stats_file = Path(get_imt_output_directory()) / 'new_timing_stats.csv'

    # create the file with a header row on first use
    if not stats_file.exists():
        with open(stats_file, 'w') as fp:
            writer = csv.DictWriter(fp, fieldnames=fieldnames)
            writer.writeheader()

    with open(stats_file, 'a') as fp:
        writer = csv.DictWriter(fp, fieldnames=fieldnames)
        row = {'date': str(datetime.datetime.now().date()),
               'gname': self.initial_graph.name,
               'model': self.model.model_name,
               'trial': self.trial,
               'iter': iter_,
               'gens': self.num_generations,
               'fit_time': fit_time,
               'gen_time': gen_time}
        writer.writerow(row)
    return
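# Example read-back of the timing CSV above (a sketch, not part of the original code;
# it assumes pandas is available and that the file still lives at
# <imt_output_dir>/new_timing_stats.csv with the fieldnames written by write_timing_csv):
#
#   import pandas as pd
#   timings = pd.read_csv(Path(get_imt_output_directory()) / 'new_timing_stats.csv')
#   print(timings.groupby(['gname', 'model'])[['fit_time', 'gen_time']].mean())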
def get_pair_of_zipped_objects(self, metric: str) -> Tuple:
    """
    Returns a pair of object lists/dicts - the first for the root, and the second for the particular iteration
    :param metric:
    :return:
    """
    imt_output_directory = get_imt_output_directory()

    self.set_root_object(metric=metric)
    obj_iter = load_zipped_json(filename=join(imt_output_directory, 'graph_stats', self.dataset, self.model,
                                              metric, f'gs_{self.trial}_{self.iteration}.json.gz'),
                                keys_to_int=True)
    return self.root, obj_iter
def set_root_object(self, metric) -> Any:
    # initialize the root object (iteration 0) if it is missing or was built for a different metric
    imt_output_directory = get_imt_output_directory()

    if not self.root or self.root_metric != metric:
        self.root = load_zipped_json(filename=join(imt_output_directory, 'graph_stats', self.dataset, self.model,
                                                   metric, f'gs_{self.trial}_0.json.gz'),
                                     keys_to_int=True)

        # look for the last iteration file for this dataset and model combination
        for iteration in reversed(range(21)):
            filename = join(imt_output_directory, 'graph_stats', self.dataset, self.model,
                            metric, f'gs_{self.trial}_{iteration}.json.gz')
            if verify_file(filename):
                self.total_iterations = iteration
                self.root_metric = metric
                break
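# Consumption sketch for the two methods above (hypothetical, not part of the original
# code; it assumes the zipped jsons hold dict-like distributions and that 'degree_dist'
# is one of the stored metrics - substitute whichever metric names the pipeline writes):
#
#   root, current = gs.get_pair_of_zipped_objects(metric='degree_dist')
#   keys = set(root) | set(current)
#   l1_diff = sum(abs(root.get(k, 0) - current.get(k, 0)) for k in keys)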
def write_stats_jsons(self, stats: Union[str, list], overwrite: bool = False) -> None:
    """
    Write the stats dictionary as a compressed json
    :return:
    """
    # standardize incoming type
    if isinstance(stats, str):
        stats = [stats]

    for statistic in stats:
        # every requested statistic must correspond to a public callable on this object
        assert statistic in [method_name for method_name in dir(self)
                             if callable(getattr(self, method_name)) and not method_name.startswith('_')]

        output_directory = get_imt_output_directory()
        file_output_directory = os.path.join(output_directory, 'graph_stats', self.dataset, self.model, statistic)
        ensure_dir(file_output_directory, recursive=True)

        filename = os.path.join(output_directory, 'graph_stats', self.dataset, self.model, statistic,
                                f'gs_{self.trial}_{self.iteration}.json.gz')

        # if the file already exists and the overwrite flag is not set, skip this statistic
        if not overwrite and verify_file(filename):
            CP.print_green(f'Statistic: {statistic} output file for {self.model}-{self.dataset}-{self.trial} already exists. Skipping.')
            continue

        try:
            data = self[statistic]  # todo: maybe there's a better way?!
            save_zipped_json(data, filename)
            CP.print_blue(f'Stats json stored at {filename}')
        except Exception as e:
            CP.print_red(f'Exception occurred on {filename}!')
            CP.print_red(str(e))
            if statistic == 'netlsd':
                save_zipped_json(data, filename + '.failed')
    return
def run_infinity_mirror(args, trial) -> None:
    """
    Creates and runs infinity mirror
    :return:
    """
    # process_args returns the model class and not an object
    selection, g, model, num_gens, use_pickle, num_graphs, rewire, finish, features_bool = process_args(args)

    # create an empty graph as a placeholder
    empty_g = nx.empty_graph(1)
    empty_g.name = 'empty'

    if args.model[0] in ('GCN_AE', 'GCN_VAE', 'Linear_AE', 'Linear_VAE', 'Deep_GCN_AE', 'Deep_GCN_VAE'):
        model_obj = model(input_graph=empty_g, trial=trial, kind=args.model[0])
    else:
        # this is a roundabout way to ensure the name of the GraphModel object is correct
        model_obj = model(input_graph=empty_g, trial=trial)

    imt_output_dir = get_imt_output_directory()
    make_dirs(output_dir=imt_output_dir, gname=g.name, model=model_obj.model_name)

    assert selection == 'fast', 'invalid selection'

    num_graphs = 1  # only 1 graph per generation
    inf = InfinityMirror(initial_graph=g, num_generations=num_gens, model_obj=model_obj, num_graphs=num_graphs,
                         trial=trial, r=rewire, dataset=g.name, model=args.model[0], finish=finish,
                         features_bool=features_bool)

    tic = time.perf_counter()
    inf.run(use_pickle=use_pickle)
    toc = time.perf_counter()

    inf.write_timing_stats(round(toc - tic, 3))
    print(trial, inf)
    return
def stats_computation(dataset, model, trial, filename, stats):
    path = Path(get_imt_output_directory()) / 'pickles' / dataset / model / filename
    graph_list = load_pickle(path)

    assert isinstance(graph_list, list), f'Expected type "list" and got type {type(graph_list)}.'
    assert all(isinstance(g, nx.Graph) for g in graph_list), 'Expected a list of nx.Graph and got disappointed instead.'
    ColorPrint.print_orange(f'{filename} has length {len(graph_list)}')

    # compute and store stats for every graph in the chain; the list index is the iteration number
    for idx, G in enumerate(graph_list):
        gs_obj = GraphStats(graph=G, dataset=dataset, model=model, trial=trial, iteration=idx)
        gs_obj.write_stats_jsons(stats=stats)

    return None
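# Hypothetical driver (not in the original source): fan stats_computation out over every
# pickled trial of a dataset/model pair, reusing the parallel_async helper used by the
# distance loop below. The '*.pkl*' glob and the trial-from-enumeration mapping are
# assumptions about the pickle naming scheme and may need adjusting.
def compute_all_stats(dataset: str, model: str, stats, num_workers: int = 10) -> None:
    pickle_dir = Path(get_imt_output_directory()) / 'pickles' / dataset / model
    args = [[dataset, model, trial, path.name, stats]
            for trial, path in enumerate(sorted(pickle_dir.glob('*.pkl*')), start=1)]
    parallel_async(stats_computation, args, num_workers=num_workers)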
for dataset in datasets:
    for model in models:
        for stat in stats:
            ColorPrint.print_green(f'computing {stat} distances for {dataset} {model}')
            trials = walker_texas_ranger(dataset, model, stat=implemented_metrics[stat], unique=True)
            args = [[dataset, model, trial, stat] for trial in trials]
            print(args[:5])
            # exit(-1)

            try:
                results = parallel_async(distance_computation, args, num_workers=10)
                df = pd.concat(results)
            except Exception as e:
                ColorPrint.print_red(f'Error, for {dataset!r} {model!r} {stat!r}')
                continue

            # output_dir = f'/data/infinity-mirror/output/distances/{dataset}/{model}/{stat}/'
            output_dir = Path(get_imt_output_directory()) / 'distances' / dataset
            ensure_dir(output_dir, recursive=True)
            df.to_csv(output_dir / f'{dataset}_{model}_{stat}.csv')

            # for arg in args:
            #     distance_computation(*arg)