    def write_timing_csv(self, iter_: int, fit_time: float,
                         gen_time: float) -> None:
        """
        Writes the timing stats for each iteration.
        """
        fieldnames = [
            'date', 'gname', 'model', 'trial', 'iter', 'gens', 'fit_time',
            'gen_time'
        ]

        stats_file = Path(get_imt_output_directory()) / 'new_timing_stats.csv'
        write_header = not stats_file.exists()  # header only on the first write

        # append mode lets a single file accumulate rows across calls,
        # and the with-block guarantees the handle is closed
        with open(stats_file, 'a') as fp:
            writer = csv.DictWriter(fp, fieldnames=fieldnames)
            if write_header:
                writer.writeheader()
            row = {
                'date': str(datetime.datetime.now().date()),
                'gname': self.initial_graph.name,
                'model': self.model.model_name,
                'trial': self.trial,
                'iter': iter_,
                'gens': self.num_generations,
                'fit_time': fit_time,
                'gen_time': gen_time
            }
            writer.writerow(row)
        return
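    # Usage sketch (values are hypothetical): a call such as
    #   self.write_timing_csv(iter_=3, fit_time=1.234, gen_time=0.567)
    # appends one row to new_timing_stats.csv, e.g.
    #   date,gname,model,trial,iter,gens,fit_time,gen_time
    #   2024-01-01,karate,CNRG,1,3,20,1.234,0.567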
    def get_pair_of_zipped_objects(self, metric: str) -> Tuple:
        """
        Returns a pair of stats objects (lists/dicts): the first for the root
        graph, the second for the current iteration.
        :param metric: name of the statistic to load
        :return: (root stats object, iteration stats object)
        """
        imt_output_directory = get_imt_output_directory()

        self.set_root_object(metric=metric)

        obj_iter = load_zipped_json(filename=join(
            imt_output_directory, 'graph_stats', self.dataset, self.model,
            metric, f'gs_{self.trial}_{self.iteration}.json.gz'),
                                    keys_to_int=True)

        return self.root, obj_iter
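    # Usage sketch (`gs` is a hypothetical GraphStats instance and
    # 'degree_dist' an assumed metric name): compare the root graph's stats
    # to those of the current iteration.
    #   root_obj, iter_obj = gs.get_pair_of_zipped_objects(metric='degree_dist')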
    def set_root_object(self, metric) -> None:
        """
        Initialize the root object (iteration 0 stats) for the given metric.
        """
        imt_output_directory = get_imt_output_directory()
        if not self.root or self.root_metric != metric:
            self.root = load_zipped_json(filename=join(
                imt_output_directory, 'graph_stats', self.dataset, self.model,
                metric, f'gs_{self.trial}_0.json.gz'),
                                         keys_to_int=True)
            # look for the last iteration file for this dataset and model combination
            for iteration in reversed(range(21)):  # probe iterations 20 down to 0
                filename = join(imt_output_directory, 'graph_stats',
                                self.dataset, self.model, metric,
                                f'gs_{self.trial}_{iteration}.json.gz')
                if verify_file(filename):
                    self.total_iterations = iteration
                    self.root_metric = metric
                    break
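    # File layout probed above (derived from the join() calls):
    #   <imt_output_dir>/graph_stats/<dataset>/<model>/<metric>/gs_<trial>_<iteration>.json.gz
    # where iteration 0 holds the root graph's stats.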
    def write_stats_jsons(self,
                          stats: Union[str, list],
                          overwrite: bool = False) -> None:
        """
        Write each stats dictionary as a compressed json.
        :param stats: a statistic name or a list of statistic names
        :param overwrite: if True, recompute and overwrite existing files
        :return:
        """
        # standardize incoming type
        if isinstance(stats, str):
            stats = [stats]

        for statistic in stats:
            # each requested statistic must be a public callable on this object
            assert statistic in [
                method_name for method_name in dir(self)
                if callable(getattr(self, method_name))
                and not method_name.startswith('_')
            ]
            output_directory = get_imt_output_directory()

            file_output_directory = os.path.join(output_directory,
                                                 'graph_stats', self.dataset,
                                                 self.model, statistic)
            ensure_dir(file_output_directory, recursive=True)

            filename = os.path.join(
                output_directory, 'graph_stats', self.dataset, self.model,
                statistic, f'gs_{self.trial}_{self.iteration}.json.gz')

            # if the file already exists and the overwrite flag is not set,
            # skip this statistic but keep processing the rest
            if not overwrite and verify_file(filename):
                CP.print_green(
                    f'Statistic: {statistic} output file for {self.model}-{self.dataset}-{self.trial} already exists. Skipping.'
                )
                continue

            data = None
            try:
                data = self[statistic]  # todo: maybe there's a better way?!
                save_zipped_json(data, filename)
                CP.print_blue(f'Stats json stored at {filename}')
            except Exception as e:
                CP.print_red(f'Exception occurred on {filename}!')
                CP.print_red(str(e))
                # data may still be None if the computation itself failed
                if statistic == 'netlsd' and data is not None:
                    save_zipped_json(data, filename + '.failed')
        return
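    # Usage sketch (`gs` is a hypothetical GraphStats instance; 'degree_dist'
    # is an assumed statistic name, while 'netlsd' appears above):
    #   gs.write_stats_jsons(stats='degree_dist')
    #   gs.write_stats_jsons(stats=['degree_dist', 'netlsd'], overwrite=True)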
def run_infinity_mirror(args, trial) -> None:
    """
    Creates and runs infinity mirror
    :return:
    """
    selection, g, model, num_gens, use_pickle, num_graphs, rewire, finish, features_bool = process_args(
        args)

    # process args returns the Class and not an object
    empty_g = nx.empty_graph(1)
    empty_g.name = 'empty'  # create an empty graph as a placeholder

    if args.model[0] in ('GCN_AE', 'GCN_VAE', 'Linear_AE', 'Linear_VAE',
                         'Deep_GCN_AE', 'Deep_GCN_VAE'):
        model_obj = model(input_graph=empty_g, trial=trial, kind=args.model[0])
    else:
        model_obj = model(
            input_graph=empty_g, trial=trial
        )  # roundabout way of ensuring the GraphModel object's name is set correctly

    imt_output_dir = get_imt_output_directory()
    make_dirs(output_dir=imt_output_dir,
              gname=g.name,
              model=model_obj.model_name)

    assert selection == 'fast', 'invalid selection'
    num_graphs = 1  # override the parsed value: only 1 graph per generation
    inf = InfinityMirror(initial_graph=g,
                         num_generations=num_gens,
                         model_obj=model_obj,
                         num_graphs=num_graphs,
                         trial=trial,
                         r=rewire,
                         dataset=g.name,
                         model=args.model[0],
                         finish=finish,
                         features_bool=features_bool)
    tic = time.perf_counter()
    inf.run(use_pickle=use_pickle)
    toc = time.perf_counter()

    inf.write_timing_stats(round(toc - tic, 3))
    print(trial, inf)
    return
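# Usage sketch (`parsed_args` and `num_trials` are hypothetical; `parsed_args`
# must be whatever process_args() expects):
#   for trial in range(1, num_trials + 1):
#       run_infinity_mirror(parsed_args, trial)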
def stats_computation(dataset, model, trial, filename, stats):
    path = Path(
        get_imt_output_directory()) / 'pickles' / dataset / model / filename
    graph_list = load_pickle(path)
    assert isinstance(
        graph_list,
        list), f'Expected type "list" and got type {type(graph_list)}.'
    assert all(
        isinstance(g, nx.Graph) for g in graph_list
    ), 'Expected a list of nx.Graph objects.'

    ColorPrint.print_orange(f'{filename} has length {len(graph_list)}')

    for idx, G in enumerate(graph_list):
        gs_obj = GraphStats(graph=G,
                            dataset=dataset,
                            model=model,
                            trial=trial,
                            iteration=idx)
        gs_obj.write_stats_jsons(stats=stats)

    return None
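# Usage sketch (dataset, model, and pickle filename are hypothetical):
#   stats_computation(dataset='karate', model='CNRG', trial=1,
#                     filename='some_run.pkl', stats=['degree_dist'])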
    for dataset in datasets:
        for model in models:
            for stat in stats:
                ColorPrint.print_green(
                    f'computing {stat} distances for {dataset} {model}')
                trials = walker_texas_ranger(dataset,
                                             model,
                                             stat=implemented_metrics[stat],
                                             unique=True)
                args = [[dataset, model, trial, stat] for trial in trials]
                print(args[:5])
                try:
                    results = parallel_async(distance_computation,
                                             args,
                                             num_workers=10)
                    df = pd.concat(results)
                except Exception as e:
                    ColorPrint.print_red(
                        f'Error for {dataset!r} {model!r} {stat!r}: {e}')
                    continue

                output_dir = Path(
                    get_imt_output_directory()) / 'distances' / dataset
                ensure_dir(output_dir, recursive=True)
                df.to_csv(output_dir / f'{dataset}_{model}_{stat}.csv')
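                # Resulting layout (derived from the paths above):
                #   <imt_output_dir>/distances/<dataset>/<dataset>_<model>_<stat>.csv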