def _binned2norm(induced: np.ndarray, outpath: str, title: str, dpi=400, transparent=False):
    """Target for Binned2Norm: histograms the induced changes in the 2-norm.

    The histogram is written as 'histogram.png' into the working folder that
    mutils.process_outfile derives from outpath, and that folder is then
    handed to zipdir.

    Args:
        induced (np.ndarray): the induced changes in 2-norm (floats)
        outpath (str): the folder or zip file to save to
        title (str): the title placed above the histogram
        dpi (int, optional): Defaults to 400. Forwarded to savefig
        transparent (bool, optional): Defaults to False. Forwarded to savefig
    """
    _unused, target_dir = mutils.process_outfile(outpath, False)
    os.makedirs(target_dir, exist_ok=True)

    figure, axes = plt.subplots()

    xlabel_text = axes.set_xlabel('Induced $\\Delta \\| W \\|_2$')
    xlabel_text.set_fontsize(16)
    ylabel_text = axes.set_ylabel('Count')
    ylabel_text.set_fontsize(16)
    title_text = axes.set_title(title)
    title_text.set_fontsize(18)

    axes.hist(induced, bins=10)

    image_path = os.path.join(target_dir, 'histogram.png')
    figure.savefig(image_path, dpi=dpi, transparent=transparent)
    plt.close(figure)

    zipdir(target_dir)
def plot(traj: SaturationTrajectory, outfile: str, exist_ok: bool = False,
         xlabel: str = 'Layers') -> None:
    """Plots saturation information through layers and zips the result.

    A boxplot is produced, followed by one histogram per entry of
    BUCKETING_TECHNIQUES and one per entry of BUCKETING_SIZES. All images
    are written into a freshly created working folder which is zipped last.

    Args:
        traj (SaturationTrajectory): the trajectory to plot
        outfile (str): the zip file to save plots to
        exist_ok (bool, optional): Defaults to False. True to overwrite,
            False to error if the file already exists
        xlabel (str, optional): Defaults to 'Layers'. The label for the
            x-axis for plots that go through layers
    """
    archive, workdir = mutils.process_outfile(outfile, exist_ok)
    os.makedirs(workdir)

    def _in_workdir(name: str) -> str:
        return os.path.join(workdir, name)

    _plot_boxplot(traj, _in_workdir('boxplot.png'), xlabel)

    for technique in BUCKETING_TECHNIQUES:
        _plot_hist(traj, _in_workdir(f'hist_{technique}.png'), xlabel, technique)

    for bin_count in BUCKETING_SIZES:
        _plot_hist(traj, _in_workdir(f'hist_fixed_nbins_{bin_count}.png'),
                   xlabel, bin_count)

    # an old archive is only cleared when the caller allowed overwriting
    if exist_ok and os.path.exists(archive):
        os.remove(archive)
    zipdir(workdir)
def finished(self, context: GenericTrainingContext, result: dict) -> None:
    """Finishes the worker, closes and deletes mmap'd files, zips directory

    Args:
        context (GenericTrainingContext): the training context; its logger is
            used here and it is forwarded to _send_hidacts
        result (dict): not read by this method
    """
    context.logger.info('[PCA3D-ThroughTrain] Cleaning up and archiving')
    # push one last set of hidden activations before shutting the workers down
    self._send_hidacts(context)

    # start_finish is issued to every connection before any end_finish is
    # called (presumably so connections can wind down concurrently - TODO confirm)
    for connection in self.connections:
        connection.start_finish()
    for connection in self.connections:
        connection.end_finish()
    self.connections = None

    # drop the torch references before closing the underlying memory maps
    self.sample_labels_torch = None
    self.sample_points_torch = None
    self.sample_labels._mmap.close() # pylint: disable=protected-access
    self.sample_labels = None
    # NOTE(review): unlike sample_labels, sample_points is released without an
    # explicit mmap close or file removal here - confirm that is intentional
    self.sample_points = None
    for lyr in self.layers:
        lyr._mmap.close() # pylint: disable=protected-access
    self.layers = None

    # the backing files are removed only after their maps have been closed
    os.remove(self.sample_labels_file)
    for hafile in self.hid_acts_files:
        os.remove(hafile)
    self.sample_labels_file = None
    self.hid_acts_files = None

    zipdir(self.output_folder)
def finished(self, context: GenericTrainingContext, result: dict) -> None: #pylint: disable=unused-argument
    """Zips the output directory, replacing any previous archive.

    Nothing happens when self.dirpath does not exist. Otherwise an existing
    '<dirpath>.zip' is removed first and the directory is passed to zipdir.
    """
    folder = self.dirpath
    if os.path.exists(folder):
        stale_archive = folder + '.zip'
        if os.path.exists(stale_archive):
            os.remove(stale_archive)
        zipdir(folder)
def save_using(samples: np.ndarray, labels: np.ndarray, *layer_acts: typing.Tuple[np.ndarray],
               num_labels: int, outpath: str, exist_ok: bool, meta: dict,
               **additional: typing.Dict[str, np.ndarray]):
    """Stores the activations of the network to the given file, optionally
    overwriting it if it already exists.

    Everything is written both as a MATLAB file and as an npz file named
    'all'. When SAVE_SPLIT is set, every array is additionally written to
    its own pair of files. The meta dict is stored as 'meta' (MATLAB) and
    'meta.json'. The folder is zipped at the end.

    Args:
        samples (np.ndarray): the samples presented to the network of
            dimensions [num_samples, input_dim]
        labels (np.ndarray): the labels corresponding to the samples
            presented [num_samples]
        layer_acts (tuple[np.ndarray]): the activations of the network. each
            element corresponds to an array of activations with dimensions
            [num_samples, layer_size]
        num_labels (int): the number of distinct labels; one mask per label
            value in range(num_labels) is used to split each layer
        outpath (str): the file to save to, should be a zip file
        exist_ok (bool): True to overwrite existing files, False not to
        meta (dict): saved alongside the data in json-format
        additional (dict[str, ndarray]): any additional arrays to save
    """
    filepath, folderpath = mutils.process_outfile(outpath, exist_ok)
    os.makedirs(folderpath, exist_ok=True)

    label_masks = [labels == val for val in range(num_labels)]
    asdict = dict({'samples': samples, 'labels': labels}, **additional)

    # All layers after the first are stacked along a new leading axis; a
    # layer whose shape differs from the first stacked one is left out of
    # the stack (it is still saved individually below).
    layers_stacked = None
    for layer, act in enumerate(layer_acts):
        if layer > 0:
            if layers_stacked is None:
                layers_stacked = np.expand_dims(act, 0)
            elif (act.shape[0] == layers_stacked.shape[1]
                  and act.shape[1] == layers_stacked.shape[2]):
                layers_stacked = np.concatenate(
                    (layers_stacked, np.expand_dims(act, 0)), axis=0)

        asdict[f'layer_{layer}'] = act
        for label, mask in enumerate(label_masks):
            asdict[f'layer_{layer}_label_{label}'] = act[mask]

    # With fewer than two layers there is nothing to stack. savemat cannot
    # serialise None, so the key is only added when a stack actually exists.
    if layers_stacked is not None:
        asdict['layers_stacked'] = layers_stacked

    scipy.io.savemat(os.path.join(folderpath, 'all'), asdict) # pylint: disable=no-member
    np.savez(os.path.join(folderpath, 'all'), **asdict)

    if SAVE_SPLIT:
        for key, val in asdict.items():
            scipy.io.savemat(os.path.join(folderpath, key), {key: val}) # pylint: disable=no-member
            np.savez(os.path.join(folderpath, key), val)

    scipy.io.savemat(os.path.join(folderpath, 'meta'), meta) # pylint: disable=no-member
    with open(os.path.join(folderpath, 'meta.json'), 'w') as outfile:
        json.dump(meta, outfile)

    if os.path.exists(filepath):
        os.remove(filepath)
    filetools.zipdir(folderpath)
def plot_avg_pr_trajectories(trajectories: typing.List[TrajectoryWithMeta], savepath: str,
                             title: str, exist_ok: bool = False):
    """Plots several averaged participation ratio trajectories on one figure.

    Every entry pairs a trajectory (itself the average of multiple
    trajectories) with the label used in the legend. Each one is drawn with
    error bars of 1.96 times its overall_sem. The figure is written as
    'out.png' and the containing folder is zipped.

    Arguments:
        trajectories (list[TrajectoryWithMeta]): the trajectories to plot
        savepath (str): the zip file to save the resulting figures in
        title (str): the title for the figure
        exist_ok (bool, default False): True to overwrite existing files,
            False not to

    Raises:
        ValueError: if an argument has the wrong type, trajectories is
            empty, or the trajectories disagree on layers or on depth
    """
    if not isinstance(trajectories, (list, tuple)):
        raise ValueError(
            f'expected trajectories is list or tuple, got {trajectories} (type={type(trajectories)})')
    if not trajectories:
        raise ValueError(f'need at least one trajectory, got empty {type(trajectories)}')

    first = trajectories[0]
    if not isinstance(first, TrajectoryWithMeta):
        raise ValueError(
            f'expected trajectories[0] is TrajectoryWithMeta, got {first} (type={type(first)})')

    layers = first.trajectory.layers
    depth = first.trajectory.overall.shape[0]

    if not isinstance(title, str):
        raise ValueError(f'expected title is str, got {title} (type={type(title)})')

    # every trajectory has to agree with the first on layers and on depth
    for idx, candidate in enumerate(trajectories):
        if not isinstance(candidate, TrajectoryWithMeta):
            raise ValueError(
                f'expected trajectories[{idx}] is TrajectoryWithMeta, got {candidate} (type={type(candidate)})')
        if candidate.trajectory.layers != layers:
            raise ValueError(
                f'trajectories[0].trajectory.layers = {layers}, trajectories[{idx}].trajectory.layers = {candidate.trajectory.layers}')
        other_depth = candidate.trajectory.overall.shape[0]
        if depth != other_depth:
            raise ValueError(
                f'trajectories[0].trajectory.overall.shape[0] = {depth}, trajectories[{idx}].trajectory.overall.shape[0] = {other_depth}')

    archive, workdir = mutils.process_outfile(savepath, exist_ok)
    os.makedirs(workdir, exist_ok=True)

    figure, axes = plt.subplots()
    axes.set_title(title).set_fontsize(18)
    x_caption = 'Layer' if layers else 'Time'
    axes.set_xlabel(x_caption).set_fontsize(16)
    axes.set_ylabel('Participation Ratio').set_fontsize(16)
    axes.set_xticks(list(range(depth)))

    palette = plt.get_cmap('Set1')
    colors = palette(list(range(len(trajectories))))
    positions = np.arange(depth)

    for color, entry in zip(colors, trajectories):
        averaged = entry.trajectory
        axes.errorbar(positions, averaged.overall.numpy(),
                      yerr=averaged.overall_sem.numpy()*1.96,
                      color=color, label=entry.label)

    axes.legend()
    figure.savefig(os.path.join(workdir, 'out.png'))
    plt.close(figure)

    if os.path.exists(archive):
        os.remove(archive)
    zipdir(workdir)
def save(self, outfile: str, exist_ok=False):
    """Saves this trajectory to the given file

    The layers flag is written to 'meta.json', the overall tensor to
    'overall.pt' and, when present, the per-label tensor to 'by_label.pt'.
    The working folder is then zipped.

    Args:
        outfile (str): the filename to save to; should be a zip file
        exist_ok (bool): True to overwrite outfile if it exists, False not to
    """
    archive, folder = mutils.process_outfile(outfile, exist_ok=exist_ok)
    os.makedirs(folder, exist_ok=True)

    with open(os.path.join(folder, 'meta.json'), 'w') as metaout:
        json.dump({'layers': self.layers}, metaout)

    torch.save(self.overall, os.path.join(folder, 'overall.pt'))
    if self.by_label is not None:
        torch.save(self.by_label, os.path.join(folder, 'by_label.pt'))

    # Clear a previous archive before zipping, as the other save/plot
    # routines in this file do, so an overwrite does not depend on zipdir
    # coping with an already existing zip file.
    if os.path.exists(archive):
        os.remove(archive)
    zipdir(folder)
def archive_raw_inputs(self, archive_path: str):
    """Archives the raw data sent to the workers at the given path.

    Joins first, zips the working directory for this identifier, renames
    the resulting zip to archive_path, and finally resets the spawned
    worker count and the prepared flag.

    Args:
        archive_path (str): the path to archive data to

    Raises:
        ValueError: if archive_path is not a str
    """
    if not isinstance(archive_path, str):
        raise ValueError(f'expected archive path is str, got {archive_path}')

    self.join()

    workdir = _get_working_dir(self.identifier)
    zipdir(workdir)
    produced_zip = workdir + '.zip'
    os.rename(produced_zip, archive_path)

    self.workers_spawned, self._prepared = 0, False
def load(cls, infile: str):
    """Loads the PR trajectory saved to the given filepath

    The archive is unpacked, 'meta.json', 'overall.pt' and (if present)
    'by_label.pt' are read from the unpacked folder, and the folder is
    archived again. Re-archiving happens even if reading fails, so a broken
    or incomplete archive does not leave a stray working folder behind.

    Arguments:
        infile (str): the filename to load from; should be a zip file

    Raises:
        FileNotFoundError: if the archive does not exist
    """
    filename, folder = mutils.process_outfile(infile, exist_ok=True)
    if not os.path.exists(filename):
        raise FileNotFoundError(filename)

    unzip(filename)
    try:
        with open(os.path.join(folder, 'meta.json'), 'r') as meta_in:
            meta_dict = json.load(meta_in)

        overall = torch.load(os.path.join(folder, 'overall.pt'))

        by_label = None
        by_label_path = os.path.join(folder, 'by_label.pt')
        if os.path.exists(by_label_path):
            by_label = torch.load(by_label_path)
    finally:
        # guard so a missing folder cannot mask the original error
        if os.path.exists(folder):
            zipdir(folder)

    return cls(overall=overall, layers=meta_dict['layers'], by_label=by_label)
def load(cls, filepath: str, compress: bool = True):
    """Loads the clusters stored at the given filepath.

    An extension on the filepath must be .zip and is ignored. The folder is
    looked for first; only when it is missing is the archive unpacked.

    Arguments:
        filepath (str): the path to the folder or archive that the clusters
            were saved in
        compress (bool): if True the folder will be compressed after this
            is done, regardless of the old state. If this is False, the
            folder will not be compressed after this is done, regardless of
            the old state.

    Raises:
        FileNotFoundError: if neither folder nor archive exists, or if one
            of the expected files is missing inside the folder
    """
    archive, folder = mutils.process_outfile(filepath, True, False)

    if not os.path.exists(folder):
        if not os.path.exists(archive):
            raise FileNotFoundError(filepath)
        filetools.unzip(archive)

    try:
        clusters_path = os.path.join(folder, 'clusters.npz')
        calc_params_path = os.path.join(folder, 'calculate_params.json')
        for required in (clusters_path, calc_params_path):
            if not os.path.exists(required):
                raise FileNotFoundError(required)

        with np.load(clusters_path) as stored:
            samples = stored['samples']
            centers = stored['centers']
            labels = stored['labels']

        with open(calc_params_path, 'r') as params_in:
            calculate_params = json.load(params_in)

        return Clusters(samples, centers, labels, calculate_params)
    finally:
        # runs on success and on failure alike
        if compress and os.path.exists(folder):
            filetools.zipdir(folder)
def measure_dtt_ff(model: FeedforwardNetwork, pwl_prod: PointWithLabelProducer,
                   outfile: str, exist_ok: bool = False,
                   logger: typing.Optional[logging.Logger] = None,
                   verbose: bool = False) -> None:
    """Analogue to measure_dtt for feed-forward networks

    Fills a batch of sample points from the producer, runs it through the
    model, and for each set of hidden activations the model reports stores
    the within/across distances from measure_instant along with their mean,
    standard deviation and standard error. Results are plotted and saved
    into a working folder (outfile without its extension) which is zipped.

    Args:
        model (FeedforwardNetwork): the network to measure
        pwl_prod (PointWithLabelProducer): produces the sample points
        outfile (str): where to save; '.zip' is appended when there is no
            extension
        exist_ok (bool, optional): Defaults to False. True to overwrite an
            existing outfile
        logger (logging.Logger, optional): Defaults to None. Passed to _dbg
        verbose (bool, optional): Defaults to False. Passed to _dbg

    Raises:
        ValueError: if an argument has the wrong type
        FileExistsError: if the working folder exists, or outfile exists
            and exist_ok is False
    """
    if not isinstance(model, FeedforwardNetwork):
        raise ValueError(f'expected model is FeedforwardNetwork, got {model} (type={type(model)})')
    if not isinstance(pwl_prod, PointWithLabelProducer):
        raise ValueError(
            f'expected pwl_prod is PointWithLabelProducer, got {pwl_prod} (type={type(pwl_prod)})')
    if not isinstance(outfile, str):
        raise ValueError(f'expected outfile is str, got {outfile}')
    if not isinstance(exist_ok, bool):
        raise ValueError(f'expected exist_ok is bool, got {exist_ok}')
    if logger is not None and not isinstance(logger, logging.Logger):
        raise ValueError(
            f'expected logger is optional[logging.Logger], got {logger} (type={type(logger)})')
    if not isinstance(verbose, bool):
        raise ValueError(f'expected verbose is bool, got {verbose} (type={type(verbose)})')

    workdir, extension = os.path.splitext(outfile)
    if not extension:
        outfile = workdir + '.zip'
    if os.path.exists(workdir):
        raise FileExistsError(f'for outfile={outfile}, need {workdir} as working space')
    if os.path.exists(outfile) and not exist_ok:
        raise FileExistsError(f'outfile {outfile} already exists (use exist_ok=True) to overwrite')

    num_samples = min(pwl_prod.epoch_size, 50 * pwl_prod.output_dim)
    sample_points = torch.zeros((num_samples, model.input_dim), dtype=torch.double)
    sample_labels = torch.zeros((num_samples,), dtype=torch.long)

    # one statistics slot per set of activations: num_layers + 1
    num_slots = model.num_layers + 1

    def _blank_stats():
        return torch.zeros(num_slots, dtype=torch.double)

    hid_acts = []      # each will be 2d tensor
    within_dists = []  # each value corresponds to a torch tensor of within dists
    across_dists = []  # each value corresponds to a torch tensor of across dists
    within_means, within_stds, within_sems = _blank_stats(), _blank_stats(), _blank_stats()
    across_means, across_stds, across_sems = _blank_stats(), _blank_stats(), _blank_stats()

    # mark/reset surround the fill so the producer ends where it started
    pwl_prod.mark()
    pwl_prod.fill(sample_points, sample_labels)
    pwl_prod.reset()

    def on_hidacts(acts_info: FFHiddenActivations):
        layer = acts_info.layer
        hid_acts.append(acts_info.hidden_acts.detach())

        within, across = measure_instant(hid_acts[layer], sample_labels, pwl_prod.output_dim)
        within_dists.append(within)
        across_dists.append(across)

        within_means[layer] = within.mean()
        within_stds[layer] = within.std()
        within_sems[layer] = within_stds[layer] / np.sqrt(num_samples)

        across_means[layer] = across.mean()
        across_stds[layer] = across.std()
        across_sems[layer] = across_stds[layer] / np.sqrt(num_samples)

    _dbg(verbose, logger, 'measure_dtt_ff getting raw data')
    model(sample_points, on_hidacts)

    layers = np.arange(num_slots)
    _plot_dtt_ff(layers, within_means, within_stds, within_sems,
                 across_means, across_stds, across_sems,
                 within_dists, across_dists, workdir, verbose, logger)
    _save_dtt_ff(sample_points, sample_labels, hid_acts,
                 within_dists, across_dists, workdir)

    if os.path.exists(outfile):
        os.remove(outfile)
    zipdir(workdir)
def digest_ff_activations(
        sample_points: np.ndarray, sample_labels: np.ndarray, output_dim: int,
        *hid_acts: typing.List[np.ndarray], outfile: str, exist_ok: bool):
    """Digest-targetable version of measure_dtt_ff.

    Takes hidden activations that were already computed, measures the
    within/across distances per layer with measure_instant, and stores the
    plots and data in the given outfile.

    Args:
        sample_points (ndarray): the sample points that we used to get
            layer acts
        sample_labels (ndarray): the sample labels that we used to get
            layer acts
        output_dim (int): forwarded to measure_instant
        hid_acts (list[ndarray]): the hidden activations across each layer
        outfile (str): where to store the plots and data
        exist_ok (bool): True to overwrite, False to error when file exists

    Raises:
        ValueError: if exist_ok is None or output_dim is not an int
        FileExistsError: if the working folder exists, or outfile exists
            and exist_ok is False
    """
    if exist_ok is None:
        raise ValueError(
            f'expected exist_ok is bool, got {exist_ok} (are you missing some arguments?)')
    if not isinstance(output_dim, int):
        raise ValueError(f'expected output_dim is int, got {output_dim}')

    # everything downstream operates on torch tensors
    sample_points = torch.from_numpy(sample_points).double()
    sample_labels = torch.from_numpy(sample_labels).int()
    hid_acts = [torch.from_numpy(raw_acts).double() for raw_acts in hid_acts]

    workdir, extension = os.path.splitext(outfile)
    if not extension:
        outfile += '.zip'
    if os.path.exists(workdir):
        raise FileExistsError(f'for outfile={outfile}, need {workdir} as working space')
    if os.path.exists(outfile) and not exist_ok:
        raise FileExistsError(f'outfile {outfile} already exists (use exist_ok=True) to overwrite')

    num_samples = sample_points.shape[0]
    num_slots = len(hid_acts)

    def _blank_stats():
        return torch.zeros(num_slots, dtype=torch.double)

    within_dists = []  # each value corresponds to a torch tensor of within dists
    across_dists = []  # each value corresponds to a torch tensor of across dists
    within_means, within_stds, within_sems = _blank_stats(), _blank_stats(), _blank_stats()
    across_means, across_stds, across_sems = _blank_stats(), _blank_stats(), _blank_stats()

    for layer, layer_acts in enumerate(hid_acts):
        within, across = measure_instant(layer_acts, sample_labels, output_dim)
        within_dists.append(within)
        across_dists.append(across)

        within_means[layer] = within.mean()
        within_stds[layer] = within.std()
        within_sems[layer] = within_stds[layer] / np.sqrt(num_samples)

        across_means[layer] = across.mean()
        across_stds[layer] = across.std()
        across_sems[layer] = across_stds[layer] / np.sqrt(num_samples)

    layers = np.arange(num_slots)
    _plot_dtt_ff(layers, within_means, within_stds, within_sems,
                 across_means, across_stds, across_sems,
                 within_dists, across_dists, workdir, False, None)
    _save_dtt_ff(sample_points, sample_labels, hid_acts,
                 within_dists, across_dists, workdir)

    if os.path.exists(outfile):
        os.remove(outfile)
    zipdir(workdir)
def plot_traj_ff(traj: SVMTrajectory, outfile: str, exist_ok: bool = False):
    """Plots the given trajectory to the given file. The file should have no
    extension or have the .zip extension.

    Produces 'overall.png', 'allonly.png', 'allonly_0_1_scale.png' and, when
    the trajectory has one-vs-all accuracies, 'by_label.png'. The images are
    written to a working folder (outfile without extension) that is zipped.

    Args:
        traj (SVMTrajectory): the trajectory to plot
        outfile (str): where to save the plot (will be zipped)
        exist_ok (bool, optional): Defaults to False. if existing files
            should be overwritten

    Raises:
        ValueError: if an argument has the wrong type
        FileExistsError: if the working folder exists, or outfile exists
            and exist_ok is False
    """
    if not isinstance(traj, SVMTrajectory):
        raise ValueError(f'expected traj is SVMTrajectory, got {traj}')
    if not isinstance(outfile, str):
        raise ValueError(f'expected outfile is str, got {outfile}')
    if not isinstance(exist_ok, bool):
        raise ValueError(f'expected exist_ok is bool, got {exist_ok}')

    outfile_wo_ext = os.path.splitext(outfile)[0]
    if outfile == outfile_wo_ext:
        outfile += '.zip'
    if os.path.exists(outfile_wo_ext):
        raise FileExistsError(f'need {outfile_wo_ext} as working space to create {outfile}')
    if not exist_ok and os.path.exists(outfile):
        raise FileExistsError(f'{outfile} already exists (use exist_ok=True to overwrite)')
    os.makedirs(outfile_wo_ext)

    xlabel = 'Layers'
    # NOTE(review): the chance level and the 0-1 y-limit below suggest the
    # accuracies are fractions rather than percentages - confirm the unit
    ylabel = 'SVM Accuracy (%)'
    layers = np.arange(traj.overall.shape[0])
    has_by_label = traj.by_label_vs_all is not None
    num_labels = int(traj.by_label_vs_all.shape[1]) if has_by_label else 2
    chance_perc = 1.0 / num_labels

    def _draw_chance(axis, level):
        # axhline's xmin/xmax are axes fractions (0..1), not data values;
        # the defaults already span the whole axis, so no extents are passed
        axis.axhline(level, linestyle='dashed', color='k', label='Chance Acc.', alpha=0.6)

    # overall accuracy together with every one-vs-all curve
    fig, ax = plt.subplots()
    ax.set_title(f'{ylabel} Through {xlabel} (Overall)')
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    if has_by_label:
        for lbl in range(traj.by_label_vs_all.shape[1]):
            ax.plot(layers, traj.by_label_vs_all[:, lbl].numpy(),
                    linestyle='dashed', label=f'{lbl} vs all', alpha=0.6)
    ax.plot(layers, traj.overall.numpy(), label='Overall')
    ax.set_xticks(layers)
    ax.legend(loc=1)
    fig.savefig(os.path.join(outfile_wo_ext, 'overall.png'))
    plt.close(fig)

    # overall accuracy against chance, once autoscaled and once on [0, 1]
    for image_name, fixed_scale in (('allonly.png', False),
                                    ('allonly_0_1_scale.png', True)):
        fig, ax = plt.subplots()
        ax.set_title(f'{ylabel} Through {xlabel} (All Only)')
        ax.set_xlabel(xlabel)
        ax.set_ylabel(ylabel)
        _draw_chance(ax, chance_perc)
        ax.plot(layers, traj.overall.numpy(), label='Overall')
        ax.set_xticks(layers)
        if fixed_scale:
            ax.set_ylim(0, 1)
        ax.legend(loc=1)
        fig.savefig(os.path.join(outfile_wo_ext, image_name))
        plt.close(fig)

    if has_by_label:
        # one small panel per label on a near-square grid
        best_square = int(np.ceil(np.sqrt(num_labels)))
        num_cols = best_square
        num_rows = int(np.ceil(num_labels / best_square))
        fig, axes = plt.subplots(nrows=num_rows, ncols=num_cols, squeeze=False,
                                 sharey='all', sharex='all')

        # chance level used for the one-vs-all panels
        chance_perc = (num_labels - 1) / num_labels
        fig.suptitle(f'{ylabel} through {xlabel} (By Label 1 vs All)')

        for lbl, ax in enumerate(axes.flat):
            if lbl >= num_labels:
                ax.remove()  # unused trailing cell of the grid
                continue
            ax.set_title(str(lbl))
            ax.plot(layers, traj.by_label_vs_all[:, lbl].numpy(), label=str(lbl))
            _draw_chance(ax, chance_perc)

        axes[0][0].set_xticks(layers)
        fig.savefig(os.path.join(outfile_wo_ext, 'by_label.png'))
        plt.close(fig)

    if exist_ok and os.path.exists(outfile):
        os.remove(outfile)
    zipdir(outfile_wo_ext)
def save(self, filepath: str, exist_ok: bool = False, compress: bool = True) -> None:
    """Saves these clusters, plus a description of how to load them, to the
    given filepath. An extension on the filepath must be .zip and is ignored
    in favor of compress.

    Arguments:
        filepath (str): the folder or zip file where these clusters should
            be saved
        exist_ok (bool): affects the behavior if the folder or zip file
            already exists. If this is False, then an error is thrown. If
            this is True, the existing files are deleted
        compress (bool): if True, the folder is compressed to a zip file
            after saving and the folder is deleted. If False, the result is
            left as a folder
    """
    archive, folder = mutils.process_outfile(filepath, exist_ok, compress)

    # always start from an empty folder
    if os.path.exists(folder):
        filetools.deldir(folder)
    os.makedirs(folder)

    np.savez_compressed(os.path.join(folder, 'clusters.npz'),
                        samples=self.samples,
                        centers=self.centers,
                        labels=self.labels)

    with open(os.path.join(folder, 'calculate_params.json'), 'w') as params_out:
        json.dump(self.calculate_params, params_out)

    readme_lines = (
        'Clusters',
        ' clusters.npz:',
        ' samples [n_samples, n_features] - the samples the clusters were calculated'
        + ' from',
        ' centers [n_clusters, n_features] - the centers of the clusters',
        ' labels [n_samples] - the index in centers for the closest cluster '
        + 'to each label',
        ' calculate_params.json:',
        ' Varies. Gives information about how clusters were calculated',
    )
    with open(os.path.join(folder, 'readme.md'), 'w') as readme_out:
        readme_out.write('\n'.join(readme_lines) + '\n')

    if not compress:
        return
    if os.path.exists(archive):
        os.remove(archive)
    filetools.zipdir(folder)
def plot_trajectory(traj: PCTrajectoryGen, filepath: str, exist_ok: bool = False,
                    alpha: float = 0.5, square: bool = True, transparent: bool = True,
                    s: int = 1, ots: OutputToScalarMapping = SqueezeOTSMapping(),
                    cmap: typing.Union[mcolors.Colormap, str] = 'cividis',
                    norm: mcolors.Normalize = mcolors.Normalize(-1, 1),
                    compress: bool = False):
    """Plots the given trajectory by storing it in the given filepath. If the
    output of the trajectory is not itself a scalar, the output to scalar
    mapping must be set. The other arguments are related to display.

    One scatter panel is drawn per layer on a near-square grid, followed by
    one panel that shows the color scale. The figure is saved as 'local.png'
    and the principal vectors, principal values, projected samples and
    projected sample labels of every snapshot are saved as npz files.

    Args:
        traj (PCTrajectoryGen): The trajectory to plot. Must have at least
            2 pcs
        filepath (str): Where to store the given trajectory, either a folder
            or a zip file. The file zip extension will only be used if
            compress is true
        exist_ok (bool, optional): If the filepath already exists, then this
            determines if it should be overwritten (True) or an error should
            be raised (False). Defaults to False.
        alpha (float, optional): The transparency value for each vector.
            Defaults to 0.5.
        square (bool, optional): If the dimensions of the space should be
            equal for width and height (such that 1 inch width and height
            visually corresponds to the same amount of distance in
            pc-space). Since pc space is naturally rectangular, not setting
            this can easily lead to misinterpretations. Defaults to True.
        transparent (bool, optional): Determines the background color of the
            saved images, where True is transparency and False is
            near-white. Defaults to True.
        s (int, optional): The size of each projected sample. Defaults to 1.
        ots (OutputToScalarMapping, optional): Maps the labels of the
            trajectory to samples which are then converted to colors using
            the color map. Defaults to SqueezeOTSMapping().
        cmap (str or Colormap, optional): The color map to use. Defaults to
            'cividis'.
        norm (mcolors.Normalize, optional): Normalizes the scalars that are
            passed to the color map to the range 0-1. Defaults to
            normalizing linearly from [-1, 1] to [0, 1]
        compress (bool): if the folder should be zipped. Defaults to False.
    """
    tus.check(
        traj=(traj, PCTrajectoryGen),
        filepath=(filepath, str),
        exist_ok=(exist_ok, bool),
        alpha=(alpha, float),
        square=(square, bool),
        transparent=(transparent, bool),
        s=(s, int),
        ots=(ots, OutputToScalarMapping),
        cmap=(cmap, (str, mcolors.Colormap))
    )

    outfile, outfile_wo_ext = mutils.process_outfile(filepath, exist_ok, compress)
    if not compress and exist_ok and os.path.exists(outfile_wo_ext):
        filetools.deldir(outfile_wo_ext)
    os.makedirs(outfile_wo_ext)

    # one panel per layer plus one for the color scale
    num_splots_req = traj.num_layers + 1
    grid_cols: int = int(np.ceil(np.sqrt(num_splots_req)))
    grid_rows: int = int(math.ceil(num_splots_req / grid_cols))

    local_fig, local_axs = plt.subplots(grid_rows, grid_cols, squeeze=False,
                                        figsize=FRAME_SIZE)

    # local_axs is 2D (squeeze=False); .flat walks it row by row
    for layer, axis in enumerate(local_axs.flat):
        if layer >= num_splots_req:
            axis.remove()  # unused trailing cell of the grid
            continue

        if layer >= traj.num_layers:
            # the cell after the last layer shows the color scale
            lspace = np.linspace(norm.vmin, norm.vmax, 100)
            axis.tick_params(axis='both', which='both', bottom=False, left=False,
                             top=False, labelbottom=False, labelleft=False)
            axis.imshow(lspace[..., np.newaxis], cmap=cmap, norm=norm, aspect=0.2)
            continue

        snapshot: PCTrajectoryGenSnapshot = traj[layer]
        projected = snapshot.projected_samples
        projected_lbls = snapshot.projected_sample_labels

        min_x = torch.min(projected[:, 0]).item()
        min_y = torch.min(projected[:, 1]).item()
        max_x = torch.max(projected[:, 0]).item()
        max_y = torch.max(projected[:, 1]).item()

        # widen degenerate ranges so the axis limits never collapse
        if max_x - min_x < 1e-3:
            min_x -= 5e-4
            max_x += 5e-4
        if max_y - min_y < 1e-3:
            min_y -= 5e-4
            max_y += 5e-4

        if square:
            # grow the shorter extent symmetrically to match the longer one
            extents_x = max_x - min_x
            extents_y = max_y - min_y
            if extents_x > extents_y:
                upd = (extents_x - extents_y) / 2
                min_y -= upd
                max_y += upd
            else:
                upd = (extents_y - extents_x) / 2
                min_x -= upd
                max_x += upd

        padding_x = (max_x - min_x) * .1
        padding_y = (max_y - min_y) * .1

        projected_colors = ots(projected_lbls)
        axis.scatter(projected[:, 0].numpy(), projected[:, 1].numpy(),
                     s=s, alpha=alpha, c=projected_colors.numpy(),
                     cmap=mcm.get_cmap(cmap), norm=norm)
        axis.set_xlim([min_x - padding_x, max_x + padding_x])
        axis.set_ylim([min_y - padding_y, max_y + padding_y])
        axis.tick_params(axis='both', which='both', bottom=False, left=False,
                         top=False, labelbottom=False, labelleft=False)

    local_path = os.path.join(outfile_wo_ext, 'local.png')
    local_fig.tight_layout()
    # savefig's resolution keyword is lowercase 'dpi'; 'DPI' is not a
    # recognised argument, so the configured resolution was never applied
    local_fig.savefig(local_path, transparent=transparent, dpi=DPI)
    # release the figure so repeated calls do not accumulate open figures
    plt.close(local_fig)

    np.savez(os.path.join(outfile_wo_ext, 'principal_vectors.npz'),
             *[snapshot.principal_vectors for snapshot in traj])
    np.savez(os.path.join(outfile_wo_ext, 'principal_values.npz'),
             *[snapshot.principal_values for snapshot in traj])
    np.savez(os.path.join(outfile_wo_ext, 'projected_samples.npz'),
             *[snapshot.projected_samples for snapshot in traj])
    np.savez(os.path.join(outfile_wo_ext, 'projected_sample_labels.npz'),
             *[snapshot.projected_sample_labels for snapshot in traj])

    if compress:
        if os.path.exists(outfile):
            os.remove(outfile)
        filetools.zipdir(outfile_wo_ext)
def replot_dtt_ff(infile: str, verbose: bool = True, logger: logging.Logger = None):
    """Recreates the dtt_ff plots for the given zip, replacing them inside
    the zip.

    The archive is unpacked, the within/across distances and the number of
    samples are read back, the summary statistics are recomputed, and the
    plots are regenerated. The folder is repacked even if this fails.

    Args:
        infile (str): the outfile that you used when measuring
        verbose (bool): if this should print progress information
        logger (Logger): the logger to use, None for print

    Raises:
        ValueError: if an argument has the wrong type or the stored within
            and across distances differ in length
    """
    if not isinstance(infile, str):
        raise ValueError(f'expected infile is str, got {infile} (type={type(infile)})')
    if not isinstance(verbose, bool):
        raise ValueError(f'expected verbose is bool, got {verbose} (type={type(verbose)})')
    if logger is not None and not isinstance(logger, logging.Logger):
        raise ValueError(
            f'expected logger is optional[logging.Logger], got {logger} (type={type(logger)})')

    _dbg(verbose, logger, f'unpacking {infile}')
    unzip(infile)
    infile_wo_ext = os.path.splitext(infile)[0]

    def _read_unnamed_arrays(npz_name):
        # np.savez names positional arrays arr_0, arr_1, ...; read them in order
        arrays = []
        with np.load(os.path.join(infile_wo_ext, npz_name)) as stored:
            i = 0
            while f'arr_{i}' in stored:
                arrays.append(stored[f'arr_{i}'])
                i += 1
        return arrays

    _dbg(verbose, logger, 'fetching data')
    try:
        within_dists = _read_unnamed_arrays('within.npz')
        across_dists = _read_unnamed_arrays('across.npz')
        with np.load(os.path.join(infile_wo_ext, 'sample.npz')) as sample_dict:
            num_samples = sample_dict['sample_labels'].shape[0]

        num_layers = len(within_dists) - 1
        if len(across_dists) != num_layers + 1:
            raise ValueError(
                f'expected within_dists has same len as across_dists, but len(within_dists)={len(within_dists)}, len(across_dists)={len(across_dists)}')

        within_means = torch.zeros(num_layers+1, dtype=torch.double)
        within_stds = torch.zeros(num_layers+1, dtype=torch.double)
        within_sems = torch.zeros(num_layers+1, dtype=torch.double)
        across_means = torch.zeros(num_layers+1, dtype=torch.double)
        across_stds = torch.zeros(num_layers+1, dtype=torch.double)
        across_sems = torch.zeros(num_layers+1, dtype=torch.double)

        for i, (within, across) in enumerate(zip(within_dists, across_dists)):
            # The measuring code takes .std() of torch tensors, which is the
            # sample (Bessel-corrected) standard deviation by default. The
            # arrays here are numpy, whose default is the population value,
            # so ddof=1 is needed to reproduce the originally plotted spread.
            within_means[i] = within.mean()
            within_stds[i] = within.std(ddof=1)
            within_sems[i] = within_stds[i] / np.sqrt(num_samples)
            across_means[i] = across.mean()
            across_stds[i] = across.std(ddof=1)
            across_sems[i] = across_stds[i] / np.sqrt(num_samples)

        layers = np.arange(num_layers+1)
        _plot_dtt_ff(layers, within_means, within_stds, within_sems,
                     across_means, across_stds, across_sems,
                     within_dists, across_dists, infile_wo_ext, verbose, logger)
    finally:
        _dbg(verbose, logger, f'repacking {infile}')
        zipdir(infile_wo_ext)
def plot_pr_trajectory(traj: PRTrajectory, savepath: str, exist_ok: bool = False,
                       label_map: typing.Optional[typing.Dict[int, str]] = None):
    """Plots the given trajectory and saves it to the given zip archive

    Produces 'global.png' (overall only), 'all.png' (overall plus every
    label) and, when the trajectory has per-label data, one '<label>.png'
    per label. The trajectory itself is stored alongside as 'traj.zip'.

    Args:
        traj (PRTrajectory): The trajectory to plot
        savepath (str): Where to save the trajectory
        exist_ok (bool, optional): Defaults to False. if we should overwrite
        label_map (dict[int, str], optional): Defaults to None. If
            specified, these are the display names for the labels. Defaults
            to just the string representation of the label. May omit any or
            all labels. The passed dict is not modified.

    Raises:
        ValueError: if an argument has the wrong type
        FileExistsError: if the working folder exists, or savepath exists
            and exist_ok is False
    """
    if not isinstance(traj, PRTrajectory):
        raise ValueError(f'expected traj is PRTrajectory, got {traj} (type={type(traj)})')
    if not isinstance(savepath, str):
        raise ValueError(f'expected savepath is str, got {savepath} (type={type(savepath)})')
    if not isinstance(exist_ok, bool):
        raise ValueError(f'expected exist_ok is bool, got {exist_ok} (type={type(exist_ok)})')

    # Resolve display names into a private dict so that filling in missing
    # labels never writes into the dict owned by the caller.
    display_names = None
    if traj.by_label is not None:
        if label_map is None:
            provided = {}
        elif isinstance(label_map, dict):
            provided = label_map
        else:
            raise ValueError(
                f'expected label_map is dict, got {label_map} (type={type(label_map)})')
        display_names = {lbl: provided.get(lbl, str(lbl))
                         for lbl in range(len(traj.by_label))}

    savepath_wo_ext = os.path.splitext(savepath)[0]
    if savepath == savepath_wo_ext:
        savepath += '.zip'
    if os.path.exists(savepath_wo_ext):
        raise FileExistsError(
            f'to save at {savepath}, {savepath_wo_ext} must be empty but it already exists')
    if not exist_ok and os.path.exists(savepath):
        raise FileExistsError(
            f'cannot save at {savepath} (already exists). set exist_ok=True to overwrite')
    os.makedirs(savepath_wo_ext)

    through_str = 'Layers' if traj.layers else 'Time'
    x_label = through_str
    y_label = 'Participation Ratio'
    x_vals = np.arange(traj.overall.shape[0])

    # overall participation ratio only
    fig, axs = plt.subplots()
    axs.set_title(f'PR Through {through_str} (Global)')
    axs.set_xlabel(x_label)
    axs.set_ylabel(y_label)
    axs.plot(x_vals, traj.overall.numpy())
    axs.set_xticks(x_vals)
    fig.tight_layout()
    fig.savefig(os.path.join(savepath_wo_ext, 'global.png'))
    plt.close(fig)

    # overall together with every label
    fig, axs = plt.subplots()
    axs.set_title(f'PR Through {through_str} (All)')
    axs.set_xlabel(x_label)
    axs.set_ylabel(y_label)
    if traj.by_label is not None:
        for lbl, y_vals in enumerate(traj.by_label):
            axs.plot(x_vals, y_vals.numpy(), '--', label=display_names[lbl], alpha=0.6)
    axs.plot(x_vals, traj.overall.numpy(), label='Overall', alpha=1)
    axs.set_xticks(x_vals)
    axs.legend()
    fig.tight_layout()
    fig.savefig(os.path.join(savepath_wo_ext, 'all.png'))
    plt.close(fig)

    # one figure per label
    if traj.by_label is not None:
        for lbl, y_vals in enumerate(traj.by_label):
            fig, axs = plt.subplots()
            axs.set_title(f'PR Through {through_str} ({display_names[lbl]})')
            axs.set_xlabel(x_label)
            axs.set_ylabel(y_label)
            axs.plot(x_vals, y_vals.numpy())
            axs.set_xticks(x_vals)
            fig.tight_layout()
            fig.savefig(os.path.join(savepath_wo_ext, f'{lbl}.png'))
            plt.close(fig)

    traj.save(os.path.join(savepath_wo_ext, 'traj.zip'))

    if os.path.exists(savepath):
        os.remove(savepath)
    zipdir(savepath_wo_ext)