Beispiel #1
0
def _binned2norm(induced: np.ndarray,
                 outpath: str,
                 title: str,
                 dpi=400,
                 transparent=False):
    """The target for Binned2Norm: a 10-bin histogram of the induced changes
    in the 2-norm, written as ``histogram.png`` into the working folder that
    is derived from ``outpath`` and then handed to ``zipdir``.

    Args:
        induced (np.ndarray): the induced changes in the 2-norm to histogram
        outpath (str): the folder or zip file to save to
        title (str): the title placed above the histogram
        dpi (int, optional): Defaults to 400. Forwarded to ``savefig``
        transparent (bool, optional): Defaults to False. Forwarded to ``savefig``
    """
    outfolder = mutils.process_outfile(outpath, False)[1]
    os.makedirs(outfolder, exist_ok=True)
    image_path = os.path.join(outfolder, 'histogram.png')

    fig, ax = plt.subplots()

    # the label/title setters return the text artist, which is then resized
    x_text = ax.set_xlabel('Induced $\\Delta \\| W \\|_2$')
    x_text.set_fontsize(16)
    y_text = ax.set_ylabel('Count')
    y_text.set_fontsize(16)
    title_text = ax.set_title(title)
    title_text.set_fontsize(18)

    ax.hist(induced, bins=10)

    fig.savefig(image_path, dpi=dpi, transparent=transparent)
    plt.close(fig)

    zipdir(outfolder)
def plot(traj: SaturationTrajectory,
         outfile: str,
         exist_ok: bool = False,
         xlabel: str = 'Layers') -> None:
    """Plots saturation information through layers and archives the result

    One boxplot is produced, followed by one histogram per entry of
    BUCKETING_TECHNIQUES and one per entry of BUCKETING_SIZES. All images go
    into a freshly created working folder which is zipped at the end.

    Args:
        traj (SaturationTrajectory): the trajectory to plot
        outfile (str): the zip file to save plots to
        exist_ok (bool, optional): Defaults to False. True to overwrite, False to error
            if the file already exists
        xlabel (str, optional): Defaults to 'Layers'. The label for the x-axis for plots
            that go through layers
    """
    outfile, outfile_wo_ext = mutils.process_outfile(outfile, exist_ok)
    os.makedirs(outfile_wo_ext)

    def _inside(filename):
        # path of an image inside the working folder
        return os.path.join(outfile_wo_ext, filename)

    _plot_boxplot(traj, _inside('boxplot.png'), xlabel)

    for technique in BUCKETING_TECHNIQUES:
        _plot_hist(traj, _inside(f'hist_{technique}.png'), xlabel, technique)

    for nbins in BUCKETING_SIZES:
        _plot_hist(traj, _inside(f'hist_fixed_nbins_{nbins}.png'), xlabel, nbins)

    # a stale archive is only removed when the caller allowed overwriting
    stale_archive = exist_ok and os.path.exists(outfile)
    if stale_archive:
        os.remove(outfile)
    zipdir(outfile_wo_ext)
Beispiel #3
0
    def finished(self, context: GenericTrainingContext, result: dict):
        """Finishes the worker, closes and deletes mmap'd files, zips directory

        The teardown order matters: connections are finished first, then the
        memory maps are closed, then the backing files are removed, and only
        then is the output folder zipped.

        Args:
            context (GenericTrainingContext): the training context; its logger is
                used here and the context is forwarded to ``_send_hidacts``
            result (dict): not read by this method
        """
        context.logger.info('[PCA3D-ThroughTrain] Cleaning up and archiving')
        # NOTE(review): presumably pushes the final hidden activations to the
        # workers before they are told to finish - confirm in _send_hidacts
        self._send_hidacts(context)

        # two passes: every connection receives start_finish() before any
        # receives end_finish()
        for connection in self.connections:
            connection.start_finish()
        for connection in self.connections:
            connection.end_finish()
        self.connections = None

        # drop the torch references before touching the underlying arrays
        self.sample_labels_torch = None
        self.sample_points_torch = None

        self.sample_labels._mmap.close()  # pylint: disable=protected-access
        self.sample_labels = None

        # NOTE(review): sample_points is only dereferenced, its mmap (if it has
        # one) is not closed explicitly - confirm this is intentional
        self.sample_points = None

        for lyr in self.layers:
            lyr._mmap.close()  # pylint: disable=protected-access

        self.layers = None

        # the maps are closed above, so the backing files can now be deleted
        os.remove(self.sample_labels_file)

        for hafile in self.hid_acts_files:
            os.remove(hafile)

        self.sample_labels_file = None
        self.hid_acts_files = None

        zipdir(self.output_folder)
    def finished(self, context: GenericTrainingContext, result: dict) -> None:  #pylint: disable=unused-argument
        """Zips the directory at ``self.dirpath``, replacing any existing
        ``<dirpath>.zip``. Does nothing when the directory does not exist."""
        if os.path.exists(self.dirpath):
            archive_path = self.dirpath + '.zip'
            if os.path.exists(archive_path):
                os.remove(archive_path)
            zipdir(self.dirpath)
Beispiel #5
0
def save_using(samples: np.ndarray, labels: np.ndarray, *layer_acts: np.ndarray,
               num_labels: int, outpath: str, exist_ok: bool, meta: dict,
               **additional: np.ndarray):
    """Stores the activations of the network to the given file, optionally
    overwriting it if it already exists.

    The data is written as both a MATLAB file and an npz file named ``all``,
    optionally (SAVE_SPLIT) as one file pair per array, plus ``meta`` in
    MATLAB and json form. The working folder is then zipped.

    Args:
        samples (np.ndarray): the samples presented to the network of dimensions
            [num_samples, input_dim]
        labels (np.ndarray): the labels corresponding to the samples presented
            [num_samples]
        layer_acts (np.ndarray): the activations of the network, one positional
            argument per layer, each with dimensions [num_samples, layer_size]
        num_labels (int): the number of distinct labels; one mask is built for
            each value in ``range(num_labels)``
        outpath (str): the file to save to, should be a zip file
        exist_ok (bool): True to overwrite existing files, False not to
        meta (dict): saved alongside the data in json-format
        additional (np.ndarray): any additional arrays to save, keyed by name
    """
    filepath, folderpath = mutils.process_outfile(outpath, exist_ok)

    os.makedirs(folderpath, exist_ok=True)

    label_masks = [labels == val for val in range(num_labels)]

    asdict = dict({'samples': samples, 'labels': labels}, **additional)

    # Layers after the first are stacked along a new leading axis. The first
    # such layer fixes the shape; later layers with a different shape are left
    # out of the stack (they are still saved individually below).
    stackable = []
    for layer, act in enumerate(layer_acts):
        if layer > 0 and (not stackable or act.shape == stackable[0].shape):
            stackable.append(act)

        asdict[f'layer_{layer}'] = act
        for label, mask in enumerate(label_masks):
            asdict[f'layer_{layer}_label_{label}'] = act[mask]

    # With fewer than two layers there is nothing to stack. The key is omitted
    # in that case because savemat cannot serialise None.
    if stackable:
        asdict['layers_stacked'] = np.concatenate(
            [np.expand_dims(act, 0) for act in stackable], axis=0)

    scipy.io.savemat(os.path.join(folderpath, 'all'), asdict) # pylint: disable=no-member
    np.savez(os.path.join(folderpath, 'all'), **asdict)

    if SAVE_SPLIT:
        for key, val in asdict.items():
            scipy.io.savemat(os.path.join(folderpath, key), {key: val}) # pylint: disable=no-member
            np.savez(os.path.join(folderpath, key), val)

    scipy.io.savemat(os.path.join(folderpath, 'meta'), meta) # pylint: disable=no-member
    with open(os.path.join(folderpath, 'meta.json'), 'w') as outfile:
        json.dump(meta, outfile)

    # remove a previous archive so that zipping starts from a clean slate
    if os.path.exists(filepath):
        os.remove(filepath)
    filetools.zipdir(folderpath)
def plot_avg_pr_trajectories(trajectories: typing.List[TrajectoryWithMeta],
                             savepath: str, title: str, exist_ok: bool = False):
    """Draws several averaged participation ratio trajectories on one figure.

    Every trajectory carries its own label for the legend and is drawn with
    error bars of 1.96 times its standard error. All trajectories must agree
    on ``layers`` and on their depth. The figure is saved as ``out.png``
    inside the working folder, which is then zipped.

    Arguments:
        trajectories (list[TrajectoryWithMeta]): the trajectories to plot
        savepath (str): the zip file to save the resulting figures in
        title (str): the title for the figure
        exist_ok (bool, default False): True to overwrite existing files, False not to
    """
    if not isinstance(trajectories, (list, tuple)):
        raise ValueError(f'expected trajectories is list or tuple, got {trajectories} (type={type(trajectories)})')
    if len(trajectories) == 0:
        raise ValueError(f'need at least one trajectory, got empty {type(trajectories)}')
    reference = trajectories[0]
    if not isinstance(reference, TrajectoryWithMeta):
        raise ValueError(f'expected trajectories[0] is TrajectoryWithMeta, got {trajectories[0]} (type={type(trajectories[0])})')
    layers = reference.trajectory.layers
    depth = reference.trajectory.overall.shape[0]
    if not isinstance(title, str):
        raise ValueError(f'expected title is str, got {title} (type={type(title)})')

    # every entry (including the first) must match the reference trajectory
    for i, traj in enumerate(trajectories):
        if not isinstance(traj, TrajectoryWithMeta):
            raise ValueError(f'expected trajectories[{i}] is TrajectoryWithMeta, got {traj} (type={type(traj)})')
        if traj.trajectory.layers != layers:
            raise ValueError(f'trajectories[0].trajectory.layers = {layers}, trajectories[{i}].trajectory.layers = {traj.trajectory.layers}')
        _depth = traj.trajectory.overall.shape[0]
        if depth != _depth:
            raise ValueError(f'trajectories[0].trajectory.overall.shape[0] = {depth}, trajectories[{i}].trajectory.overall.shape[0] = {_depth}')

    filename, folder = mutils.process_outfile(savepath, exist_ok)
    os.makedirs(folder, exist_ok=True)

    fig, ax = plt.subplots()
    title_text = ax.set_title(title)
    title_text.set_fontsize(18)
    x_text = ax.set_xlabel('Layer' if layers else 'Time')
    x_text.set_fontsize(16)
    y_text = ax.set_ylabel('Participation Ratio')
    y_text.set_fontsize(16)
    ax.set_xticks(list(range(depth)))

    # one colour per trajectory, looked up by integer index in the Set1 map
    palette = plt.get_cmap('Set1')(list(range(len(trajectories))))
    x_vals = np.arange(depth)
    for color, traj_meta in zip(palette, trajectories):
        inner = traj_meta.trajectory
        ax.errorbar(x_vals, inner.overall.numpy(), yerr=inner.overall_sem.numpy()*1.96,
                    color=color, label=traj_meta.label)
    ax.legend()

    fig.savefig(os.path.join(folder, 'out.png'))
    plt.close(fig)

    if os.path.exists(filename):
        os.remove(filename)
    zipdir(folder)
    def save(self, outfile: str, exist_ok=False):
        """Saves this trajectory to the given file

        Writes ``meta.json`` (the ``layers`` value), ``overall.pt`` and, when
        present, ``by_label.pt`` into a working folder and then zips it.

        Args:
            outfile (str): the filename to save to; should be a zip file
            exist_ok (bool): True to overwrite outfile if it exists, False not to
        """
        archive, folder = mutils.process_outfile(outfile, exist_ok=exist_ok)
        os.makedirs(folder, exist_ok=True)

        with open(os.path.join(folder, 'meta.json'), 'w') as metaout:
            json.dump({'layers': self.layers}, metaout)
        torch.save(self.overall, os.path.join(folder, 'overall.pt'))
        if self.by_label is not None:
            torch.save(self.by_label, os.path.join(folder, 'by_label.pt'))

        # Match the other savers in this file: when overwriting is allowed,
        # clear a stale archive before the folder is zipped in its place.
        if exist_ok and os.path.exists(archive):
            os.remove(archive)
        zipdir(folder)
Beispiel #8
0
    def archive_raw_inputs(self, archive_path: str):
        """Zips this instance's working directory and moves the archive to
        the given path, after waiting on ``join()``. The spawned-worker count
        and the prepared flag are reset afterwards.

        Args:
            archive_path (str): the path to archive data to

        Raises:
            ValueError: if archive_path is not a str
        """
        if not isinstance(archive_path, str):
            message = f'expected archive path is str, got {archive_path}'
            raise ValueError(message)

        self.join()

        staging_dir = _get_working_dir(self.identifier)
        zipdir(staging_dir)
        # zipdir leaves the archive next to the folder as <folder>.zip
        zipped = staging_dir + '.zip'
        os.rename(zipped, archive_path)

        self.workers_spawned = 0
        self._prepared = False
    def load(cls, infile: str):
        """Loads the PR trajectory saved to the given filepath

        The archive is extracted, read, and the extracted folder is zipped
        again afterwards - also when reading fails, so a broken archive does
        not leave a stray working folder behind.

        Arguments:
            infile (str): the filename to load from; should be a zip file

        Raises:
            FileNotFoundError: if the archive does not exist
        """
        filename, folder = mutils.process_outfile(infile, exist_ok=True)
        if not os.path.exists(filename):
            raise FileNotFoundError(filename)
        unzip(filename)

        try:
            with open(os.path.join(folder, 'meta.json'), 'r') as meta_in:
                meta_dict = json.load(meta_in)
            # NOTE(review): torch.load unpickles - only use on trusted archives
            overall = torch.load(os.path.join(folder, 'overall.pt'))
            by_label = None
            by_label_path = os.path.join(folder, 'by_label.pt')
            if os.path.exists(by_label_path):
                by_label = torch.load(by_label_path)
        finally:
            # restore the archived state regardless of whether reading worked
            if os.path.exists(folder):
                zipdir(folder)
        return cls(overall=overall, layers=meta_dict['layers'], by_label=by_label)
    def load(cls, filepath: str, compress: bool = True):
        """Loads the clusters stored at the given filepath. An extension on the
        filepath must be .zip and is ignored. The extracted folder is preferred
        when it exists; otherwise the archive is unzipped first.

        Arguments:
            filepath (str): the path to the folder or archive that the clusters were saved in
            compress (bool): if True the folder is compressed once loading is over
                (successful or not), whatever state it was in before. If False the
                folder is left uncompressed, whatever state it was in before.

        Raises:
            FileNotFoundError: if neither folder nor archive exists, or if one of
                the expected files is missing from the folder
        """
        outfile, outfile_wo_ext = mutils.process_outfile(filepath, True, False)

        folder_missing = not os.path.exists(outfile_wo_ext)
        if folder_missing and not os.path.exists(outfile):
            raise FileNotFoundError(filepath)
        if folder_missing:
            filetools.unzip(outfile)

        try:
            clusters_path = os.path.join(outfile_wo_ext, 'clusters.npz')
            calc_params_path = os.path.join(outfile_wo_ext,
                                            'calculate_params.json')
            # report the first missing file, arrays before parameters
            for required in (clusters_path, calc_params_path):
                if not os.path.exists(required):
                    raise FileNotFoundError(required)

            with np.load(clusters_path) as stored:
                samples = stored['samples']
                centers = stored['centers']
                labels = stored['labels']

            with open(calc_params_path, 'r') as params_file:
                calculate_params = json.load(params_file)

            return Clusters(samples, centers, labels, calculate_params)
        finally:
            if compress and os.path.exists(outfile_wo_ext):
                filetools.zipdir(outfile_wo_ext)
Beispiel #11
0
def measure_dtt_ff(model: FeedforwardNetwork, pwl_prod: PointWithLabelProducer,
                   outfile: str, exist_ok: bool = False,
                   logger: typing.Optional[logging.Logger] = None,
                   verbose: bool = False) -> None:
    """Analogue to measure_dtt for feed-forward networks

    Draws ``min(epoch_size, 50 * output_dim)`` samples from the producer, runs
    them through the model and, for every hidden-activation callback, records
    the within/across distances from ``measure_instant`` with their mean, std
    and sem. The results are plotted and saved into a working folder named
    after ``outfile`` without its extension, which is then zipped.

    Args:
        model (FeedforwardNetwork): the network to measure
        pwl_prod (PointWithLabelProducer): where the sample points come from
        outfile (str): the archive to write; '.zip' is appended when no extension is given
        exist_ok (bool): True to overwrite an existing archive, False to error
        logger (logging.Logger, optional): forwarded to the debug/plot helpers
        verbose (bool): forwarded to the debug/plot helpers

    Raises:
        ValueError: if an argument has the wrong type
        FileExistsError: if the working folder exists, or the archive exists
            and exist_ok is False
    """
    if not isinstance(model, FeedforwardNetwork):
        raise ValueError(f'expected model is FeedforwardNetwork, got {model} (type={type(model)})')
    if not isinstance(pwl_prod, PointWithLabelProducer):
        raise ValueError(f'expected pwl_prod is PointWithLabelProducer, got {pwl_prod} (type={type(pwl_prod)})')
    if not isinstance(outfile, str):
        raise ValueError(f'expected outfile is str, got {outfile}')
    if not isinstance(exist_ok, bool):
        raise ValueError(f'expected exist_ok is bool, got {exist_ok}')
    if logger is not None and not isinstance(logger, logging.Logger):
        raise ValueError(f'expected logger is optional[logging.Logger], got {logger} (type={type(logger)})')
    if not isinstance(verbose, bool):
        raise ValueError(f'expected verbose is bool, got {verbose} (type={type(verbose)})')

    outfile_wo_ext, extension = os.path.splitext(outfile)
    if not extension:
        outfile = outfile_wo_ext + '.zip'

    if os.path.exists(outfile_wo_ext):
        raise FileExistsError(f'for outfile={outfile}, need {outfile_wo_ext} as working space')
    if os.path.exists(outfile) and not exist_ok:
        raise FileExistsError(f'outfile {outfile} already exists (use exist_ok=True) to overwrite')

    num_samples = min(pwl_prod.epoch_size, 50 * pwl_prod.output_dim)
    num_slots = model.num_layers + 1

    def _per_layer_stat():
        # one double-precision entry per callback slot
        return torch.zeros(num_slots, dtype=torch.double)

    sample_points = torch.zeros((num_samples, model.input_dim), dtype=torch.double)
    sample_labels = torch.zeros((num_samples,), dtype=torch.long)
    hid_acts = []  # each will be 2d tensor
    within_dists, across_dists = [], []  # one tensor of distances per layer
    within_means, within_stds, within_sems = (
        _per_layer_stat(), _per_layer_stat(), _per_layer_stat())
    across_means, across_stds, across_sems = (
        _per_layer_stat(), _per_layer_stat(), _per_layer_stat())

    # mark/reset brackets the fill so the producer ends up where it started
    pwl_prod.mark()
    pwl_prod.fill(sample_points, sample_labels)
    pwl_prod.reset()

    def on_hidacts(acts_info: FFHiddenActivations):
        layer = acts_info.layer
        hid_acts.append(acts_info.hidden_acts.detach())

        within, across = measure_instant(hid_acts[layer], sample_labels, pwl_prod.output_dim)
        within_dists.append(within)
        across_dists.append(across)

        summaries = (
            (within, within_means, within_stds, within_sems),
            (across, across_means, across_stds, across_sems),
        )
        for dists, means, stds, sems in summaries:
            means[layer] = dists.mean()
            stds[layer] = dists.std()
            sems[layer] = stds[layer] / np.sqrt(num_samples)

    _dbg(verbose, logger, 'measure_dtt_ff getting raw data')
    model(sample_points, on_hidacts)

    layers = np.arange(num_slots)

    _plot_dtt_ff(layers, within_means, within_stds, within_sems,
                 across_means, across_stds, across_sems,
                 within_dists, across_dists, outfile_wo_ext,
                 verbose, logger)
    _save_dtt_ff(sample_points, sample_labels, hid_acts,
                 within_dists, across_dists, outfile_wo_ext)

    if os.path.exists(outfile):
        os.remove(outfile)

    zipdir(outfile_wo_ext)
Beispiel #12
0
def digest_ff_activations(
        sample_points: np.ndarray, sample_labels: np.ndarray, output_dim: int,
        *hid_acts: typing.List[np.ndarray], outfile: str, exist_ok: bool):
    """Digest-targetable counterpart of measure_dtt_ff: takes the hidden
    activations of each layer as numpy arrays, computes the within/across
    distance statistics per layer, and stores plots and data in the outfile.

    Args:
        sample_points (ndarray): the sample points that we used to get layer acts
        sample_labels (ndarray): the sample labels that we used to get layer acts
        output_dim (int): passed through to measure_instant for every layer
        hid_acts (list[ndarray]): the hidden activations across each layer
        outfile (str): where to store the plots and data
        exist_ok (bool): True to overwrite, False to error when file exists

    Raises:
        ValueError: if exist_ok is None or output_dim is not an int
        FileExistsError: if the working folder exists, or the archive exists
            and exist_ok is False
    """
    if exist_ok is None:
        raise ValueError(f'expected exist_ok is bool, got {exist_ok} (are you missing some arguments?)')
    if not isinstance(output_dim, int):
        raise ValueError(f'expected output_dim is int, got {output_dim}')

    # everything downstream works on torch tensors
    sample_points = torch.from_numpy(sample_points).double()
    sample_labels = torch.from_numpy(sample_labels).int()
    hid_acts = [torch.from_numpy(acts).double() for acts in hid_acts]

    outfile_wo_ext, extension = os.path.splitext(outfile)
    if not extension:
        outfile += '.zip'

    if os.path.exists(outfile_wo_ext):
        raise FileExistsError(f'for outfile={outfile}, need {outfile_wo_ext} as working space')
    if os.path.exists(outfile) and not exist_ok:
        raise FileExistsError(f'outfile {outfile} already exists (use exist_ok=True) to overwrite')

    num_samples = sample_points.shape[0]
    num_layers = len(hid_acts)

    def _per_layer_stat():
        # one double-precision entry per layer
        return torch.zeros(num_layers, dtype=torch.double)

    within_dists, across_dists = [], []  # one tensor of distances per layer
    within_means, within_stds, within_sems = (
        _per_layer_stat(), _per_layer_stat(), _per_layer_stat())
    across_means, across_stds, across_sems = (
        _per_layer_stat(), _per_layer_stat(), _per_layer_stat())

    for layer, layer_acts in enumerate(hid_acts):
        within, across = measure_instant(layer_acts, sample_labels, output_dim)
        within_dists.append(within)
        across_dists.append(across)

        summaries = (
            (within, within_means, within_stds, within_sems),
            (across, across_means, across_stds, across_sems),
        )
        for dists, means, stds, sems in summaries:
            means[layer] = dists.mean()
            stds[layer] = dists.std()
            sems[layer] = stds[layer] / np.sqrt(num_samples)

    layers = np.arange(num_layers)

    _plot_dtt_ff(layers, within_means, within_stds, within_sems,
                 across_means, across_stds, across_sems,
                 within_dists, across_dists, outfile_wo_ext,
                 False, None)
    _save_dtt_ff(sample_points, sample_labels, hid_acts,
                 within_dists, across_dists, outfile_wo_ext)

    if os.path.exists(outfile):
        os.remove(outfile)

    zipdir(outfile_wo_ext)
Beispiel #13
0
def plot_traj_ff(traj: SVMTrajectory, outfile: str, exist_ok: bool = False):
    """Plots the given trajectory to the given file. The file should have no extension or
    have the .zip extension.

    Up to four images are written into a working folder that is zipped at the
    end: ``overall.png`` (overall plus each label-vs-all curve),
    ``allonly.png`` and ``allonly_0_1_scale.png`` (overall with the chance
    level, free and fixed [0, 1] y-range) and, when per-label data is
    available, ``by_label.png`` (one subplot per label).

    Args:
        traj (SVMTrajectory): the trajectory to plot
        outfile (str): where to save the plot (will be zipped)
        exist_ok (bool, optional): Defaults to False. if existing files should be overwritten

    Raises:
        ValueError: if an argument has the wrong type
        FileExistsError: if the working folder exists, or the archive exists
            and exist_ok is False
    """
    if not isinstance(traj, SVMTrajectory):
        raise ValueError(f'expected traj is SVMTrajectory, got {traj}')
    if not isinstance(outfile, str):
        raise ValueError(f'expected outfile is str, got {outfile}')
    if not isinstance(exist_ok, bool):
        raise ValueError(f'expected exist_ok is bool, got {exist_ok}')

    outfile_wo_ext = os.path.splitext(outfile)[0]
    if outfile == outfile_wo_ext:
        outfile += '.zip'

    if os.path.exists(outfile_wo_ext):
        raise FileExistsError(f'need {outfile_wo_ext} as working space to create {outfile}')

    if not exist_ok and os.path.exists(outfile):
        raise FileExistsError(f'{outfile} already exists (use exist_ok=True to overwrite)')

    os.makedirs(outfile_wo_ext)

    xlabel = 'Layers'
    ylabel = 'SVM Accuracy (%)'

    layers = np.arange(traj.overall.shape[0])
    overall = traj.overall.numpy()
    has_by_label = traj.by_label_vs_all is not None
    # without per-label data the problem is treated as binary
    num_labels = int(traj.by_label_vs_all.shape[1]) if has_by_label else 2
    chance_perc = 1.0 / num_labels

    def _draw_chance(axis, level):
        # axhline's xmin/xmax are axes fractions in [0, 1], not data
        # coordinates; the defaults already span the full width, which also
        # keeps the line visible for a single-layer trajectory.
        axis.axhline(level, linestyle='dashed', color='k', label='Chance Acc.', alpha=0.6)

    fig, ax = plt.subplots()

    ax.set_title(f'{ylabel} Through {xlabel} (Overall)')
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)

    if has_by_label:
        for lbl in range(traj.by_label_vs_all.shape[1]):
            ax.plot(layers, traj.by_label_vs_all[:, lbl].numpy(), linestyle='dashed',
                    label=f'{lbl} vs all', alpha=0.6)
    ax.plot(layers, overall, label='Overall')
    ax.set_xticks(layers)
    ax.legend(loc=1)

    fig.savefig(os.path.join(outfile_wo_ext, 'overall.png'))
    plt.close(fig)

    # the same "all only" plot twice: autoscaled, then with a fixed 0-1 scale
    for image_name, fixed_ylim in (('allonly.png', None),
                                   ('allonly_0_1_scale.png', (0, 1))):
        fig, ax = plt.subplots()
        ax.set_title(f'{ylabel} Through {xlabel} (All Only)')
        ax.set_xlabel(xlabel)
        ax.set_ylabel(ylabel)
        _draw_chance(ax, chance_perc)
        ax.plot(layers, overall, label='Overall')
        ax.set_xticks(layers)
        if fixed_ylim is not None:
            ax.set_ylim(*fixed_ylim)
        ax.legend(loc=1)

        fig.savefig(os.path.join(outfile_wo_ext, image_name))
        plt.close(fig)

    if has_by_label:
        best_square = int(np.ceil(np.sqrt(num_labels)))
        num_cols = best_square
        num_rows = int(np.ceil(num_labels / best_square))
        fig, axes = plt.subplots(nrows=num_rows, ncols=num_cols, squeeze=False,
                                 sharey='all', sharex='all')
        # reference level used for the one-vs-all panels
        chance_perc = (num_labels - 1) / num_labels

        fig.suptitle(f'{ylabel} through {xlabel} (By Label 1 vs All)')
        lbl = 0
        for row in range(num_rows):
            for col in range(num_cols):
                ax = axes[row][col]
                if lbl >= num_labels:
                    # the grid can have more cells than there are labels
                    ax.remove()
                    continue

                yvals = traj.by_label_vs_all[:, lbl].numpy()
                ax.set_title(str(lbl))
                ax.plot(layers, yvals, label=str(lbl))
                _draw_chance(ax, chance_perc)
                lbl += 1

        axes[0][0].set_xticks(layers)
        fig.savefig(os.path.join(outfile_wo_ext, 'by_label.png'))
        plt.close(fig)

    if exist_ok and os.path.exists(outfile):
        os.remove(outfile)
    zipdir(outfile_wo_ext)
    def save(self,
             filepath: str,
             exist_ok: bool = False,
             compress: bool = True) -> None:
        """Writes these clusters, together with a readme describing the layout,
        to the given filepath. An extension on the filepath must be .zip; whether
        the result is actually zipped is decided by compress alone.

        Arguments:
            filepath (str): the folder or zip file where these clusters should be
                saved
            exist_ok (bool): affects the behavior if the folder or zip file already
                exists. If this is False, then an error is thrown. If this is True,
                the existing files are deleted
            compress (bool): if True, the folder is compressed to a zip file after
                saving and the folder is deleted. If False, the result is left as a
                folder
        """
        outfile, outfile_wo_ext = mutils.process_outfile(
            filepath, exist_ok, compress)

        # always start from an empty working folder
        if os.path.exists(outfile_wo_ext):
            filetools.deldir(outfile_wo_ext)
        os.makedirs(outfile_wo_ext)

        clusters_path = os.path.join(outfile_wo_ext, 'clusters.npz')
        np.savez_compressed(clusters_path,
                            samples=self.samples,
                            centers=self.centers,
                            labels=self.labels)

        params_path = os.path.join(outfile_wo_ext, 'calculate_params.json')
        with open(params_path, 'w') as out:
            json.dump(self.calculate_params, out)

        readme_lines = (
            'Clusters',
            '  clusters.npz:',
            '    samples [n_samples, n_features] - the samples the clusters were calculated from',
            '    centers [n_clusters, n_features] - the centers of the clusters',
            '    labels [n_samples] - the index in centers for the closest cluster to each label',
            '  calculate_params.json:',
            '    Varies. Gives information about how clusters were calculated',
        )
        with open(os.path.join(outfile_wo_ext, 'readme.md'), 'w') as out:
            # one line per entry, each terminated by a newline
            out.write('\n'.join(readme_lines) + '\n')

        if not compress:
            return
        if os.path.exists(outfile):
            os.remove(outfile)
        filetools.zipdir(outfile_wo_ext)
Beispiel #15
0
def plot_trajectory(traj: PCTrajectoryGen, filepath: str, exist_ok: bool = False,
                    alpha: float = 0.5, square: bool = True, transparent: bool = True,
                    s: int = 1, ots: OutputToScalarMapping = SqueezeOTSMapping(),
                    cmap: typing.Union[mcolors.Colormap, str] = 'cividis',
                    norm: mcolors.Normalize = mcolors.Normalize(-1, 1),
                    compress: bool = False):
    """Plots the given trajectory by storing it in the given filepath. If the output of
    the trajectory is not itself a scalar, the output to scalar mapping must be set.
    The other arguments are related to display.

    One scatter panel is drawn per layer (first two principal components),
    followed by one extra panel showing the color scale. The figure is saved
    as ``local.png`` and the raw principal vectors/values and projections are
    saved next to it as npz files.

    Args:
        traj (PCTrajectoryGen): The trajectory to plot. Must have at least 2 pcs
        filepath (str): Where to store the given trajectory, either a folder or a zip file.
            The file zip extension will only be used if compress is true
        exist_ok (bool, optional): If the filepath already exists, then this determines if it
            should be overwritten (True) or an error should be raised (False). Defaults to False.
        alpha (float, optional): The transparency value for each vector. Defaults to 0.5.
        square (bool, optional): If the dimensions of the space should be equal for width and
            height (such that 1 inch width and height visually corresponds to the same amount of
            distance in pc-space). Since pc space is naturally rectangular, not setting this
            can easily lead to misinterpretations. Defaults to True.
        transparent (bool, optional): Determines the background color of the saved images, where
            True is transparency and False is near-white. Defaults to True.
        s (int, optional): The size of each projected sample. Defaults to 1.
        ots (OutputToScalarMapping, optional): Maps the labels of the trajectory to samples which
            are then converted to colors using the color map. Defaults to SqueezeOTSMapping().
        cmap (str or Colormap, optional): The color map to use. Defaults to 'cividis'.
        norm (mcolors.Normalize, optional): Normalizes the scalars that are passed to the color
            map to the range 0-1. Defaults to normalizing linearly from [-1, 1] to [0, 1]
        compress (bool): if the folder should be zipped
    """
    tus.check(
        traj=(traj, PCTrajectoryGen),
        filepath=(filepath, str),
        exist_ok=(exist_ok, bool),
        alpha=(alpha, float),
        square=(square, bool),
        transparent=(transparent, bool),
        s=(s, int),
        ots=(ots, OutputToScalarMapping),
        cmap=(cmap, (str, mcolors.Colormap))
    )

    outfile, outfile_wo_ext = mutils.process_outfile(filepath, exist_ok, compress)
    if not compress and exist_ok and os.path.exists(outfile_wo_ext):
        filetools.deldir(outfile_wo_ext)
    os.makedirs(outfile_wo_ext)

    # one panel per layer plus one for the color scale, on a near-square grid
    num_splots_req = traj.num_layers + 1
    grid_cols: int = int(np.ceil(np.sqrt(num_splots_req)))
    grid_rows: int = int(math.ceil(num_splots_req / grid_cols))
    local_fig, local_axs = plt.subplots(grid_rows, grid_cols, squeeze=False, figsize=FRAME_SIZE)

    tick_style = dict(axis='both', which='both', bottom=False, left=False, top=False,
                      labelbottom=False, labelleft=False)

    layer: int = 0
    for row in range(grid_rows):
        for col in range(grid_cols):
            axis = local_axs[row][col]
            if layer >= num_splots_req:
                # surplus grid cell
                axis.remove()
                continue
            if layer >= traj.num_layers:
                # the panel after the last layer shows the color scale
                lspace = np.linspace(norm.vmin, norm.vmax, 100)
                axis.tick_params(**tick_style)
                axis.imshow(lspace[..., np.newaxis], cmap=cmap, norm=norm, aspect=0.2)
                layer += 1
                continue
            snapshot: PCTrajectoryGenSnapshot = traj[layer]

            projected = snapshot.projected_samples
            projected_lbls = snapshot.projected_sample_labels

            min_x = torch.min(projected[:, 0]).item()
            min_y = torch.min(projected[:, 1]).item()
            max_x = torch.max(projected[:, 0]).item()
            max_y = torch.max(projected[:, 1]).item()

            # widen degenerate ranges so the axis limits never collapse
            if max_x - min_x < 1e-3:
                min_x -= 5e-4
                max_x += 5e-4
            if max_y - min_y < 1e-3:
                min_y -= 5e-4
                max_y += 5e-4
            if square:
                # grow the shorter extent symmetrically to match the longer one
                extents_x = max_x - min_x
                extents_y = max_y - min_y
                if extents_x > extents_y:
                    upd = (extents_x - extents_y) / 2
                    min_y -= upd
                    max_y += upd
                else:
                    upd = (extents_y - extents_x) / 2
                    min_x -= upd
                    max_x += upd
            # 10% margin on every side
            padding_x = (max_x - min_x) * .1
            padding_y = (max_y - min_y) * .1

            projected_colors = ots(projected_lbls)
            # scatter accepts a colormap name or instance directly, so no
            # lookup through the (removed in recent matplotlib) cm.get_cmap
            axis.scatter(projected[:, 0].numpy(), projected[:, 1].numpy(),
                         s=s, alpha=alpha, c=projected_colors.numpy(),
                         cmap=cmap, norm=norm)
            axis.set_xlim([min_x - padding_x, max_x + padding_x])
            axis.set_ylim([min_y - padding_y, max_y + padding_y])
            axis.tick_params(**tick_style)
            layer += 1

    local_path = os.path.join(outfile_wo_ext, 'local.png')
    local_fig.tight_layout()
    # the keyword is lowercase `dpi`; `DPI=` was never applied as a resolution
    local_fig.savefig(local_path, transparent=transparent, dpi=DPI)
    # release the figure so repeated calls do not accumulate open figures
    plt.close(local_fig)

    np.savez(os.path.join(outfile_wo_ext, 'principal_vectors.npz'),
             *[snapshot.principal_vectors for snapshot in traj])
    np.savez(os.path.join(outfile_wo_ext, 'principal_values.npz'),
             *[snapshot.principal_values for snapshot in traj])
    np.savez(os.path.join(outfile_wo_ext, 'projected_samples.npz'),
             *[snapshot.projected_samples for snapshot in traj])
    np.savez(os.path.join(outfile_wo_ext, 'projected_sample_labels.npz'),
             *[snapshot.projected_sample_labels for snapshot in traj])

    if compress:
        if os.path.exists(outfile):
            os.remove(outfile)

        filetools.zipdir(outfile_wo_ext)
Beispiel #16
0
def replot_dtt_ff(infile: str, verbose: bool = True, logger: logging.Logger = None):
    """Regenerates the dtt_ff plots from a previously saved archive.

    The archive is unpacked, the stored within/across distances are reloaded,
    the per-layer mean, standard deviation and standard error are recomputed
    and handed to the plotter. The unpacked folder is always packed up again
    afterwards, even if loading or plotting failed.

    Args:
        infile (str): the outfile that you used when measuring
        verbose (bool): if this should print progress information
        logger (Logger): the logger to use, None for print

    Raises:
        ValueError: if an argument has the wrong type, or if the archive holds a
            different number of within and across arrays
    """
    if not isinstance(infile, str):
        raise ValueError(f'expected infile is str, got {infile} (type={type(infile)})')
    if not isinstance(verbose, bool):
        raise ValueError(f'expected verbose is bool, got {verbose} (type={type(verbose)})')
    if logger is not None and not isinstance(logger, logging.Logger):
        raise ValueError(f'expected logger is optional[logging.Logger], got {logger} (type={type(logger)})')

    _dbg(verbose, logger, f'unpacking {infile}')
    unzip(infile)

    infile_wo_ext = os.path.splitext(infile)[0]
    _dbg(verbose, logger, 'fetching data')

    def _load_positional(filename):
        # np.savez stores positional arrays under arr_0, arr_1, ...; read them
        # back in order until the first missing key
        arrays = []
        with np.load(os.path.join(infile_wo_ext, filename)) as archive:
            idx = 0
            key = f'arr_{idx}'
            while key in archive:
                arrays.append(archive[key])
                idx += 1
                key = f'arr_{idx}'
        return arrays

    try:
        within_dists = _load_positional('within.npz')
        across_dists = _load_positional('across.npz')

        with np.load(os.path.join(infile_wo_ext, 'sample.npz')) as sample_dict:
            num_samples = sample_dict['sample_labels'].shape[0]

        if len(across_dists) != len(within_dists):
            raise ValueError(f'expected within_dists has same len as across_dists, but len(within_dists)={len(within_dists)}, len(across_dists)={len(across_dists)}')

        def _summarize(dists):
            # one entry per stored array: mean, std, and std / sqrt(num_samples)
            count = len(dists)
            means = torch.zeros(count, dtype=torch.double)
            stds = torch.zeros(count, dtype=torch.double)
            sems = torch.zeros(count, dtype=torch.double)
            for idx, dist in enumerate(dists):
                means[idx] = dist.mean()
                stds[idx] = dist.std()
                sems[idx] = stds[idx] / np.sqrt(num_samples)
            return means, stds, sems

        within_means, within_stds, within_sems = _summarize(within_dists)
        across_means, across_stds, across_sems = _summarize(across_dists)

        layers = np.arange(len(within_dists))

        _plot_dtt_ff(layers, within_means, within_stds, within_sems,
                     across_means, across_stds, across_sems,
                     within_dists, across_dists, infile_wo_ext,
                     verbose, logger)
    finally:
        _dbg(verbose, logger, f'repacking {infile}')
        zipdir(infile_wo_ext)
def plot_pr_trajectory(traj: PRTrajectory, savepath: str, exist_ok: bool = False,
                       label_map: typing.Optional[typing.Dict[int, str]] = None):
    """Plots the given trajectory and saves it to the given zip archive

    Writes 'global.png' (the overall participation ratio) and 'all.png' (the
    overall line plus one dashed line per label, if the trajectory has per-label
    data). When per-label data is present, one '<label index>.png' is written
    per label as well. The trajectory itself is saved next to the plots as
    'traj.zip', and the working folder is then handed to zipdir.

    Args:
        traj (PRTrajectory): The trajectory to plot
        savepath (str): Where to save the trajectory. If it has no extension,
            '.zip' is appended
        exist_ok (bool, optional): Defaults to False. if we should overwrite
        label_map (dict[int, str], optional): Defaults to None. If specified,
            these are the display names for the labels. Defaults to just the
            string representation of the label. May omit any or all labels.
            The passed dict is not modified.

    Raises:
        ValueError: if an argument has the wrong type
        FileExistsError: if the working folder (savepath without extension)
            already exists, or savepath exists and exist_ok is False
    """
    if not isinstance(traj, PRTrajectory):
        raise ValueError(f'expected traj is PRTrajectory, got {traj} (type={type(traj)})')
    if not isinstance(savepath, str):
        raise ValueError(f'expected savepath is str, got {savepath} (type={type(savepath)})')
    if not isinstance(exist_ok, bool):
        raise ValueError(f'expected exist_ok is bool, got {exist_ok} (type={type(exist_ok)})')

    if traj.by_label is not None:
        if label_map is None:
            label_map = {}
        elif not isinstance(label_map, dict):
            raise ValueError(f'expected label_map is dict, got {label_map} (type={type(label_map)})')
        # Build a fresh dict: default names first, caller-supplied names on top.
        # This avoids writing the defaults into the caller's dictionary.
        default_names = {lbl: str(lbl) for lbl in range(len(traj.by_label))}
        label_map = {**default_names, **label_map}

    savepath_wo_ext = os.path.splitext(savepath)[0]
    if savepath == savepath_wo_ext:
        savepath += '.zip'

    if os.path.exists(savepath_wo_ext):
        raise FileExistsError(f'to save at {savepath}, {savepath_wo_ext} must be empty but it already exists')
    if not exist_ok and os.path.exists(savepath):
        raise FileExistsError(f'cannot save at {savepath} (already exists). set exist_ok=True to overwrite')

    os.makedirs(savepath_wo_ext)

    through_str = 'Layers' if traj.layers else 'Time'
    x_label = through_str
    y_label = 'Participation Ratio'

    # one x position per entry of the overall trajectory
    x_vals = np.arange(traj.overall.shape[0])

    # overall participation ratio only
    fig, axs = plt.subplots()
    axs.set_title(f'PR Through {through_str} (Global)')
    axs.set_xlabel(x_label)
    axs.set_ylabel(y_label)

    axs.plot(x_vals, traj.overall.numpy())
    axs.set_xticks(x_vals)

    fig.tight_layout()
    fig.savefig(os.path.join(savepath_wo_ext, 'global.png'))
    plt.close(fig)

    # overall line together with every per-label line (dashed, faded)
    fig, axs = plt.subplots()
    axs.set_title(f'PR Through {through_str} (All)')
    axs.set_xlabel(x_label)
    axs.set_ylabel(y_label)

    if traj.by_label is not None:
        for lbl, y_vals in enumerate(traj.by_label):
            axs.plot(x_vals, y_vals.numpy(), '--', label=label_map[lbl], alpha=0.6)

    axs.plot(x_vals, traj.overall.numpy(), label='Overall', alpha=1)

    axs.set_xticks(x_vals)

    axs.legend()
    fig.tight_layout()
    fig.savefig(os.path.join(savepath_wo_ext, 'all.png'))
    plt.close(fig)

    # one figure per label, named after the label index
    if traj.by_label is not None:
        for lbl, y_vals in enumerate(traj.by_label):
            fig, axs = plt.subplots()
            axs.set_title(f'PR Through {through_str} ({label_map[lbl]})')
            axs.set_xlabel(x_label)
            axs.set_ylabel(y_label)
            axs.plot(x_vals, y_vals.numpy())
            axs.set_xticks(x_vals)
            fig.tight_layout()
            fig.savefig(os.path.join(savepath_wo_ext, f'{lbl}.png'))
            plt.close(fig)

    traj.save(os.path.join(savepath_wo_ext, 'traj.zip'))
    # remove any previous archive (only reachable when exist_ok is True) before
    # the folder is zipped
    if os.path.exists(savepath):
        os.remove(savepath)

    zipdir(savepath_wo_ext)