Example #1
def write_data(fn, columns, data):
    """Write DataFrame out to either a CSV or FCS file."""
    if args.format == 'csv':
        data.columns = columns
        data.to_csv(fn, columns=columns, index=False)
    elif args.format == 'fcs':
        fcswrite.write_fcs(fn, columns, data, compat_chn_names=False, compat_percent=False, compat_negative=False)
    else:
        raise ValueError("Improper format passed to write_data")
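The `args` name here is a module-level namespace rather than a parameter. A minimal sketch of how it might be wired with argparse (the flag name and choices are assumptions for illustration, not taken from the source):

import argparse

# Hypothetical CLI setup providing the module-level `args` used by write_data.
parser = argparse.ArgumentParser()
parser.add_argument('--format', choices=['csv', 'fcs'], default='csv')
args = parser.parse_args()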
Example #2
def output_cluster(inputfiles):
    """Use already trained model to output clustered data."""
    try:
        model_dir = os.path.join(args.output_dir, 'models', 'clustered')
        data_dir = os.path.join(args.output_dir, 'clustered')
        if os.path.exists(data_dir):
            shutil.rmtree(data_dir)
        os.mkdir(data_dir)

        tf.reset_default_graph()
        saucie = SAUCIE(None, restore_folder=model_dir)

        print("Finding all binary codes")
        all_codes = {}
        for f in inputfiles:
            x = get_data(f)
            load = Loader(data=x, shuffle=False)

            acts = saucie.get_layer(load, 'layer_c')
            acts = acts / acts.max()
            binarized = np.where(acts > .000001, 1, 0)

            unique_rows = np.unique(binarized, axis=0)
            for unique_row in unique_rows:
                unique_row = tuple(unique_row.tolist())
                if unique_row not in all_codes:
                    all_codes[unique_row] = len(all_codes)

        print("Found {} clusters".format(len(all_codes)))

        print("Starting to output {} clustered files...".format(len(inputfiles)))
        for counter, f in enumerate(inputfiles):
            fname = os.path.split(f)[-1]
            print("Outputing file {}".format(counter))
            x = get_data(f)
            load = Loader(data=x, shuffle=False)
            acts = saucie.get_layer(load, 'layer_c')
            acts = acts / acts.max()
            binarized = np.where(acts > .000001, 1, 0)

            clusters = -1 * np.ones(x.shape[0])
            for code in all_codes:
                rows_equal_to_this_code = np.where(np.all(binarized == code, axis=1))[0]
                clusters[rows_equal_to_this_code] = all_codes[code]

            embeddings = saucie.get_layer(load, 'embeddings')

            rawdata = get_data(f, return_rawfile=True)
            outcols = rawdata.columns.tolist() + ['Cluster', 'Embedding_SAUCIE1', 'Embedding_SAUCIE2']
            rawdata = pd.concat([rawdata, pd.DataFrame(clusters), pd.DataFrame(embeddings[:, 0]), pd.DataFrame(embeddings[:, 1])], axis=1)
            outfile = os.path.join(data_dir, fname)
            fcswrite.write_fcs(outfile, outcols, rawdata)

    except Exception:
        # if it didn't run all the way through, clean everything up and remove it
        shutil.rmtree(data_dir)
        raise
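The clustering logic above reduces to one idea: binarize the bottleneck activations and give every distinct binary row its own integer ID. A self-contained sketch of that mapping on toy data:

import numpy as np

# Toy activations: 5 events x 3 bottleneck units.
acts = np.array([[0.9, 0.0, 0.2],
                 [0.8, 0.0, 0.3],
                 [0.0, 0.7, 0.0],
                 [0.0, 0.6, 0.0],
                 [0.9, 0.0, 0.0]])
binarized = np.where(acts / acts.max() > 1e-6, 1, 0)

# Assign a stable integer ID to each distinct binary code, in order of appearance.
codes = {}
clusters = np.empty(len(binarized), dtype=int)
for i, row in enumerate(map(tuple, binarized.tolist())):
    clusters[i] = codes.setdefault(row, len(codes))
print(clusters)  # [0 0 1 1 2]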
Example #3
    def fcs(self, path, features, filtered=True, override=False):
        """Export the data of an RT-DC dataset to an .fcs file

        Parameters
        ----------
        path: str
            Path to an .fcs file. The ending .fcs is added automatically.
        features: list of str
            The features in the resulting .fcs file. These are strings
            that are defined in `dclab.definitions.scalar_feature_names`,
            e.g. "area_cvx", "deform", "frame", "fl1_max", "aspect".
        filtered: bool
            If set to `True`, only the filtered data (index in ds._filter)
            are used.
        override: bool
            If set to `True`, an existing file ``path`` will be overridden.
            If set to `False`, raises `OSError` if ``path`` exists.

        Notes
        -----
        Due to incompatibility with the .fcs file format, all events with
        NaN-valued features are not exported.
        """
        features = [c.lower() for c in features]
        ds = self.rtdc_ds

        path = pathlib.Path(path)
        # Make sure that path ends with .fcs
        if path.suffix != ".fcs":
            path = path.with_name(path.name + ".fcs")
        # Check if file already exists
        if not override and path.exists():
            raise OSError("File already exists: {}\n".format(
                str(path).encode("ascii", "ignore")) +
                "Please use the `override=True` option.")
        # Check that features are in dfn.scalar_feature_names
        for c in features:
            if c not in dfn.scalar_feature_names:
                msg = "Unknown or unsupported feature name: {}".format(c)
                raise ValueError(msg)

        # Collect the header
        chn_names = [dfn.feature_name2label[c] for c in features]

        # Collect the data
        if filtered:
            data = [ds[c][ds._filter] for c in features]
        else:
            data = [ds[c] for c in features]

        data = np.array(data).transpose()
        fcswrite.write_fcs(filename=str(path),
                           chn_names=chn_names,
                           data=data)
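Assuming the surrounding class is the export helper attached to a dclab dataset, a call might look like this (the path and feature names are placeholders):

# Hypothetical usage; `ds` is an open dclab.RTDCBase instance.
ds.export.fcs("measurement", features=["deform", "area_cvx"],
              filtered=True, override=False)
# -> writes measurement.fcs with one channel per requested feature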
Example #4
def output_batch_correction(rawfiles):
    """Use already trained models to output batch corrected data."""
    try:
        model_dir = os.path.join(args.output_dir, 'models', 'batch_corrected')
        data_dir = os.path.join(args.output_dir, 'batch_corrected')
        if os.path.exists(data_dir):
            shutil.rmtree(data_dir)
        os.mkdir(data_dir)
        ref = rawfiles[0]
        refx = get_data(ref)
        refname = os.path.split(ref)[-1]

        print("Starting to output {} batch corrected files...".format(len(rawfiles)))
        for counter, nonref in enumerate(rawfiles[1:]):
            nonrefname = os.path.split(nonref)[-1]
            print("Outputing file {}".format(counter))

            nonrefx = get_data(nonref)
            alldata = np.concatenate([refx.to_numpy(), nonrefx.to_numpy()], axis=0)
            alllabels = np.concatenate([np.zeros(refx.shape[0]), np.ones(nonrefx.shape[0])], axis=0)

            load = Loader(data=alldata, labels=alllabels, shuffle=False)

            tf.reset_default_graph()
            restore_folder = os.path.join(model_dir, nonrefname)
            saucie = SAUCIE(None, restore_folder=restore_folder)

            recon, labels = saucie.get_layer(load, 'output')

            recon = np.sinh(recon)  # undo the arcsinh preprocessing (presumably applied in get_data)

            # write out reference file
            if counter == 0:
                reconref = recon[labels == 0]
                rawdata = get_data(ref, return_rawfile=True)
                for ind, c in enumerate(args.cols):
                    rawdata.iloc[:, c] = reconref[:, ind]

                outfileref = os.path.join(data_dir, refname)
                fcswrite.write_fcs(outfileref, rawdata.columns.tolist(), rawdata)

            # write out nonreference file
            reconnonref = recon[labels == 1]
            rawdata = get_data(nonref, return_rawfile=True)
            for ind, c in enumerate(args.cols):
                rawdata.iloc[:, c] = reconnonref[:, ind]
            outfilenonref = os.path.join(data_dir, nonrefname)
            fcswrite.write_fcs(outfilenonref, rawdata.columns.tolist(), rawdata)

    except Exception:
        # if it didn't run all the way through, clean everything up and remove it
        shutil.rmtree(data_dir)
        raise
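The sinh call only makes sense if the data were arcsinh-transformed before training (presumably inside get_data); sinh is the exact inverse. A quick numeric check of that round trip:

import numpy as np

raw = np.array([0.0, 10.0, 1000.0])
transformed = np.arcsinh(raw)   # the preprocessing this example assumes
recovered = np.sinh(transformed)
assert np.allclose(recovered, raw)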
Example #5
def aggregate_cytometry_statistics(output_dir,
                                   config,
                                   mode='all',
                                   export_csv=True,
                                   export_fcs=True,
                                   variant=None):
    from cytokit.function import data as function_data

    # Aggregate all cytometry csv data (across tiles)
    res = function_data.get_cytometry_data(output_dir, config, mode=mode)

    # Get file extension, possibly with user-defined "variant" name to be included in all
    # resulting file names
    def ext(file_ext):
        return file_ext if variant is None else '{}.{}'.format(
            variant, file_ext)

    # Export results as CSV and/or FCS
    csv_path, fcs_path = None, None
    if export_csv:
        csv_path = osp.join(output_dir,
                            cytokit_io.get_cytometry_agg_path(ext('csv')))
        cytokit_io.save_csv(csv_path, res, index=False)
        logger.info(
            'Saved cytometry aggregation results to csv at "{}"'.format(
                csv_path))
    if export_fcs:
        import re
        import fcswrite
        nonalnum = '[^0-9a-zA-Z]+'

        fcs_path = osp.join(output_dir,
                            cytokit_io.get_cytometry_agg_path(ext('fcs')))
        if len(res) > 0:
            # For FCS exports, save only integer and floating point values and replace any non-alphanumeric
            # column name characters with underscores
            res_fcs = (res.select_dtypes(['int', 'float'])
                          .rename(columns=lambda c: re.sub(nonalnum, '_', c)))
            if not osp.exists(osp.dirname(fcs_path)):
                os.makedirs(osp.dirname(fcs_path), exist_ok=True)
            fcswrite.write_fcs(filename=fcs_path,
                               chn_names=res_fcs.columns.tolist(),
                               data=res_fcs.values)
            logger.info(
                'Saved cytometry aggregation results to fcs at "{}"'.format(
                    fcs_path))
        else:
            # fcswrite fails on writing empty datasets so log a warning instead
            logger.warning(
                'Skipping FCS export because no objects were detected')
    return csv_path, fcs_path
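The FCS branch keeps only numeric columns and replaces non-alphanumeric characters in column names with underscores; that transformation is easy to check in isolation:

import re
import pandas as pd

df = pd.DataFrame({"cell id": [1, 2], "mean (DAPI)": [0.5, 0.7], "label": ["a", "b"]})
res_fcs = (df.select_dtypes(['int', 'float'])
             .rename(columns=lambda c: re.sub('[^0-9a-zA-Z]+', '_', c)))
print(res_fcs.columns.tolist())  # ['cell_id', 'mean_DAPI_']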
Example #6
def write_debarcoded_files(path, three_barcode_df_all_data, unique_row,
                           barcode_ind, tag_name):
    for ibarcode, barcode in enumerate(barcode_ind):
        if ibarcode == len(unique_row) - 1:
            saving_df = three_barcode_df_all_data.iloc[
                unique_row[ibarcode]:, :]
        else:
            saving_df = three_barcode_df_all_data.iloc[
                unique_row[ibarcode]:unique_row[ibarcode + 1], :]

        saving_name = '-'.join(np.array(tag_name)[barcode]) + '.fcs'
        write_fcs(path + saving_name, list(three_barcode_df_all_data.columns),
                  saving_df)
    print('Successfully wrote out debarcoded data into FCS files')
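unique_row evidently holds the starting row index of each barcode block, so the slicing above is the standard consecutive-boundaries idiom. In isolation (boundaries chosen for illustration):

import pandas as pd

df = pd.DataFrame({"v": range(10)})
starts = [0, 4, 7]  # hypothetical block start indices
for i, s in enumerate(starts):
    block = df.iloc[s:, :] if i == len(starts) - 1 else df.iloc[s:starts[i + 1], :]
    print(len(block))  # 4, then 3, then 3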
Example #7
    def write_fcs(self,
                  path: Union[str, Path],
                  cell_properties: Union[bool, Sequence[str]] = False,
                  cell_channel_properties: Union[bool, Sequence[str]] = False,
                  **kwargs):
        """Writes an FCS file, see :func:`to_dataframe` for format specifications

        Uses :func:`fcswrite.write_fcs` for writing FCS 3.0 files.

        :param path: path to the .fcs file to be written
        :param cell_properties: list of cell properties (e.g. regionprops) to include; set to ``True`` to include all
        :param cell_channel_properties: list of cell channel properties (e.g. intensity values) to include; set to
            ``True`` to include all
        :param kwargs: other arguments passed to :func:`fcswrite.write_fcs`
        """
        if fcswrite is None:
            raise RuntimeError('fcswrite is not installed')
        df = self.to_dataframe(cell_properties=cell_properties,
                               cell_channel_properties=cell_channel_properties)
        fcswrite.write_fcs(path, df.columns.values, df.values, **kwargs)
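Hypothetical usage, assuming an instance of the class defining this method (the variable name and property lists are placeholders):

img.write_fcs("cells.fcs",
              cell_properties=["area", "eccentricity"],
              cell_channel_properties=True)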
Example #8
def fcs_write(df, fname):
    """Write the merged FCS data into a new file
    given the FCS data, creates a new file in FCS3.0 format

    Args:
        df: FCS dataframe
        fname: Output file name

    Returns:
        channel names and the new event count for the merged file
    """

    data = df.to_numpy()
    channels = list(df.columns)

    # replace any spaces in the marker names to "-" for consistency
    channels = [x.replace(" ", "-") for x in channels]

    fcswrite.write_fcs(filename=fname, chn_names=channels, data=data)
    event_count = data.shape[0]
    return channels, event_count
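A minimal round trip through fcs_write, assuming fcswrite is importable (column names are illustrative):

import pandas as pd

df = pd.DataFrame({"CD3 PE": [1.0, 2.0], "CD8 APC": [3.0, 4.0]})
channels, event_count = fcs_write(df, "merged.fcs")
print(channels)     # ['CD3-PE', 'CD8-APC']
print(event_count)  # 2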
Example #9
        print("MISSING CELL_INDEX")
        f_reduced.reset_index(inplace=True)
        f_reduced.rename({"index": "Cell_Index"}, axis="columns", inplace=True)
    f_reduced["Cell_Index"] = pd.to_numeric(f_reduced["Cell_Index"])
    print(f_reduced)  # print final dataframe

    # Saving files:
    if i in txt_filelist:
        f_reduced.to_csv(f"{output_dir}/{info_run}/Pro_{i}",
                         index=False,
                         sep='\t')
        # index = False to be compatible with Cytobank
        if txt_sopts:
            # Save as FCS
            fcswrite.write_fcs(f"{output_dir}/{info_run}/Pro_{i}.fcs",
                               chn_names=list(f_reduced.columns),
                               compat_chn_names=False,
                               data=f_reduced.to_numpy())

    else:
        # answ = yes_or_NO("File is an .fcs. Would you like to also save it as a .txt?",
        #             default=nonstandard_FCS)
        fcswrite.write_fcs(f"{output_dir}/{info_run}/Pro_{i}",
                           chn_names=list(f_reduced.columns),
                           compat_chn_names=False,
                           data=f_reduced.to_numpy())
        if fcs_sopts:
            print("Converting .fcs to .txt")
            f_reduced.to_csv(f"{output_dir}/{info_run}/Pro_{i}.txt",
                             index=False,
                             sep='\t')  # index=False for Cytobank compatibility
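The Cell_Index repair at the top of this excerpt is a common pattern: promote the positional index to a column when the expected key column is missing. In isolation:

import pandas as pd

f_reduced = pd.DataFrame({"marker": [0.1, 0.2, 0.3]})  # no Cell_Index column
f_reduced.reset_index(inplace=True)
f_reduced.rename({"index": "Cell_Index"}, axis="columns", inplace=True)
f_reduced["Cell_Index"] = pd.to_numeric(f_reduced["Cell_Index"])
print(f_reduced.columns.tolist())  # ['Cell_Index', 'marker']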
Example #10
    def fcs(self,
            path,
            features,
            meta_data=None,
            filtered=True,
            override=False):
        """Export the data of an RT-DC dataset to an .fcs file

        Parameters
        ----------
        path: str
            Path to an .fcs file. The ending .fcs is added automatically.
        features: list of str
            The features in the resulting .fcs file. These are strings
            that are defined by `dclab.definitions.scalar_feature_exists`,
            e.g. "area_cvx", "deform", "frame", "fl1_max", "aspect".
        meta_data: dict
            User-defined, optional key-value pairs that are stored
            in the primary TEXT segment of the FCS file; the version
            of dclab is stored there by default
        filtered: bool
            If set to `True`, only the filtered data
            (index in ds.filter.all) are used.
        override: bool
            If set to `True`, an existing file ``path`` will be overridden.
            If set to `False`, raises `OSError` if ``path`` exists.

        Notes
        -----
        Due to incompatibility with the .fcs file format, all events with
        NaN-valued features are not exported.
        """
        if meta_data is None:
            meta_data = {}
        if not FCSWRITE_AVAILABLE:
            raise ModuleNotFoundError(
                "Package `fcswrite` required for fcs export!")
        features = [c.lower() for c in features]
        ds = self.rtdc_ds

        path = pathlib.Path(path)
        # Make sure that path ends with .fcs
        if path.suffix != ".fcs":
            path = path.with_name(path.name + ".fcs")
        # Check if file already exists
        if not override and path.exists():
            raise OSError("File already exists: {}\n".format(
                str(path).encode("ascii", "ignore")) +
                          "Please use the `override=True` option.")
        # Check that features are valid
        for c in features:
            if c not in ds.features_scalar:
                msg = "Invalid feature name: {}".format(c)
                raise ValueError(msg)

        # Collect the header
        chn_names = [dfn.get_feature_label(c, rtdc_ds=ds) for c in features]

        # Collect the data
        if filtered:
            data = [ds[c][ds.filter.all] for c in features]
        else:
            data = [ds[c] for c in features]

        data = np.array(data).transpose()
        meta_data["dclab version"] = version
        fcswrite.write_fcs(
            filename=str(path),
            chn_names=chn_names,
            data=data,
            text_kw_pr=meta_data,
        )
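The meta_data pairs end up in the primary TEXT segment through fcswrite's text_kw_pr argument, alongside the dclab version. Hedged usage of the exporter (keys and values are illustrative):

# Hypothetical usage; `ds` is an open dclab.RTDCBase instance.
ds.export.fcs("out", features=["deform", "area_um"],
              meta_data={"operator": "jane", "sample": "S1"})
# The custom keys, plus "dclab version", land in the FCS TEXT segment.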