def write_data(fn, columns, data):
    """Write DataFrame out to either a CSV or FCS file."""
    if args.format == 'csv':
        data.columns = columns
        data.to_csv(fn, columns=columns, index=False)
    elif args.format == 'fcs':
        fcswrite.write_fcs(fn, columns, data,
                           compat_chn_names=False,
                           compat_percent=False,
                           compat_negative=False)
    else:
        raise ValueError("Improper format passed to write_data")
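# A minimal usage sketch for write_data, assuming the module-level argparse
# namespace `args` that the function expects; the file name, column names,
# and data values are made up for illustration:
import argparse
import pandas as pd

args = argparse.Namespace(format='csv')
df = pd.DataFrame([[1.0, 2.0], [3.0, 4.0]])
write_data('events.csv', ['CD3', 'CD8'], df)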
def output_cluster(inputfiles):
    """Use already trained model to output clustered data."""
    try:
        model_dir = os.path.join(args.output_dir, 'models', 'clustered')
        data_dir = os.path.join(args.output_dir, 'clustered')

        if os.path.exists(data_dir):
            shutil.rmtree(data_dir)
        os.mkdir(data_dir)

        tf.reset_default_graph()
        saucie = SAUCIE(None, restore_folder=model_dir)

        print("Finding all binary codes")
        all_codes = {}
        for counter, f in enumerate(inputfiles):
            x = get_data(f)
            load = Loader(data=x, shuffle=False)

            acts = saucie.get_layer(load, 'layer_c')
            acts = acts / acts.max()
            binarized = np.where(acts > .000001, 1, 0)

            unique_rows, counts = np.unique(binarized, axis=0, return_counts=True)
            for unique_row in unique_rows:
                unique_row = tuple(unique_row.tolist())
                if unique_row not in all_codes:
                    all_codes[unique_row] = len(all_codes)

        print("Found {} clusters".format(len(all_codes)))

        print("Starting to output {} clustered files...".format(len(inputfiles)))
        for counter, f in enumerate(inputfiles):
            fname = os.path.split(f)[-1]
            print("Outputting file {}".format(counter))
            x = get_data(f)
            load = Loader(data=x, shuffle=False)

            acts = saucie.get_layer(load, 'layer_c')
            acts = acts / acts.max()
            binarized = np.where(acts > .000001, 1, 0)

            clusters = -1 * np.ones(x.shape[0])
            for code in all_codes:
                rows_equal_to_this_code = np.where(np.all(binarized == code, axis=1))[0]
                clusters[rows_equal_to_this_code] = all_codes[code]

            embeddings = saucie.get_layer(load, 'embeddings')

            rawdata = get_data(f, return_rawfile=True)
            outcols = rawdata.columns.tolist() + ['Cluster', 'Embedding_SAUCIE1', 'Embedding_SAUCIE2']
            rawdata = pd.concat([rawdata,
                                 pd.DataFrame(clusters),
                                 pd.DataFrame(embeddings[:, 0]),
                                 pd.DataFrame(embeddings[:, 1])], axis=1)

            outfile = os.path.join(data_dir, fname)
            fcswrite.write_fcs(outfile, outcols, rawdata)
    except Exception as ex:
        # if it didn't run all the way through, clean everything up and remove it
        shutil.rmtree(data_dir)
        raise
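# Hypothetical invocation of output_cluster, assuming a SAUCIE model was
# already trained under args.output_dir and the raw FCS files live in an
# input directory (the path is made up):
import glob

inputfiles = sorted(glob.glob('raw_data/*.fcs'))
output_cluster(inputfiles)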
def fcs(self, path, features, filtered=True, override=False):
    """Export the data of an RT-DC dataset to an .fcs file

    Parameters
    ----------
    path: str
        Path to an .fcs file. The ending .fcs is added
        automatically.
    features: list of str
        The features in the resulting .fcs file. These are strings
        that are defined in
        `dclab.definitions.scalar_feature_names`, e.g. "area_cvx",
        "deform", "frame", "fl1_max", "aspect".
    filtered: bool
        If set to `True`, only the filtered data
        (index in ds._filter) are used.
    override: bool
        If set to `True`, an existing file ``path`` will be
        overridden. If set to `False`, raises `OSError` if ``path``
        exists.

    Notes
    -----
    Due to incompatibility with the .fcs file format, all events
    with NaN-valued features are not exported.
    """
    features = [c.lower() for c in features]
    ds = self.rtdc_ds

    path = pathlib.Path(path)
    # Make sure that path ends with .fcs
    if path.suffix != ".fcs":
        path = path.with_name(path.name + ".fcs")
    # Check if file already exists
    if not override and path.exists():
        raise OSError("File already exists: {}\n".format(
            str(path).encode("ascii", "ignore")) +
            "Please use the `override=True` option.")
    # Check that features are in dfn.scalar_feature_names
    for c in features:
        if c not in dfn.scalar_feature_names:
            msg = "Unknown or unsupported feature name: {}".format(c)
            raise ValueError(msg)

    # Collect the header
    chn_names = [dfn.feature_name2label[c] for c in features]

    # Collect the data
    if filtered:
        data = [ds[c][ds._filter] for c in features]
    else:
        data = [ds[c] for c in features]
    data = np.array(data).transpose()
    fcswrite.write_fcs(filename=str(path),
                       chn_names=chn_names,
                       data=data)
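# A minimal usage sketch of this exporter via dclab's public API, which
# exposes it as ds.export.fcs; the input file name is made up:
import dclab

ds = dclab.new_dataset("measurement.rtdc")
ds.export.fcs(path="measurement.fcs",
              features=["area_cvx", "deform", "fl1_max"],
              override=True)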
def output_batch_correction(rawfiles):
    """Use already trained models to output batch corrected data."""
    try:
        model_dir = os.path.join(args.output_dir, 'models', 'batch_corrected')
        data_dir = os.path.join(args.output_dir, 'batch_corrected')

        if os.path.exists(data_dir):
            shutil.rmtree(data_dir)
        os.mkdir(data_dir)

        ref = rawfiles[0]
        refx = get_data(ref)
        refname = os.path.split(ref)[-1]

        print("Starting to output {} batch corrected files...".format(len(rawfiles)))
        for counter, nonref in enumerate(rawfiles[1:]):
            nonrefname = os.path.split(nonref)[-1]
            print("Outputting file {}".format(counter))
            nonrefx = get_data(nonref)

            alldata = np.concatenate([refx.values, nonrefx.values], axis=0)
            alllabels = np.concatenate([np.zeros(refx.shape[0]), np.ones(nonrefx.shape[0])], axis=0)
            load = Loader(data=alldata, labels=alllabels, shuffle=False)

            tf.reset_default_graph()
            restore_folder = os.path.join(model_dir, nonrefname)
            saucie = SAUCIE(None, restore_folder=restore_folder)

            recon, labels = saucie.get_layer(load, 'output')
            recon = sinh(recon)

            # write out reference file
            if counter == 0:
                reconref = recon[labels == 0]
                rawdata = get_data(ref, return_rawfile=True)
                for ind, c in enumerate(args.cols):
                    rawdata.iloc[:, c] = reconref[:, ind]
                outfileref = os.path.join(data_dir, refname)
                fcswrite.write_fcs(outfileref, rawdata.columns.tolist(), rawdata)

            # write out nonreference file
            reconnonref = recon[labels == 1]
            rawdata = get_data(nonref, return_rawfile=True)
            for ind, c in enumerate(args.cols):
                rawdata.iloc[:, c] = reconnonref[:, ind]
            outfilenonref = os.path.join(data_dir, nonrefname)
            fcswrite.write_fcs(outfilenonref, rawdata.columns.tolist(), rawdata)
    except Exception as ex:
        # if it didn't run all the way through, clean everything up and remove it
        shutil.rmtree(data_dir)
        raise
def aggregate_cytometry_statistics(output_dir, config, mode='all',
                                   export_csv=True, export_fcs=True,
                                   variant=None):
    from cytokit.function import data as function_data

    # Aggregate all cytometry csv data (across tiles)
    res = function_data.get_cytometry_data(output_dir, config, mode=mode)

    # Get file extension, possibly with user-defined "variant" name to be
    # included in all resulting file names
    def ext(file_ext):
        return file_ext if variant is None else '{}.{}'.format(variant, file_ext)

    # Export result as csv
    csv_path, fcs_path = None, None
    if export_csv:
        csv_path = osp.join(output_dir, cytokit_io.get_cytometry_agg_path(ext('csv')))
        cytokit_io.save_csv(csv_path, res, index=False)
        logger.info('Saved cytometry aggregation results to csv at "{}"'.format(csv_path))

    if export_fcs:
        import re
        import fcswrite
        nonalnum = '[^0-9a-zA-Z]+'
        fcs_path = osp.join(output_dir, cytokit_io.get_cytometry_agg_path(ext('fcs')))
        if len(res) > 0:
            # For FCS exports, save only integer and floating point values and
            # replace any non-alphanumeric column name characters with underscores
            res_fcs = res.select_dtypes(['int', 'float']) \
                .rename(columns=lambda c: re.sub(nonalnum, '_', c))
            if not osp.exists(osp.dirname(fcs_path)):
                os.makedirs(osp.dirname(fcs_path), exist_ok=True)
            fcswrite.write_fcs(filename=fcs_path,
                               chn_names=res_fcs.columns.tolist(),
                               data=res_fcs.values)
            logger.info('Saved cytometry aggregation results to fcs at "{}"'.format(fcs_path))
        else:
            # fcswrite fails on writing empty datasets so log a warning instead
            logger.warning('Skipping FCS export because no objects were detected')
    return csv_path, fcs_path
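# Hypothetical call, assuming a processed Cytokit experiment directory and
# an already-loaded experiment config object:
csv_path, fcs_path = aggregate_cytometry_statistics(
    output_dir='experiment/output',  # made-up path
    config=config,                   # assumed to be loaded elsewhere
    mode='all',
    variant='filtered')              # yields e.g. *.filtered.csv / *.filtered.fcs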
def write_debarcoded_files(path, three_barcode_df_all_data, unique_row,
                           barcode_ind, tag_name):
    """Split the barcode-sorted dataframe into per-barcode blocks and write
    each block to its own FCS file named after the barcode's tags."""
    for ibarcode, barcode in enumerate(barcode_ind):
        # The last block runs to the end of the dataframe; every other
        # block ends where the next one starts
        if ibarcode == len(unique_row) - 1:
            saving_df = three_barcode_df_all_data.iloc[unique_row[ibarcode]:, :]
        else:
            saving_df = three_barcode_df_all_data.iloc[
                unique_row[ibarcode]:unique_row[ibarcode + 1], :]
        saving_name = '-'.join(np.array(tag_name)[barcode]) + '.fcs'
        write_fcs(path + saving_name,
                  list(three_barcode_df_all_data.columns),
                  saving_df)
    print('Successfully wrote out debarcoded data into FCS files')
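# Hypothetical call, assuming the dataframe has been sorted so that events
# with the same barcode are contiguous; `unique_row` holds the first row
# index of each barcode block and `barcode_ind` the tag indices per block
# (all values below are made up):
write_debarcoded_files(path='debarcoded/',
                       three_barcode_df_all_data=df_sorted,
                       unique_row=[0, 1500, 3200],
                       barcode_ind=[[0, 1, 2], [0, 1, 3], [0, 2, 3]],
                       tag_name=['Pd102', 'Pd104', 'Pd105', 'Pd106'])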
def write_fcs(self, path: Union[str, Path],
              cell_properties: Union[bool, Sequence[str]] = False,
              cell_channel_properties: Union[bool, Sequence[str]] = False,
              **kwargs):
    """Writes an FCS file, see :func:`to_dataframe` for format specifications

    Uses :func:`fcswrite.write_fcs` for writing FCS 3.0 files.

    :param path: path to the .fcs file to be written
    :param cell_properties: list of cell properties (e.g. regionprops) to
        include; set to ``True`` to include all
    :param cell_channel_properties: list of cell channel properties
        (e.g. intensity values) to include; set to ``True`` to include all
    :param kwargs: other arguments passed to :func:`fcswrite.write_fcs`
    """
    if fcswrite is None:
        raise RuntimeError('fcswrite is not installed')
    df = self.to_dataframe(cell_properties=cell_properties,
                           cell_channel_properties=cell_channel_properties)
    fcswrite.write_fcs(path, df.columns.values, df.values, **kwargs)
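# Hypothetical usage, assuming `data` is an instance of the class defining
# write_fcs above (the file name and property name are made up):
data.write_fcs('cells.fcs',
               cell_properties=True,
               cell_channel_properties=['intensity_mean'])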
def fcs_write(df, fname):
    """Write merged FCS data to a new file in FCS 3.0 format.

    Args:
        df: FCS dataframe
        fname: Output file name

    Returns:
        channel names and the event count of the merged file
    """
    data = df.to_numpy()
    channels = list(df.columns)
    # replace any spaces in the marker names with "-" for consistency
    channels = [x.replace(" ", "-") for x in channels]
    fcswrite.write_fcs(filename=fname, chn_names=channels, data=data)
    event_count = data.shape[0]
    return channels, event_count
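# A minimal usage sketch for fcs_write with a toy two-channel dataframe;
# marker names, values, and the file name are made up:
import pandas as pd

merged = pd.DataFrame({'CD3 FITC': [120.0, 85.5],
                       'CD8 PE': [34.2, 910.1]})
channels, event_count = fcs_write(merged, 'merged.fcs')
# channels == ['CD3-FITC', 'CD8-PE'], event_count == 2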
print("MISSING CELL_INDEX") f_reduced.reset_index(inplace=True) f_reduced.rename({"index": "Cell_Index"}, axis="columns", inplace=True) f_reduced["Cell_Index"] = pd.to_numeric(f_reduced["Cell_Index"]) print(f_reduced) #Print final dataframe #Saving files#: if i in txt_filelist: f_reduced.to_csv(f"{output_dir}/{info_run}/Pro_{i}", index=False, sep='\t') # index = False to be compatible with Cytobank if txt_sopts: #SAVE AS FCS fcswrite.write_fcs(f"{output_dir}/{info_run}/Pro_{i}.fcs", chn_names=list(f_reduced.columns), compat_chn_names=False, data=f_reduced.to_numpy()) else: # answ = yes_or_NO("File is an .fcs. Would you like to also save it as a .txt?", # default=nonstandard_FCS) fcswrite.write_fcs(f"{output_dir}/{info_run}/Pro_{i}", chn_names=list(f_reduced.columns), compat_chn_names=False, data=f_reduced.to_numpy()) if fcs_sopts: print("Converting .fcs to .txt") f_reduced.to_csv(f"{output_dir}/{info_run}/Pro_{i}.txt", index=False, sep='\t') #Changed to index=False
def fcs(self, path, features, meta_data=None, filtered=True,
        override=False):
    """Export the data of an RT-DC dataset to an .fcs file

    Parameters
    ----------
    path: str
        Path to an .fcs file. The ending .fcs is added
        automatically.
    features: list of str
        The features in the resulting .fcs file. These are strings
        that are defined by `dclab.definitions.scalar_feature_exists`,
        e.g. "area_cvx", "deform", "frame", "fl1_max", "aspect".
    meta_data: dict
        User-defined, optional key-value pairs that are stored
        in the primary TEXT segment of the FCS file; the version
        of dclab is stored there by default
    filtered: bool
        If set to `True`, only the filtered data
        (index in ds.filter.all) are used.
    override: bool
        If set to `True`, an existing file ``path`` will be
        overridden. If set to `False`, raises `OSError` if ``path``
        exists.

    Notes
    -----
    Due to incompatibility with the .fcs file format, all events
    with NaN-valued features are not exported.
    """
    if meta_data is None:
        meta_data = {}
    if not FCSWRITE_AVAILABLE:
        raise ModuleNotFoundError(
            "Package `fcswrite` required for fcs export!")

    features = [c.lower() for c in features]
    ds = self.rtdc_ds

    path = pathlib.Path(path)
    # Make sure that path ends with .fcs
    if path.suffix != ".fcs":
        path = path.with_name(path.name + ".fcs")
    # Check if file already exists
    if not override and path.exists():
        raise OSError("File already exists: {}\n".format(
            str(path).encode("ascii", "ignore")) +
            "Please use the `override=True` option.")
    # Check that features are valid
    for c in features:
        if c not in ds.features_scalar:
            msg = "Invalid feature name: {}".format(c)
            raise ValueError(msg)

    # Collect the header
    chn_names = [dfn.get_feature_label(c, rtdc_ds=ds) for c in features]

    # Collect the data
    if filtered:
        data = [ds[c][ds.filter.all] for c in features]
    else:
        data = [ds[c] for c in features]
    data = np.array(data).transpose()

    meta_data["dclab version"] = version
    fcswrite.write_fcs(filename=str(path),
                       chn_names=chn_names,
                       data=data,
                       text_kw_pr=meta_data)
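# For reference, a direct fcswrite call mirroring the export above, with
# user metadata passed through text_kw_pr (all names and values are made up):
import numpy as np
import fcswrite

events = np.random.rand(100, 2)  # 100 events, 2 channels
fcswrite.write_fcs(filename="example.fcs",
                   chn_names=["Area", "Deformation"],
                   data=events,
                   text_kw_pr={"dclab version": "0.39.0"})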