def setUp(self):
    """Build the experiment and parse the two sample tubes used by the tests.

    The FCS fixtures are looked up relative to the directory that contains
    this file.
    """
    import os

    here = os.path.dirname(os.path.abspath(__file__))

    # experiment with a single float-valued "time" condition
    self.ex = flow.Experiment()
    self.ex.add_conditions({"time": "float"})

    rfp_path = here + "/data/Plate01/RFP_Well_A3.fcs"
    cfp_path = here + "/data/Plate01/CFP_Well_A4.fcs"
    self.tube1 = fcsparser.parse(rfp_path, reformat_meta=True)
    self.tube2 = fcsparser.parse(cfp_path, reformat_meta=True)
def load_well(label):
    """Return the ``Well`` for *label*, loading its *.fcs file when needed.

    *label* is either an already-loaded ``Well`` (returned unchanged) or a
    label understood by ``parse_well_label``.

    Raises
    ------
    UsageError
        If the plate is not defined, or if zero or several *.fcs files match
        the well.
    """
    # Already-loaded wells show up here via the "from" external reference
    # machinery; there is nothing left to do for them.
    if isinstance(label, Well):
        return label

    # The plate name is optional in a label, because often there is only one.
    plate, well = parse_well_label(label)

    if plate not in plates:
        if plate is not None:
            message = "Plate '{}' not defined.".format(plate)
        else:
            message = "No default plate defined."
        raise UsageError(message)

    # Exactly one *.fcs file must match the well.
    candidates = list(plates[plate].glob(well_glob.format(well)))
    if not candidates:
        raise UsageError("No *.fcs files found for well '{}'".format(label))
    if len(candidates) > 1:
        raise UsageError("Multiple *.fcs files found for well '{}'".format(label))
    fcs_path = candidates[0]

    # Load the cell data for the given well.
    logging.info('Loading {}'.format(fcs_path.name))
    meta, data = fcsparser.parse(str(fcs_path))
    return Well(label, meta, data)
def default_view(self):
    """
    Returns a diagnostic plot to see if the bleedthrough spline estimation
    is working.

    Returns
    -------
    IView : An IView, call plot() to see the diagnostic plots

    Raises
    ------
    CytoflowOpError
        If the controls and the estimated splines do not cover the same
        channels, or if the metadata of a control file cannot be read.
    """
    if set(self.controls.keys()) != set(self._splines.keys()):
        raise CytoflowOpError("Must have both the controls and bleedthrough to plot")

    channels = self.controls.keys()

    # make sure we can get the control tubes to plot the diagnostic
    for channel in channels:
        try:
            _ = fcsparser.parse(self.controls[channel],
                                meta_data_only = True,
                                reformat_meta = True)
        except Exception as e:
            # str(e) works for every exception type; most exceptions have no
            # ``.value`` attribute, so reading it would mask the real error.
            raise CytoflowOpError("FCS reader threw an error on tube {0}: {1}"
                                  .format(self.controls[channel], str(e)))

    return BleedthroughPiecewiseDiagnostic(op = self)
def check_tube(filename, experiment, ignore_v = False):
    """Check that the FCS file *filename* is compatible with *experiment*.

    Only the file's metadata is read.  The tube must expose exactly the
    experiment's channels and, for every channel whose experiment metadata
    records a voltage, the same ``$PnV`` value.

    Parameters
    ----------
    filename : path of the FCS file to check
    experiment : object providing ``metadata`` and ``channels``
    ignore_v : if true, voltage mismatches are tolerated

    Raises
    ------
    util.CytoflowOpError
        If the metadata cannot be read.
    util.CytoflowError
        If the channels differ, a voltage is missing, or a voltage differs
        and *ignore_v* is false.
    """
    try:
        tube_meta = fcsparser.parse(
            filename,
            channel_naming = experiment.metadata["name_metadata"],
            meta_data_only = True,
            reformat_meta = True)
    except Exception as e:
        raise util.CytoflowOpError("FCS reader threw an error reading metadata "
                                   " for tube {0}: {1}"
                                   .format(filename, str(e)))

    # first make sure the tube has the right channels
    if set(tube_meta["_channel_names_"]) != set(experiment.channels):
        raise util.CytoflowError("Tube {0} doesn't have the same channels "
                                 "as the first tube added".format(filename))

    tube_channels = tube_meta["_channels_"]
    tube_channels.set_index(experiment.metadata["name_metadata"],
                            inplace = True)

    # next check the per-channel parameters
    for channel in experiment.channels:
        # only channels with a recorded voltage are compared
        if "voltage" not in experiment.metadata[channel]:
            continue

        # label-based lookup; ``.ix`` no longer exists in current pandas
        channel_row = tube_channels.loc[channel]
        if "$PnV" not in channel_row:
            raise util.CytoflowError("Didn't find a voltage for channel {0} "
                                     "in tube {1}".format(channel, filename))

        old_v = experiment.metadata[channel]["voltage"]
        new_v = channel_row['$PnV']

        if old_v != new_v and not ignore_v:
            raise util.CytoflowError("Tube {0} doesn't have the same voltages"
                                     .format(filename))
def get_one_fcs(track_nextdata):
    """Parse one data set of ``path`` and return the start of the next one.

    ``path`` is taken from the enclosing scope.  The sample id found in the
    metadata (``GTI$SAMPLEID``) is printed.

    Parameters
    ----------
    track_nextdata : value passed to the parser as ``dataset_start``

    Returns
    -------
    -1 when the metadata's ``$NEXTDATA`` is 0, otherwise
    ``track_nextdata + $NEXTDATA``.
    """
    meta, data = fcsparser.parse(path,
                                 dataset_start=track_nextdata,
                                 reformat_meta=False)
    # Single-argument call form: valid on both Python 2 and Python 3, unlike
    # the print statement.
    print("sampleID: {0}".format(meta["GTI$SAMPLEID"]))
    if meta["$NEXTDATA"] == 0:
        return -1
    return track_nextdata + meta["$NEXTDATA"]
def init_model(self, op):
    """Populate ``self.tubes`` with one ``Tube`` per tube of *op*.

    For every tube the FCS metadata is read; the ``$SRC``, ``TUBE NAME`` and
    ``$SMNO`` keywords, when present, become non-condition string traits.
    Each of the tube's conditions becomes a condition trait whose type is
    derived from ``op.conditions``.  If reading a tube's metadata fails, an
    error dialog is shown and the method returns without processing the
    remaining tubes.
    """
    dtype_to_trait = {"category" : Str,
                      "float" : Float,
                      "bool" : Bool,
                      "int" : Int}

    for op_tube in op.tubes:
        tube = Tube(file = op_tube.file,
                    parent = self)

        # first load the tube's metadata and set special columns
        try:
            tube_meta = fcsparser.parse(op_tube.file,
                                        meta_data_only = True,
                                        reformat_meta = True)
        except Exception as e:
            # str(e) is safe for any exception; most have no ``.value``
            error(None,
                  "FCS reader threw an error on tube {0}: {1}"
                  .format(op_tube.file, str(e)),
                  "Error reading FCS file")
            return

        # if we're the first tube loaded, create a dummy experiment
        if not self.dummy_experiment:
            self.dummy_experiment = ImportOp(tubes = [op_tube],
                                             conditions = op.conditions,
                                             coarse_events = 1).apply()

        # The value is read with the same key that was tested for presence
        # (the original looked up 'SMNO' after checking for '$SMNO').
        for keyword in ("$SRC", "TUBE NAME", "$SMNO"):
            if keyword in tube_meta:
                self.tube_traits[keyword] = Str(condition = False)
                tube.add_trait(keyword, Str(condition = False))
                tube.trait_set(**{keyword : tube_meta[keyword]})

        # next set conditions
        for condition in op_tube.conditions:
            condition_dtype = op.conditions[condition]
            condition_trait = dtype_to_trait[condition_dtype](condition = True)
            tube.add_trait(condition, condition_trait)
            if condition not in self.tube_traits:
                self.tube_traits[condition] = condition_trait
        tube.trait_set(**op_tube.conditions)

        self.tubes.append(tube)
def init_model(self, op):
    """Rebuild ``self.tubes`` from the tubes of *op*.

    The existing tube list is emptied first.  For every tube the FCS
    metadata is read; the ``$SRC``, ``TUBE NAME`` and ``$SMNO`` keywords,
    when present, become non-condition string traits.  Each of the tube's
    conditions becomes a condition trait whose type is derived from
    ``op.conditions``.  If reading a tube's metadata fails, an error dialog
    is shown and the method returns without processing the remaining tubes.
    """
    # I DON'T KNOW WHY THIS STICKS AROUND ACROSS DIALOG INVOCATIONS.
    del self.tubes[:]

    dtype_to_trait = {"category": Str,
                      "float": Float,
                      "log": LogFloat,
                      "bool": Bool,
                      "int": Int}

    for op_tube in op.tubes:
        tube = Tube(file=op_tube.file, parent=self)

        # first load the tube's metadata and set special columns
        try:
            tube_meta = fcsparser.parse(op_tube.file,
                                        meta_data_only=True,
                                        reformat_meta=True)
        except Exception as e:
            # str(e) is safe for any exception; most have no ``.value``
            error(
                None,
                "FCS reader threw an error on tube {0}: {1}".format(op_tube.file, str(e)),
                "Error reading FCS file",
            )
            return

        # The value is read with the same key that was tested for presence
        # (the original looked up "SMNO" after checking for "$SMNO").
        for keyword in ("$SRC", "TUBE NAME", "$SMNO"):
            if keyword in tube_meta:
                self.tube_traits[keyword] = Str(condition=False)
                tube.add_trait(keyword, Str(condition=False))
                tube.trait_set(**{keyword: tube_meta[keyword]})

        # next set conditions
        for condition in op_tube.conditions:
            condition_dtype = op.conditions[condition]
            condition_trait = dtype_to_trait[condition_dtype](condition=True)
            tube.add_trait(condition, condition_trait)
            if condition not in self.tube_traits:
                self.tube_traits[condition] = condition_trait
        tube.trait_set(**op_tube.conditions)

        # if we're the first tube loaded, create a dummy experiment
        # to validate voltage, etc for later tubes
        # NOTE(review): the test reads self.dummy_experiment while the
        # assignment targets self.model.dummy_experiment - confirm these
        # refer to the same attribute.
        if not self.dummy_experiment:
            self.model.dummy_experiment = ImportOp(
                tubes=[CytoflowTube(file=op_tube.file)],
                coarse_events=1).apply()

        self.tubes.append(tube)
def parse_tube(filename, experiment, ignore_v = False):
    """Validate *filename* against *experiment*, then return its event data.

    ``check_tube`` runs first; any error it raises propagates.  A failure
    while reading the events is re-raised as ``util.CytoflowOpError``.
    """
    check_tube(filename, experiment, ignore_v)

    try:
        naming = experiment.metadata["name_metadata"]
        _meta, events = fcsparser.parse(filename, channel_naming = naming)
    except Exception as err:
        message = ("FCS reader threw an error reading data for tube "
                   "{0}: {1}".format(filename, str(err)))
        raise util.CytoflowOpError(message)

    return events
def _on_add_tubes(self):
    """
    Handle "Add tubes..." button.  Add tubes to the experiment.

    Opens a multi-file dialog for *.fcs files and appends one ``Tube`` per
    selected file to ``self.model.tubes``.  Returns without changes when the
    dialog is cancelled.

    Raises
    ------
    RuntimeError
        If the metadata of a selected file cannot be read.
    """
    # TODO - adding a set of files, then a condition, then another
    # set doesn't work.

    file_dialog = FileDialog()
    file_dialog.wildcard = "Flow cytometry files (*.fcs)|*.fcs|"
    file_dialog.action = 'open files'
    file_dialog.open()

    if file_dialog.return_code != PyfaceOK:
        return

    for path in file_dialog.paths:
        try:
            tube_meta = fcsparser.parse(path,
                                        meta_data_only = True,
                                        reformat_meta = True)
            # The result is not used; the call is kept inside the try so a
            # file without a usable channel table is still rejected here.
            tube_meta["_channels_"].set_index("$PnN")
        except Exception as e:
            # str(e) is safe for any exception; most have no ``.value``
            raise RuntimeError("FCS reader threw an error on tube {0}: {1}"
                               .format(path, str(e)))

        tube = Tube()

        for trait_name, trait in self.model.tube_traits.items():
            # TODO - do we still need to check for transient?
            tube.add_trait(trait_name, trait)

            # this magic makes sure the trait is actually defined
            # in tube.__dict__, so it shows up in trait_names etc.
            tube.trait_set(**{trait_name : trait.default_value})
            if trait.condition:
                tube.on_trait_change(self._try_multiedit, trait_name)

        tube.trait_set(Source = tube_meta['$SRC'],
                       _file = path,
                       _parent = self.model)

        # prefer the tube name, fall back to the sample number
        if 'TUBE NAME' in tube_meta:
            tube.Tube = tube_meta['TUBE NAME']
        elif '$SMNO' in tube_meta:
            tube.Tube = tube_meta['$SMNO']

        self.model.tubes.append(tube)
def test_fcs():
    """Compare ``scprep.io.load_fcs`` with ``fcsparser.parse`` on the sample file.

    Covers the default load, the sparse load, and the raw-metadata load
    (``reformat_meta=False, override=True``).
    """
    sample_path = fcsparser.test_sample_path
    meta, data = fcsparser.parse(sample_path)

    # default load
    _, _, dense = scprep.io.load_fcs(sample_path)
    assert "Time" not in dense.columns
    assert len(set(dense.columns).difference(data.columns)) == 0
    np.testing.assert_array_equal(dense.index, data.index)
    expected_dense = data[dense.columns].to_numpy()
    np.testing.assert_array_equal(dense.to_numpy(), expected_dense)

    # sparse load
    _, _, sparse_frame = scprep.io.load_fcs(sample_path, sparse=True)
    assert "Time" not in sparse_frame.columns
    assert len(set(sparse_frame.columns).difference(data.columns)) == 0
    np.testing.assert_array_equal(sparse_frame.index, data.index)
    densified = sparse_frame.sparse.to_dense().to_numpy()
    np.testing.assert_array_equal(densified,
                                  data[sparse_frame.columns].to_numpy())

    # metadata without reformatting
    raw_meta, _, _frame = scprep.io.load_fcs(sample_path,
                                             reformat_meta=False,
                                             override=True)
    _assert_fcs_meta_equal(meta, raw_meta, reformat_meta=False)
def plot(self, experiment = None, **kwargs):
    """Plot the smoothed bead histogram and the detected peaks per channel.

    The beads file of the operation is parsed; for each channel in
    ``self.op.units`` the events are binned on a base-2 log scale, the
    histogram is smoothed, peaks are located and filtered by height and
    brightness, and the result is drawn in its own subplot with a red
    vertical line per retained peak.

    *experiment* and *kwargs* are accepted but not used here.

    Raises
    ------
    CytoflowOpError
        If the beads file cannot be read.
    """
    try:
        beads_meta, beads_data = fcsparser.parse(self.op.beads_file,
                                                 reformat_meta = True)
        beads_channels = beads_meta["_channels_"].set_index("$PnN")
    except Exception as e:
        # str(e) is safe for any exception; most have no ``.value``
        raise CytoflowOpError("FCS reader threw an error on tube {0}: {1}"
                              .format(self.op.beads_file, str(e)))

    plt.figure()

    channels = self.op.units.keys()

    for idx, channel in enumerate(channels):
        data = beads_data[channel]

        # bin the data on a log scale
        # (label-based lookup; ``.ix`` no longer exists in current pandas)
        data_range = float(beads_channels.loc[channel]['$PnR'])
        hist_bins = np.logspace(1, math.log(data_range, 2), num = 256, base = 2)
        hist = np.histogram(data, bins = hist_bins)

        # mask off-scale values
        hist[0][0] = 0
        hist[0][-1] = 0

        hist_smooth = scipy.signal.savgol_filter(hist[0], 5, 1)

        # find peaks
        peak_bins = scipy.signal.find_peaks_cwt(hist_smooth,
                                                widths = np.arange(3, 20),
                                                max_distances = np.arange(3, 20) / 2)

        # filter by height and intensity
        peak_threshold = np.percentile(hist_smooth, self.op.bead_peak_quantile)
        peak_bins_filtered = \
            [x for x in peak_bins if hist_smooth[x] > peak_threshold
             and hist[1][x] > self.op.bead_brightness_threshold]

        plt.subplot(len(channels), 1, idx+1)
        plt.xscale('log')
        plt.xlabel(channel)
        plt.plot(hist_bins[1:], hist_smooth)
        for peak in peak_bins_filtered:
            plt.axvline(hist_bins[peak], color = 'r')
def concatenate_fcs(input_dir):
    """Concatenate every ``.txt`` and ``.fcs`` file in *input_dir*.

    ``.txt`` files are read as tab-separated tables.  ``.fcs`` files are read
    with fcsparser; if the parser reports an unsupported feature, or the
    column names match the ``<digits>Di`` pattern, the file is read with
    ``read_rFCS`` instead.  Each table gets three extra columns:
    ``file_identifier`` (file name up to the first dot), ``file_origin``
    (``"<file number> | <name>"``) and ``Sample_ID-Cell_Index``
    (``"<file number>-<Cell_Index>"``).

    Parameters
    ----------
    input_dir : directory containing the input files

    Returns
    -------
    (combined, filelist) : the concatenated DataFrame (fresh integer index)
    and the list of file names, ``.txt`` files first.

    The process exits via ``sys.exit`` when the directory holds no input
    files or when a table has no ``Cell_Index`` column.
    """
    entries = os.listdir(input_dir)
    txt_filelist = [f for f in entries if f.endswith(".txt")]
    fcs_filelist = [f for f in entries if f.endswith(".fcs")]
    filelist = txt_filelist + fcs_filelist
    if not filelist:
        sys.exit(f"ERROR: There are no files in {input_dir}!")

    # Detects columns that still match the PnN pattern
    reg_pnn = re.compile(r"(\d+Di$)")

    # Frames are collected and concatenated once at the end;
    # DataFrame.append no longer exists in pandas 2.x.
    frames = []

    # fcounter numbers the input files; the cell ID is a mix of it and the
    # file name (Filenumber | filename)
    for fcounter, i in enumerate(filelist, start=1):
        file_path = f"{input_dir}/{i}"
        name = i.split('.')[0]
        print(i)
        if i.endswith(".txt"):
            df = pd.read_csv(file_path, sep='\t')
        else:
            try:
                # Use fcsparser to read the fcs data files
                df = fcsparser.parse(file_path, meta_data_only=False)[1]
                if any(reg_pnn.search(n) for n in df.columns.values.tolist()):
                    raise fcsparser.api.ParserFeatureNotImplementedError
            except fcsparser.api.ParserFeatureNotImplementedError:
                print("WARNING: Non-standard .fcs file detected: ", i)
                # use rpy2 to read the files and load into python
                df = read_rFCS(file_path)[0]

        # 'file_origin' is used to separate each file after umap calculation
        df["file_identifier"] = name
        df["file_origin"] = str(fcounter) + " | " + name

        # File+ID: this way the cell index is preserved after Cytobank upload
        try:
            df["Sample_ID-Cell_Index"] = df["Cell_Index"].apply(
                lambda x: str(fcounter) + "-" + str(x))
        except KeyError:
            sys.exit(
                "ERROR: Cell_Index missing from data. Have you preprocessed it?"
            )
        frames.append(df)

    no_arc = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    return no_arc, filelist
def get_clusters(sample_name):
    """Collect the cluster counts found for *sample_name* per neighbour count.

    For each neighbour count from 500 down to 5 in steps of 5, the file
    ``<data_dir>/<sample_name>/<nn>/out/<sample_name>.fcs`` is parsed and the
    number of distinct ``cluster_id`` values is counted.  Runs whose file
    cannot be loaded are skipped, as are runs with 300 or more clusters.

    Returns
    -------
    (num_clusters, nn_nums) : two parallel lists holding the cluster count
    and the neighbour count of every retained run.
    """
    nn_nums = []
    num_clusters = []
    for nn in reversed(range(5, 500 + 1, 5)):
        try:
            _, data = fcsparser.parse(data_dir + '/' + sample_name + '/' +
                                      str(nn) + '/out/' + sample_name + '.fcs')
            clusters = len(np.unique(data['cluster_id']))
        except Exception:
            # Best effort: a missing or unreadable run is skipped.  Catching
            # Exception rather than everything lets KeyboardInterrupt and
            # SystemExit propagate.
            continue
        if clusters < 300:  # hacky method right now to reduce size of tree
            num_clusters.append(clusters)
            nn_nums.append(nn)
    return num_clusters, nn_nums
def load_fcs(filename, gene_names=True, cell_names=True, sparse=None,
             metadata_channels=[
                 'Time', 'Event_length', 'DNA1', 'DNA2', 'Cisplatin',
                 'beadDist', 'bead1'
             ]):
    """Load a fcs file

    Parameters
    ----------
    filename : str
        The name of the fcs file to be loaded
    gene_names : `bool`, `str`, array-like, or `None` (default: True)
        If `True`, we assume gene names are contained in the file. Otherwise
        expects a filename or an array containing a list of gene symbols or ids
    cell_names : `bool`, `str`, array-like, or `None` (default: True)
        If `True`, we assume cell names are contained in the file. Otherwise
        expects a filename or an array containing a list of cell barcodes.
    sparse : bool, optional (default: None)
        Forwarded to `_matrix_to_data_frame`.
    metadata_channels : list-like, optional (default: ['Time', 'Event_length',
        'DNA1', 'DNA2', 'Cisplatin', 'beadDist', 'bead1'])
        Channels to be excluded from the data

    Returns
    -------
    metadata : pd.DataFrame
        The columns of the file that are listed in `metadata_channels`
    data : result of `_matrix_to_data_frame` on the remaining columns
    """
    # `True` is translated to `None` before the names are handed on
    gene_names = None if gene_names is True else gene_names
    cell_names = None if cell_names is True else cell_names

    # Parse the fcs file
    _, events = fcsparser.parse(filename)

    # split the columns into excluded (metadata) and retained channels
    excluded = events.columns.intersection(metadata_channels)
    retained = events.columns.difference(excluded)
    metadata = events[excluded]

    data = _matrix_to_data_frame(events[retained],
                                 gene_names=gene_names,
                                 cell_names=cell_names,
                                 sparse=sparse)
    return metadata, data
def get_data(fn, sample=0, return_rawfile=False):
    """Return the events of FCS file *fn* as a DataFrame.

    With *return_rawfile* the parsed events are returned untouched.
    Otherwise the columns selected by ``args.cols`` are passed through
    ``asinh`` and, when *sample* is non-zero, at most *sample* rows are
    returned in a random order.
    """
    _, events = fcsparser.parse(fn)
    if return_rawfile:
        return events

    selected = events.iloc[:, args.cols]
    transformed = pd.DataFrame(asinh(selected), columns=selected.columns)
    if not sample:
        return transformed

    # shuffle the row positions and keep the first `sample` of them
    positions = list(range(transformed.shape[0]))
    np.random.shuffle(positions)
    return transformed.iloc[positions[:sample], :]
def apply(self, experiment = None):
    """Build a new ``Experiment`` from the tubes of this operation.

    All tubes must declare the same set of conditions and no two tubes may
    have equal condition values.  Conditions of type ``"log"`` are added as
    ``"float"`` and flagged with ``repr = "log"`` in the metadata.  When
    ``self.coarse`` is set, ``self.coarse_events`` events are drawn at random
    (without replacement) from every tube.

    The *experiment* argument is ignored; a fresh experiment is returned.

    Raises
    ------
    CytoflowOpError
        If no tubes are given, the tubes' conditions differ, or two tubes
        share the same condition values.
    """
    if not self.tubes or len(self.tubes) == 0:
        raise CytoflowOpError("Must specify some tubes!")

    # make sure each tube has the same conditions
    reference_conditions = set(self.tubes[0].conditions)
    for tube in self.tubes:
        if reference_conditions ^ set(tube.conditions):
            raise CytoflowOpError("Tube {0} didn't have the same "
                                  "conditions as tube {1}"
                                  .format(tube.file, self.tubes[0].file))

    # make sure experimental conditions are unique
    for position, first in enumerate(self.tubes[0:-1]):
        later_tubes = self.tubes[position + 1:]
        for second in later_tubes:
            if first.conditions_equal(second):
                raise CytoflowOpError("The same conditions specified for "
                                      "tube {0} and tube {1}"
                                      .format(first.file, second.file))

    experiment = Experiment()

    for condition, dtype in self.conditions.items():
        is_log = (dtype == "log")
        experiment.add_conditions({condition : "float" if is_log else dtype})
        if is_log:
            experiment.metadata[condition]["repr"] = "log"

    for tube in self.tubes:
        tube_fc = fcsparser.parse(tube.file, reformat_meta = True)

        if self.coarse:
            # keep only a random subset of the events
            tube_meta, tube_data = tube_fc
            chosen = np.random.choice(tube_data.index,
                                      self.coarse_events,
                                      replace = False)
            tube_fc = (tube_meta, tube_data.loc[chosen])

        experiment.add_tube(tube_fc, tube.conditions, ignore_v = self.ignore_v)

    return experiment
def check_tube(filename, experiment, data_set=0):
    """Check that the FCS file *filename* is compatible with *experiment*.

    Only the file's metadata is read.  The tube must contain the FCS name of
    every experiment channel and, for every channel whose metadata records a
    voltage, the same ``$PnV`` value unless the channel is listed in
    ``experiment.metadata['ignore_v']``.

    Parameters
    ----------
    filename : path of the FCS file to check
    experiment : object providing ``metadata`` and ``channels``
    data_set : passed to the parser as ``data_set``

    Raises
    ------
    util.CytoflowError
        If no experiment is given, the metadata cannot be read, channels are
        missing, a voltage is missing, or a voltage differs.
    """
    if experiment is None:
        raise util.CytoflowError("No experiment specified")

    ignore_v = experiment.metadata['ignore_v']

    try:
        tube_meta = fcsparser.parse(
            filename,
            channel_naming=experiment.metadata["name_metadata"],
            data_set=data_set,
            meta_data_only=True,
            reformat_meta=True)
    except Exception as e:
        raise util.CytoflowError("FCS reader threw an error reading metadata "
                                 "for tube {0}".format(filename)) from e

    # first make sure the tube has the right channels
    wanted = set(experiment.metadata[c]["fcs_name"]
                 for c in experiment.channels)
    if not wanted <= set(tube_meta["_channel_names_"]):
        raise util.CytoflowError(
            "Tube {0} doesn't have the same channels".format(filename))

    tube_channels = tube_meta["_channels_"]
    tube_channels.set_index(experiment.metadata["name_metadata"],
                            inplace=True)

    # next check the per-channel parameters
    for channel in experiment.channels:
        fcs_name = experiment.metadata[channel]["fcs_name"]

        # only channels with a recorded voltage are compared
        if "voltage" not in experiment.metadata[channel]:
            continue

        if "$PnV" not in tube_channels.loc[fcs_name]:
            # the two message fragments need a separating space
            raise util.CytoflowError("Didn't find a voltage for channel {0} "
                                     "in tube {1}".format(channel, filename))

        old_v = experiment.metadata[channel]["voltage"]
        new_v = tube_channels.loc[fcs_name]['$PnV']

        if old_v != new_v and channel not in ignore_v:
            raise util.CytoflowError(
                "Tube {0} doesn't have the same voltages for channel ".
                format(filename) + str(channel))
def plot(self, experiment = None, **kwargs):
    """Plot each control's bleedthrough into every other channel.

    For every ordered pair of distinct channels in ``self.op._splines`` a
    log-log subplot shows the events of the source channel's control file
    (source channel on x, target channel on y) together with the estimated
    spline in green.

    *experiment* is not used.  Defaults are filled into *kwargs* but they
    are not forwarded to any plotting call here.

    Raises
    ------
    CytoflowOpError
        If a control file cannot be read.
    """
    kwargs.setdefault('histtype', 'stepfilled')
    kwargs.setdefault('alpha', 0.5)
    kwargs.setdefault('antialiased', True)

    plt.figure()

    channels = self.op._splines.keys()
    num_channels = len(channels)

    for from_idx, from_channel in enumerate(channels):
        # The control file depends only on the source channel, so it is
        # parsed once (lazily, on first use) instead of once per target.
        tube_data = None

        for to_idx, to_channel in enumerate(channels):
            if from_idx == to_idx:
                continue

            if tube_data is None:
                try:
                    _, tube_data = fcsparser.parse(self.op.controls[from_channel],
                                                   reformat_meta = True)
                except Exception as e:
                    # str(e) is safe for any exception; most have no ``.value``
                    raise CytoflowOpError("FCS reader threw an error on tube {0}: {1}"
                                          .format(self.op.controls[from_channel], str(e)))

            plt.subplot(num_channels,
                        num_channels,
                        from_idx + (to_idx * num_channels) + 1)
            plt.xscale('log', nonposx='mask')
            plt.yscale('log', nonposy='mask')
            plt.xlabel(from_channel)
            plt.ylabel(to_channel)
            plt.scatter(tube_data[from_channel],
                        tube_data[to_channel],
                        alpha = 0.1,
                        s = 1,
                        marker = 'o')

            spline = self.op._splines[from_channel][to_channel]
            xs = np.logspace(-1, math.log(tube_data[from_channel].max(), 10))

            plt.plot(xs,
                     spline(xs),
                     'g-',
                     lw=3)
def loadDeepCyTOFData(dataPath, dataIndex, relevantMarkers, mode,
                      skip_header=0):
    """Load one sample and its labels and wrap them in a ``Sample``.

    Parameters
    ----------
    dataPath : directory of the sample, relative to ``io.DeepLearningRoot()``
    dataIndex : for ``'CSV.GZ'`` the file name itself; otherwise the number
        used in ``sample<N>.csv`` / ``sample<N>.fcs`` / ``labels<N>.csv``
    relevantMarkers : column selection applied to the loaded matrix
    mode : ``'CSV.GZ'``, ``'CSV'`` or ``'FCS'``
    skip_header : header lines to skip when reading a ``'CSV'`` sample

    In ``'CSV.GZ'`` mode the labels come from the file obtained by replacing
    ``"/x/"`` with ``"/y/"`` in the data path: the label of a row is the
    1-based position of the last column holding a 1, or 0 if there is none.

    Raises
    ------
    ValueError
        If *mode* is not one of the supported values.
    """
    if mode == 'CSV.GZ':
        # I'm just going to give it the file name
        data_filename = dataPath + "/" + str(dataIndex)
        X = pd.read_csv(os.path.join(io.DeepLearningRoot(),
                                     data_filename)).to_numpy()
        actual = pd.read_csv(
            os.path.join(io.DeepLearningRoot(),
                         data_filename.replace("/x/", "/y/")))
        labels = pd.DataFrame([0] * len(actual))
        for aci, column in enumerate(actual.columns):
            labels[actual[column] == 1] = aci + 1
        labels = [
            item for sublist in labels.values.tolist() for item in sublist
        ]
    elif mode in ('CSV', 'FCS'):
        if mode == 'CSV':
            data_filename = dataPath + '/sample' + str(dataIndex) + '.csv'
            X = genfromtxt(os.path.join(io.DeepLearningRoot(), data_filename),
                           delimiter=',',
                           skip_header=skip_header)
        else:
            data_filename = dataPath + '/sample' + str(dataIndex) + '.fcs'
            _, X = fcsparser.parse(os.path.join(io.DeepLearningRoot(),
                                                data_filename),
                                   reformat_meta=True)
            # DataFrame.as_matrix() no longer exists in current pandas
            X = X.to_numpy()
        label_filename = dataPath + '/labels' + str(dataIndex) + '.csv'
        labels = genfromtxt(os.path.join(io.DeepLearningRoot(),
                                         label_filename),
                            delimiter=',')
        labels = np.int_(labels)
    else:
        # previously this surfaced later as an unbound-variable error
        raise ValueError("Unsupported mode: {!r}".format(mode))

    # NOTE(review): the marker selection is assumed to apply to every mode
    # (the original layout was ambiguous) - confirm for 'CSV.GZ'.
    X = X[:, relevantMarkers]
    sample = Sample(X, labels)
    return sample
def default_view(self):
    """
    Returns a diagnostic view for the bead calibration.

    The beads file's metadata is read first so that an unreadable file is
    reported here rather than when the view is plotted.

    Returns
    -------
    IView : An IView, call plot() to see the diagnostic plots

    Raises
    ------
    CytoflowOpError
        If the metadata of the beads file cannot be read.
    """
    try:
        _ = fcsparser.parse(self.beads_file,
                            meta_data_only = True,
                            reformat_meta = True)
    except Exception as e:
        # str(e) is safe for any exception; most have no ``.value``
        raise CytoflowOpError("FCS reader threw an error on tube {0}: {1}"
                              .format(self.beads_file, str(e)))

    return BeadCalibrationDiagnostic(op = self)
def autodetect_name_metadata(filename, data_set = 0):
    """Choose which FCS keyword, ``$PnN`` or ``$PnS``, names the channels.

    Decision order:

    1. if the metadata cannot be read, warn and return ``'$PnS'``;
    2. if only one of the two keywords is present, return it;
    3. if any channel lacks a ``$PnS`` value, return ``'$PnN'``;
    4. if exactly one of the two has unique values, return that one;
    5. otherwise return ``'$PnS'``.

    Parameters
    ----------
    filename : path of the FCS file
    data_set : passed to the parser as ``data_set``

    Returns
    -------
    str : ``'$PnN'`` or ``'$PnS'``
    """
    try:
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            metadata = fcsparser.parse(filename,
                                       data_set = data_set,
                                       meta_data_only = True,
                                       reformat_meta = True)
    except Exception as e:
        warnings.warn("Trouble getting metadata from {}: {}".format(filename, str(e)),
                      util.CytoflowWarning)
        return '$PnS'

    meta_channels = metadata["_channels_"]
    has_pnn = "$PnN" in meta_channels
    has_pns = "$PnS" in meta_channels

    if has_pnn and not has_pns:
        return "$PnN"
    if has_pns and not has_pnn:
        return "$PnS"

    PnN = meta_channels["$PnN"]
    PnS = meta_channels["$PnS"]

    # sometimes not all of the channels have a $PnS.  all the channels must
    # have a $PnN to be compliant with the spec.  The values are inspected
    # one by one (``None in PnS`` would test the index of a pandas Series),
    # and the result is returned at once so the uniqueness checks below
    # cannot override it.
    if any(name is None for name in PnS):
        return "$PnN"

    # sometimes one is unique and the other isn't
    pnn_unique = len(set(PnN)) == len(PnN)
    pns_unique = len(set(PnS)) == len(PnS)
    if pnn_unique and not pns_unique:
        return "$PnN"

    # Either only $PnS is unique, or there is no difference in uniqueness.
    # as per fcsparser.api, $PnN is the "short name" (like FL-1)
    # and $PnS is the "actual name" (like "FSC-H").  so let's use $PnS.
    return "$PnS"
def from_fcs(
    cls,
    fcs_file,
    cofactor=5,
    metadata_channels=[
        "Time",
        "Event_length",
        "DNA1",
        "DNA2",
        "Cisplatin",
        "beadDist",
        "bead1",
    ],
):
    """Load the data channels of *fcs_file*, optionally arcsinh-transformed.

    Columns are renamed to each channel's ``$PnS`` value, falling back to
    ``$PnN`` where ``$PnS`` is absent.  Columns named in *metadata_channels*
    are dropped.

    Parameters
    ----------
    fcs_file : path of the FCS file
    cofactor : divisor applied before ``arcsinh``; ``None`` or a value that
        is not positive disables the transformation
    metadata_channels : channel names to exclude from the result

    Returns
    -------
    The remaining channels as float64 data, transformed when requested.
    """
    # Parse the fcs file
    text, data = fcsparser.parse(fcs_file)
    data = data.astype(np.float64)

    # Extract the S and N features (Indexing assumed to start from 1)
    # Assumes channel names are in S
    no_channels = text["$PAR"]
    channel_names = []
    for i in range(1, no_channels + 1):
        try:
            name = text["$P%dS" % i]
        except KeyError:
            name = text["$P%dN" % i]
        channel_names.append(name)
    data.columns = channel_names

    # Metadata and data
    metadata_channels = data.columns.intersection(metadata_channels)
    data_channels = data.columns.difference(metadata_channels)
    data = data[data_channels]

    # Transform if necessary.  Both conditions must hold: with ``or`` a
    # cofactor of None raised a TypeError and a cofactor of 0 divided by zero.
    if cofactor is not None and cofactor > 0:
        data = np.arcsinh(np.divide(data, cofactor))

    return data
def plot(self, experiment = None, **kwargs):
    """Plot one histogram of the blank file per channel of the operation.

    Each subplot shows the blank events of a channel in 200 bins together
    with a red vertical line at ``self.op._af_median[channel]``.  Keyword
    arguments are forwarded to ``plt.hist``; ``histtype``, ``alpha`` and
    ``antialiased`` receive defaults when not supplied.  *experiment* is
    not used.
    """
    import matplotlib.pyplot as plt
    import seaborn as sns

    hist_defaults = (('histtype', 'stepfilled'),
                     ('alpha', 0.5),
                     ('antialiased', True))
    for option, value in hist_defaults:
        kwargs.setdefault(option, value)

    _, blank_data = fcsparser.parse(self.op.blank_file, reformat_meta=True)

    plt.figure()

    # one row per channel, numbered from 1
    for row, channel in enumerate(self.op.channels, start=1):
        events = blank_data[channel]
        plt.subplot(len(self.op.channels), 1, row)
        plt.title(channel)
        plt.hist(events, bins = 200, **kwargs)
        plt.axvline(self.op._af_median[channel], color = 'r')
def load_data(data_path, data_index, relevant_markers, mode, skip_header=0):
    """Load one unlabeled sample and wrap it in a ``Sample``.

    Parameters
    ----------
    data_path : directory holding the samples
    data_index : for ``'CSV'`` the number in ``sample<N>.csv``; for ``'FCS'``
        the position in the name-sorted list of files in *data_path* whose
        name contains ``.fcs``
    relevant_markers : column selection applied to the loaded matrix
    mode : ``'CSV'`` or ``'FCS'``
    skip_header : header lines to skip when reading a ``'CSV'`` sample

    Raises
    ------
    ValueError
        If *mode* is not one of the supported values.
    """
    if mode == 'CSV':
        data_filename = data_path + '/sample' + str(data_index) + '.csv'
        x = genfromtxt(os.path.join(io.deep_learning_root(), data_filename),
                       delimiter=',',
                       skip_header=skip_header)
    elif mode == 'FCS':
        # os.listdir returns names in arbitrary order; sorting makes
        # data_index refer to the same file on every run and platform.
        files = sorted(file for file in os.listdir(data_path)
                       if '.fcs' in file)
        data_filename = os.path.join(data_path, files[data_index])
        _, x = fcsparser.parse(os.path.join(io.deep_learning_root(),
                                            data_filename),
                               reformat_meta=True)
        # DataFrame.as_matrix() no longer exists in current pandas
        x = x.to_numpy()
    else:
        # previously this surfaced later as an unbound-variable error
        raise ValueError("Unsupported mode: {!r}".format(mode))

    x = x[:, relevant_markers]
    sample = Sample(x)
    return sample
def query_specimen_fcsfile_data(request):
    """Return every column of a specimen's FCS file as lists.

    The JSON request body supplies ``filename`` and ``specimenid``.  The
    success response maps each column name to its values and also carries
    ``filename`` and ``specimenid``.  Any exception is logged and turned
    into a fail response.
    """
    try:
        payload = json.loads(request.body)
        filename = payload['filename']
        specimenid = payload['specimenid']

        specimen = Specimen.objects.get(specimenid=specimenid)
        if specimen is None:
            logger.error('param vaild')

        fcs_path = get_fcsfilepath(specimen.specimendir, filename)
        meta, df = fcsparser.parse(fcs_path)

        column_names = numpy.array(df.columns).tolist()
        cols = {name: df[name].tolist() for name in column_names}
        # set last, so these keys win over same-named columns
        cols['filename'] = filename
        cols['specimenid'] = specimenid
        return em.create_sucess_response(cols)
    except Exception as e:
        logger.exception(e)
        return em.create_fail_response(e, em.FAIL)
def draw(self, specimengates):
    """Draw the gates of every entry in *specimengates*.

    For each entry the referenced FCS file is parsed.  Gate type 0 holds a
    JSON list of gates, each drawn with ``draw_normal_gate``; gate type 1
    holds a single JSON gate drawn with ``draw_vetx_gate``.

    Returns ``(self.imgs, result)`` where *result* is the value returned by
    the last ``draw_vetx_gate`` call, or ``None`` if there was none.
    """
    result = None
    for specimengate in specimengates:
        filename = specimengate.fcsfilename
        filepath = get_fcsfilepath(self.fcsfiledir, filename)
        meta, df = fcsparser.parse(filepath)
        normal_gates = None
        vetx_gate = None
        # the stored gate definition is JSON; its shape depends on gatetype
        if specimengate.gatetype == 0:
            normal_gates = json.loads(specimengate.gates)
        elif specimengate.gatetype == 1:
            vetx_gate = json.loads(specimengate.gates)
        if normal_gates is not None:
            for gate in normal_gates:
                self.draw_normal_gate(filename, gate, df)
        if vetx_gate is not None:
            result = self.draw_vetx_gate(filename, vetx_gate, df)
    # NOTE(review): assumed to run once after all gates are drawn; the
    # original layout did not show the nesting - confirm.
    self.copy_last_plot()
    return self.imgs, result
def load_cll_data_1p_fcs(diagnosis_filename, cytometry_dir, features):
    """Load the feature matrices and diagnoses of the listed FCS files.

    *diagnosis_filename* is a tab-separated table with ``FileName`` and
    ``Diagnosis`` columns.  The entries of *cytometry_dir* are visited in
    sorted order; sub-directories and files not named in the table are
    skipped.  The column names of every loaded file, and finally the encoded
    labels, are printed.

    Returns
    -------
    (X, y) : list of arrays holding the *features* columns of each file, and
    the matching list of labels (``'no'`` -> 0, ``'yes'`` -> 1).
    """
    diagnosis_df = pd.read_csv(diagnosis_filename, sep='\t')
    label_codes = {'no': 0, 'yes': 1}

    matrices, diagnoses = [], []
    for filename in sorted(os.listdir(cytometry_dir)):
        file_path = os.path.join(cytometry_dir, filename)
        if os.path.isdir(file_path):
            continue
        # filter out PB1 samples that we do not have diagnosis information about
        if filename not in diagnosis_df['FileName'].values:
            continue

        meta_data, file_df = fcsparser.parse(file_path,
                                             meta_data_only=False,
                                             reformat_meta=True)
        print(list(file_df))
        matrices.append(file_df[features].values)

        matching_rows = diagnosis_df.loc[diagnosis_df['FileName'] == filename]
        diagnoses.append(matching_rows['Diagnosis'].values[0])

    y = [label_codes[diagnosis] for diagnosis in diagnoses]
    print(y)
    return matrices, y
def cluster_fcs(filename,
                model,
                features=RELEVANT_FEATURES,
                return_type='array'):
    '''Trains specified cluster model on FCS file

    KMeans is trained with n_clusters=4. DBSCAN is trained with
    min_samples=100 and eps=5e4.

    Args:
        filename (str): filename of fcs data to be clustered
        model (str): 'kmeans' or 'dbscan' (case-insensitive)
        features: columns of the FCS data the model is fitted on
        return_type (str): 'dataframe' or 'array'

    Returns:
        Either pd.DataFrame with new 'cluster_label' column or an np.array
        with cluster labels

    Raises:
        ValueError: if model or return_type is not one of the listed values
    '''
    # validate input; the model name is normalised first so the
    # case-insensitive comparison below is actually reachable
    model_name = model.lower() if isinstance(model, str) else model
    if model_name not in {'kmeans', 'dbscan'}:
        raise ValueError('model must be "kmeans" or "dbscan".')
    if return_type not in {'dataframe', 'array'}:
        raise ValueError('return_type must be "dataframe" or "array".')

    # train cluster model
    if model_name == 'kmeans':
        estimator = KMeans(n_clusters=4)
    else:
        estimator = DBSCAN(min_samples=100, eps=5e4)

    _, data = fcs.parse(filename)
    estimator.fit(data[features])
    labels = estimator.labels_

    if return_type == 'dataframe':
        data['cluster_label'] = labels
        return data
    return labels
def cell_stat(request):
    """Compute gate statistics over the SSC-A / PerCP-A points of an FCS file.

    The JSON request body supplies ``fcsfilename``, ``specimenid`` and
    ``polygongate``.  The polygons are loaded into a ``Gate`` whose ``stat``
    result, extended with an empty ``detail`` entry, forms the success
    response.  Any exception is logged and turned into a fail response.
    """
    try:
        payload = json.loads(request.body)
        filename = payload['fcsfilename']
        specimenid = payload['specimenid']
        polygons = payload['polygongate']

        specimen = Specimen.objects.get(specimenid=specimenid)
        fcs_path = get_fcsfilepath(specimen.specimendir, filename)
        meta, df = fcsparser.parse(fcs_path)

        x_values = df[SSC_A].values
        y_values = df[PerCP_A].values

        gate = Gate()
        gate.load(polygons)

        # one (x, y) row per event
        x_column = x_values.reshape(x_values.size, 1)
        y_column = y_values.reshape(y_values.size, 1)
        points = numpy.concatenate((x_column, y_column), axis=1)

        stats = gate.stat(points)
        stats['detail'] = {}
        return em.create_sucess_response(stats)
    except Exception as e:
        logger.exception(e)
        return em.create_fail_response(e, em.FAIL)
def fcs_to_csv(path, file_name, save_metadata=False, gate=True, alpha=0.4):
    R"""
    Reads in a Flow Cytometry Standard (FCS) file and exports the
    ``FSC-A``, ``SSC-A``, ``FITC-A`` and ``gate`` columns to a csv file.

    Parameters
    ----------
    path : str
        Path to .fcs file
    file_name : str
        Path to save file to .csv
    save_metadata : bool
        If True, a metadata file will also be saved. It will have the name of
        `path` (minus its last four characters) with `_metadata.csv`
    gate : bool
        If True, the provided data will be gated with `gaussian_gate`;
        otherwise a `gate` column of zeros is added.
    alpha : float
        Passed to `gaussian_gate` as `alpha`.

    Raises
    ------
    RuntimeError
        If `path` does not end in the `fcs` extension.
    """
    # Ensure provided file is actually .fcs
    extension = path.rsplit('.', 1)[-1]
    if extension != 'fcs':
        raise RuntimeError("`path` is not an FCS file.")

    meta, data = fcsparser.parse(path)

    if gate == True:
        gated = gaussian_gate(data, alpha=alpha)
    else:
        data['gate'] = 0
        gated = data.copy()

    exported = gated.loc[:, ['FSC-A', 'SSC-A', 'FITC-A', 'gate']]
    exported.to_csv(file_name, index=False)

    if save_metadata:
        meta_name = '{0}_metadata.csv'.format(path[:-4])
        pd.DataFrame(meta).to_csv(meta_name, index=False)
def from_fcs(cls,
             fcs_file,
             cofactor=5,
             metadata_channels=[
                 'Time', 'Event_length', 'DNA1', 'DNA2', 'Cisplatin',
                 'beadDist', 'bead1'
             ]):
    """Build an instance of *cls* from the FCS file *fcs_file*.

    Columns are renamed to each channel's ``$PnS`` value, falling back to
    ``$PnN`` where ``$PnS`` is absent.  Columns named in *metadata_channels*
    become the metadata; the remaining columns become the data.

    Parameters
    ----------
    fcs_file : path of the FCS file
    cofactor : divisor applied to the data before ``arcsinh``; ``None`` or a
        value that is not positive disables the transformation
    metadata_channels : channel names treated as metadata

    Returns
    -------
    ``cls(data, 'masscyt', metadata)``
    """
    # Parse the fcs file
    text, data = fcsparser.parse(fcs_file)
    data = data.astype(np.float64)

    # Extract the S and N features (Indexing assumed to start from 1)
    # Assumes channel names are in S
    no_channels = text['$PAR']
    channel_names = []
    for i in range(1, no_channels + 1):
        try:
            name = text['$P%dS' % i]
        except KeyError:
            name = text['$P%dN' % i]
        channel_names.append(name)
    data.columns = channel_names

    # Metadata and data
    metadata_channels = data.columns.intersection(metadata_channels)
    data_channels = data.columns.difference(metadata_channels)
    metadata = data[metadata_channels]
    data = data[data_channels]

    # Transform if necessary.  Both conditions must hold: with ``or`` a
    # cofactor of None raised a TypeError and a cofactor of 0 divided by zero.
    if cofactor is not None and cofactor > 0:
        data = np.arcsinh(np.divide(data, cofactor))

    # Create and return scdata object
    scdata = cls(data, 'masscyt', metadata)
    return scdata
def test_fcs_header_error():
    """Exercise ``scprep.io.fcs._parse_fcs_header`` on altered metadata.

    Covers zeroed header offsets with the offsets supplied as keywords, an
    invalid ``$DATATYPE``, both accepted ``$BYTEORD`` values, and an invalid
    ``$BYTEORD``.
    """
    sample_path = fcsparser.test_sample_path
    meta, data = fcsparser.parse(sample_path,
                                 reformat_meta=True,
                                 channel_naming="$PnN")

    # header offsets moved into the keywords and zeroed in the header:
    # the parsed offsets must match those of the untouched metadata
    meta_bad = copy.deepcopy(meta)
    header = meta_bad["__header__"]
    meta_bad["$DATASTART"] = header["data start"]
    meta_bad["$DATAEND"] = header["data end"]
    header["data start"] = 0
    header["data end"] = 0
    for keyword in ("$DATASTART", "$DATAEND"):
        assert (scprep.io.fcs._parse_fcs_header(meta_bad)[keyword] ==
                scprep.io.fcs._parse_fcs_header(meta)[keyword])

    # invalid data type
    meta_bad = copy.deepcopy(meta)
    meta_bad["$DATATYPE"] = "invalid"
    utils.assert_raises_message(
        ValueError,
        "Expected $DATATYPE in ['F', 'D']. "
        "Got 'invalid'",
        scprep.io.fcs._parse_fcs_header,
        meta_bad,
    )

    # byte order: both valid spellings, then an invalid one
    meta_bad = copy.deepcopy(meta)
    expected_endian = (("4,3,2,1", ">"), ("1,2,3,4", "<"))
    for byteord, endian in expected_endian:
        meta_bad["$BYTEORD"] = byteord
        parsed = scprep.io.fcs._parse_fcs_header(meta_bad)
        assert parsed["$ENDIAN"] == endian
    meta_bad["$BYTEORD"] = "invalid"
    utils.assert_raises_message(
        ValueError,
        "Expected $BYTEORD in ['1,2,3,4', '4,3,2,1']. "
        "Got 'invalid'",
        scprep.io.fcs._parse_fcs_header,
        meta_bad,
    )
def test_fcs_reformat_meta():
    """Compare reformatted metadata and data of scprep and fcsparser.

    Metadata values must be equal; where a direct comparison fails,
    ``$NEXTDATA`` and ``$P*B`` entries are compared after integer
    conversion and ``_channels_`` is compared column by column after a
    dtype cast.  The data must match apart from the dropped ``Time`` column.
    """
    sample_path = fcsparser.test_sample_path
    meta, data = fcsparser.parse(sample_path, reformat_meta=True)
    X_meta, _, X = scprep.io.load_fcs(sample_path,
                                      reformat_meta=True,
                                      override=True)

    assert set(meta.keys()) == set(X_meta.keys())
    for key in meta.keys():
        expected = meta[key]
        try:
            np.testing.assert_array_equal(expected, X_meta[key], key)
        except AssertionError:
            is_int_field = key == "$NEXTDATA" or (key.startswith("$P")
                                                  and key.endswith("B"))
            if is_int_field:
                np.testing.assert_array_equal(expected, int(X_meta[key]), key)
            elif key == "_channels_":
                for column in expected.columns:
                    cast_column = X_meta[key][column].astype(
                        expected[column].dtype)
                    np.testing.assert_array_equal(expected[column],
                                                  cast_column,
                                                  key + column)
            else:
                raise

    assert 'Time' not in X.columns
    assert len(set(X.columns).difference(data.columns)) == 0
    np.testing.assert_array_equal(X.index, data.index)
    np.testing.assert_array_equal(X.values, data[X.columns].values)
def loadDeepCyTOFData(dataPath, dataIndex, relevantMarkers, mode,
                      skip_header=0):
    """Load sample number *dataIndex* and its labels into a ``Sample``.

    Parameters
    ----------
    dataPath : directory of the sample, relative to ``io.DeepLearningRoot()``
    dataIndex : number used in ``sample<N>.csv`` / ``sample<N>.fcs`` and
        ``labels<N>.csv``
    relevantMarkers : column selection applied to the loaded matrix
    mode : ``'CSV'`` or ``'FCS'``
    skip_header : header lines to skip when reading a ``'CSV'`` sample

    Raises
    ------
    ValueError
        If *mode* is not one of the supported values.
    """
    if mode == 'CSV':
        data_filename = dataPath + '/sample' + str(dataIndex) + '.csv'
        X = genfromtxt(os.path.join(io.DeepLearningRoot(), data_filename),
                       delimiter=',',
                       skip_header=skip_header)
    elif mode == 'FCS':
        data_filename = dataPath + '/sample' + str(dataIndex) + '.fcs'
        _, X = fcsparser.parse(os.path.join(io.DeepLearningRoot(),
                                            data_filename),
                               reformat_meta=True)
        # DataFrame.as_matrix() no longer exists in current pandas
        X = X.to_numpy()
    else:
        # previously this surfaced later as an unbound-variable error
        raise ValueError("Unsupported mode: {!r}".format(mode))

    X = X[:, relevantMarkers]

    label_filename = dataPath + '/labels' + str(dataIndex) + '.csv'
    labels = genfromtxt(os.path.join(io.DeepLearningRoot(), label_filename),
                        delimiter=',')
    labels = np.int_(labels)

    sample = Sample(X, labels)
    return sample
def check_tube(filename, experiment):
    """Verify that the FCS file `filename` is compatible with `experiment`.

    The file's metadata is parsed (no event data) using the experiment's
    ``name_metadata`` channel naming. The tube must provide every channel of
    the experiment, and for each channel whose experiment metadata records a
    ``voltage`` the tube's ``$PnV`` must match unless the channel is listed in
    the experiment's ``ignore_v`` metadata.

    Parameters
    ----------
    filename : str
        Path of the FCS file to check.
    experiment : Experiment
        Experiment whose ``channels`` and ``metadata`` are compared against.

    Raises
    ------
    util.CytoflowOpError
        If the file cannot be parsed, lacks a channel, lacks a voltage for a
        channel that needs one, or has a differing voltage.
    """
    ignore_v = experiment.metadata['ignore_v']
    name_metadata = experiment.metadata["name_metadata"]

    try:
        tube_meta = fcsparser.parse(filename,
                                    channel_naming=name_metadata,
                                    meta_data_only=True,
                                    reformat_meta=True)
    except Exception as e:
        raise util.CytoflowOpError("FCS reader threw an error reading metadata"
                                   " for tube {0}: {1}".format(filename, str(e)))

    # first make sure the tube has the right channels
    if not set(experiment.channels) <= set(tube_meta["_channel_names_"]):
        raise util.CytoflowOpError(
            "Tube {0} doesn't have the same channels".format(filename))

    tube_channels = tube_meta["_channels_"]
    tube_channels.set_index(name_metadata, inplace=True)

    # next check the per-channel parameters
    for channel in experiment.channels:
        # only channels with a recorded voltage need checking
        if "voltage" not in experiment.metadata[channel]:
            continue

        # label-based row lookup; .loc replaces the removed .ix indexer
        channel_row = tube_channels.loc[channel]
        if "$PnV" not in channel_row:
            raise util.CytoflowOpError("Didn't find a voltage for channel {0} "
                                       "in tube {1}".format(channel, filename))

        old_v = experiment.metadata[channel]["voltage"]
        new_v = channel_row['$PnV']

        if old_v != new_v and channel not in ignore_v:
            raise util.CytoflowOpError(
                "Tube {0} doesn't have the same voltages".format(filename))
def fcs_csv(file, outDir):
    """Convert an FCS file to a CSV file.

    The event table parsed from `file` is written to
    ``<outDir>/<stem of file>.csv`` with a header row, no index column and
    ``utf-8-sig`` encoding.

    Args:
        file (str): Path to the FCS file to convert.
        outDir (str): Directory in which the CSV file is written.

    Returns:
        Whatever ``DataFrame.to_csv`` returns for a path target (``None``
        according to the pandas documentation).
    """
    file_name = Path(file).stem
    logger.info('Started converting the fcs file ' + file_name)
    meta, data = fcsparser.parse(file, meta_data_only=False, reformat_meta=True)
    logger.info('Saving csv file ' + file_name)

    # Write straight to the target path instead of os.chdir(outDir): changing
    # the process-wide working directory breaks any relative path (including
    # the next `file`) used after the first call.
    csv_path = os.path.join(outDir, r'%s.csv' % file_name)
    export_csv = data.to_csv(csv_path, index=None, header=True,
                             encoding='utf-8-sig')
    return export_csv
def estimate(self, experiment, subset = None):
    """
    Estimate the autofluorescence from *blank_file*.

    Parses ``self.blank_file``, checks that each channel in ``self.channels``
    was acquired at the voltage recorded in `experiment`, and stores the
    per-channel median and standard deviation of the blank's events in
    ``self._af_median`` and ``self._af_stdev``.

    Parameters
    ----------
    experiment : Experiment
        Provides ``channels`` and per-channel ``voltage`` metadata.
    subset : optional
        Not used by this method.

    Raises
    ------
    CytoflowOpError
        If no experiment is given, a channel is missing from the experiment,
        the blank file cannot be parsed, or a voltage is missing or differs.
    """
    if not experiment:
        raise CytoflowOpError("No experiment specified")

    if not set(self.channels) <= set(experiment.channels):
        raise CytoflowOpError("Specified channels that weren't found in "
                              "the experiment.")

    # don't have to validate that blank_file exists; should crap out on
    # trying to set a bad value
    try:
        blank_meta, blank_data = \
            fcsparser.parse(self.blank_file, reformat_meta = True)
        blank_channels = blank_meta["_channels_"].set_index("$PnN")
    except Exception as e:
        # str(e) works for every exception; e.value is not a general
        # attribute and raised AttributeError here, hiding the real error
        raise CytoflowOpError("FCS reader threw an error: " + str(e))

    for channel in self.channels:
        v = experiment.metadata[channel]['voltage']

        # label-based row lookup; .loc replaces the removed .ix indexer
        channel_row = blank_channels.loc[channel]
        if "$PnV" not in channel_row:
            raise CytoflowOpError("Didn't find a voltage for channel {0} "
                                  "in tube {1}".format(channel, self.blank_file))

        blank_v = channel_row['$PnV']

        if blank_v != v:
            raise CytoflowOpError("Voltage differs for channel {0}".format(channel))

    for channel in self.channels:
        self._af_median[channel] = np.median(blank_data[channel])
        self._af_stdev[channel] = np.std(blank_data[channel])
def estimate(self, experiment, subset = None):
    """
    Estimate the mapping from the two-channel controls.

    For every ``(from_channel, to_channel)`` pair in ``self.translation`` the
    matching control file from ``self.controls`` is parsed (once per file),
    its voltages are checked against `experiment`, and the autofluorescence,
    bleedthrough and bead-calibration corrections found in the experiment's
    channel metadata are applied. A degree-1 fit of ``log10(to_channel)``
    against ``log10(from_channel)`` over the strictly positive events is then
    stored in ``self._coefficients[(from_channel, to_channel)]``.

    Parameters
    ----------
    experiment : Experiment
        Provides ``channels`` and the per-channel metadata described above.
    subset : optional
        Not used by this method.

    Raises
    ------
    CytoflowOpError
        If no experiment is given, a control file is not specified or cannot
        be parsed, or a voltage is missing or differs.
    """
    if not experiment:
        raise CytoflowOpError("No experiment specified")

    # parsed and corrected control data, keyed by file so that a control
    # shared between several translations is only processed once
    tubes = {}

    for from_channel, to_channel in self.translation.items():

        if (from_channel, to_channel) not in self.controls:
            raise CytoflowOpError("Control file for {0} --> {1} "
                                  "not specified"
                                  .format(from_channel, to_channel))

        tube_file = self.controls[(from_channel, to_channel)]

        if tube_file not in tubes:
            try:
                tube_meta, tube_data = fcsparser.parse(tube_file,
                                                       reformat_meta = True)
                tube_channels = tube_meta["_channels_"].set_index("$PnN")
            except Exception as e:
                # e.value is not a general exception attribute; use str(e)
                raise CytoflowOpError("FCS reader threw an error on tube "
                                      "{0}: {1}"
                                      .format(tube_file, str(e)))

            # check voltages
            for channel in [from_channel, to_channel]:
                exp_v = experiment.metadata[channel]['voltage']

                # label-based row lookup; .loc replaces the removed .ix
                channel_row = tube_channels.loc[channel]

                # self.controls is keyed by (from, to) tuples, so the tube is
                # reported via tube_file rather than self.controls[channel]
                if "$PnV" not in channel_row:
                    raise CytoflowOpError("Didn't find a voltage for "
                                          "channel {0} in tube {1}"
                                          .format(channel, tube_file))

                control_v = channel_row["$PnV"]

                if control_v != exp_v:
                    raise CytoflowOpError("Voltage differs for channel "
                                          "{0} in tube {1}"
                                          .format(channel, tube_file))

            # autofluorescence correction
            af = [(channel, (experiment.metadata[channel]['af_median'],
                             experiment.metadata[channel]['af_stdev']))
                  for channel in experiment.channels
                  if 'af_median' in experiment.metadata[channel]]

            for af_channel, (af_median, af_stdev) in af:
                tube_data[af_channel] = tube_data[af_channel] - af_median
                tube_data = tube_data[tube_data[af_channel] > -3 * af_stdev]

            tube_data.reset_index(drop = True, inplace = True)

            # bleedthrough correction; interpolate from a snapshot so that
            # corrected channels do not feed into later corrections
            old_tube_data = tube_data.copy()
            bleedthrough = \
                {channel: experiment.metadata[channel]['piecewise_bleedthrough']
                 for channel in experiment.channels
                 if 'piecewise_bleedthrough' in experiment.metadata[channel]}

            for channel, (interp_channels, interpolator) in bleedthrough.items():
                interp_data = old_tube_data[interp_channels]
                tube_data[channel] = interpolator(interp_data)

            # bead calibration
            beads = [(channel, experiment.metadata[channel]['bead_calibration_fn'])
                     for channel in experiment.channels
                     if 'bead_calibration_fn' in experiment.metadata[channel]]

            for channel, calibration_fn in beads:
                tube_data[channel] = calibration_fn(tube_data[channel])

            tubes[tube_file] = tube_data

        # only strictly positive events can be log-transformed
        data = tubes[tube_file][[from_channel, to_channel]]
        data = data[data[from_channel] > 0]
        data = data[data[to_channel] > 0]
        _ = data.reset_index(drop = True, inplace = True)

        if self.mixture_model:
            gmm = sklearn.mixture.GMM(n_components=2)
            fit = gmm.fit(np.log10(data[from_channel][:, np.newaxis]))

            # weight each event by its membership in the brighter component
            mu_idx = 0 if fit.means_[0][0] > fit.means_[1][0] else 1
            weights = [x[mu_idx] for x in fit.predict_proba(np.log10(data[from_channel][:, np.newaxis]))]
        else:
            weights = [1] * len(data.index)

        lr = np.polyfit(np.log10(data[from_channel]),
                        np.log10(data[to_channel]),
                        deg = 1,
                        w = weights)

        self._coefficients[(from_channel, to_channel)] = lr
def plot(self, experiment, **kwargs):
    """
    Plot the diagnostic plots for the channel translation.

    One row is drawn per ``(from_channel, to_channel)`` pair in
    ``self.op.translation``: a log-log scatter of the corrected control
    events with the fitted line and, when ``self.op.mixture_model`` is set, a
    histogram of the source channel with the two mixture means marked.

    Parameters
    ----------
    experiment : Experiment
        Provides ``channels`` and per-channel metadata (``range`` plus the
        optional autofluorescence, bleedthrough and bead-calibration entries).
    **kwargs
        Forwarded to ``plt.scatter``; ``alpha``, ``s`` and ``marker`` get
        defaults when absent.

    Raises
    ------
    CytoflowViewError
        If no experiment is given.
    CytoflowOpError
        If a control file is not specified or cannot be parsed.
    """
    if not experiment:
        raise CytoflowViewError("No experiment specified")

    # parsed and corrected control data, keyed by file
    tubes = {}

    plt.figure()
    num_plots = len(self.op.translation)
    plt_idx = 0

    for from_channel, to_channel in self.op.translation.items():

        if (from_channel, to_channel) not in self.op.controls:
            raise CytoflowOpError("Control file for {0} --> {1} not specified"
                                  .format(from_channel, to_channel))

        tube_file = self.op.controls[(from_channel, to_channel)]

        if tube_file not in tubes:
            try:
                _, tube_data = fcsparser.parse(tube_file, reformat_meta = True)
            except Exception as e:
                # e.value is not a general exception attribute; use str(e)
                raise CytoflowOpError("FCS reader threw an error on tube {0}: {1}"\
                                      .format(tube_file, str(e)))

            # autofluorescence correction
            af = [(channel, (experiment.metadata[channel]['af_median'],
                             experiment.metadata[channel]['af_stdev']))
                  for channel in experiment.channels
                  if 'af_median' in experiment.metadata[channel]]

            for af_channel, (af_median, af_stdev) in af:
                tube_data[af_channel] = tube_data[af_channel] - af_median
                tube_data = tube_data[tube_data[af_channel] > -3 * af_stdev]

            tube_data.reset_index(drop = True, inplace = True)

            # bleedthrough correction; interpolate from a snapshot so that
            # corrected channels do not feed into later corrections
            old_tube_data = tube_data.copy()
            bleedthrough = \
                {channel: experiment.metadata[channel]['piecewise_bleedthrough']
                 for channel in experiment.channels
                 if 'piecewise_bleedthrough' in experiment.metadata[channel]}

            for channel, (interp_channels, interpolator) in bleedthrough.items():
                interp_data = old_tube_data[interp_channels]
                tube_data[channel] = interpolator(interp_data)

            # bead calibration
            beads = [(channel, experiment.metadata[channel]['bead_calibration_fn'])
                     for channel in experiment.channels
                     if 'bead_calibration_fn' in experiment.metadata[channel]]

            for channel, calibration_fn in beads:
                tube_data[channel] = calibration_fn(tube_data[channel])

            tubes[tube_file] = tube_data

        from_range = experiment.metadata[from_channel]['range']
        to_range = experiment.metadata[to_channel]['range']

        # only strictly positive events can be log-transformed
        data = tubes[tube_file][[from_channel, to_channel]]
        data = data[data[from_channel] > 0]
        data = data[data[to_channel] > 0]
        _ = data.reset_index(drop = True, inplace = True)

        if self.op.mixture_model:
            # right-hand column: histogram of the source channel
            plt.subplot(num_plots, 2, plt_idx * 2 + 2)
            plt.xscale('log', nonposx='mask')
            hist_bins = np.logspace(1, math.log(from_range, 2), num = 128, base = 2)
            _ = plt.hist(data[from_channel],
                         bins = hist_bins,
                         histtype = 'stepfilled',
                         antialiased = True)
            plt.xlabel(from_channel)

            gmm = sklearn.mixture.GMM(n_components=2)
            fit = gmm.fit(np.log10(data[from_channel][:, np.newaxis]))

            # weight each event by its membership in the brighter component
            mu_idx = 0 if fit.means_[0][0] > fit.means_[1][0] else 1
            weights = [x[mu_idx] for x in fit.predict_proba(np.log10(data[from_channel][:, np.newaxis]))]

            plt.axvline(10 ** fit.means_[0][0], color = 'r')
            plt.axvline(10 ** fit.means_[1][0], color = 'r')
        else:
            weights = [1] * len(data.index)

        lr = np.polyfit(np.log10(data[from_channel]),
                        np.log10(data[to_channel]),
                        deg = 1,
                        w = weights)

        # left-hand (or only) column: scatter plus fitted line
        num_cols = 2 if self.op.mixture_model else 1
        plt.subplot(num_plots, num_cols, plt_idx * num_cols + 1)
        plt.xscale('log', nonposx = 'mask')
        plt.yscale('log', nonposy = 'mask')
        plt.xlabel(from_channel)
        plt.ylabel(to_channel)
        plt.xlim(1, from_range)
        plt.ylim(1, to_range)

        kwargs.setdefault('alpha', 0.2)
        kwargs.setdefault('s', 1)
        kwargs.setdefault('marker', 'o')

        plt.scatter(data[from_channel],
                    data[to_channel],
                    **kwargs)

        xs = np.logspace(1, math.log(from_range, 2), num = 256, base = 2)
        p = np.poly1d(lr)
        plt.plot(xs, 10 ** p(np.log10(xs)), "--g")

        plt_idx = plt_idx + 1
def apply(self, experiment=None, metadata_only=False):
    """
    Load a new :class:`.Experiment`.

    Parameters
    ----------
    experiment : Experiment
        Ignored

    metadata_only : bool (default = False)
        Only "import" the metadata, creating an Experiment with all the
        expected metadata and structure but 0 events.

    Returns
    -------
    Experiment
        The new :class:`.Experiment`.  New channels have the following
        metadata:

        - **voltage** - int
            The voltage that this channel was collected at.  Determined
            by the ``$PnV`` field from the first FCS file.

        - **range** - int
            The maximum range of this channel.  Determined by the ``$PnR``
            field from the first FCS file.

        New experimental conditions do not have **voltage** or **range**
        metadata, obviously.  Instead, they have **experiment** set to
        ``True``, to distinguish the experimental variables from the
        conditions that were added by gates, etc.

        If :attr:`ignore_v` is set, it is added as a key to the
        :class:`.Experiment`-wide metadata.

    Raises
    ------
    util.CytoflowOpError
        If no tubes are given, a renamed channel is not a valid Python
        identifier, tubes disagree on their condition names, two tubes have
        equal conditions, the first tube's metadata cannot be read, or a
        requested channel is missing from the first tube.
    """
    if not self.tubes or len(self.tubes) == 0:
        raise util.CytoflowOpError('tubes', "Must specify some tubes!")

    # if we have channel renaming, make sure the new names are valid
    # python identifiers
    if self.channels:
        for old_name, new_name in self.channels.items():
            if old_name != new_name and new_name != util.sanitize_identifier(
                    new_name):
                raise util.CytoflowOpError(
                    'channels', "Channel name {} must be a "
                    "valid Python identifier.".format(new_name))

    # make sure each tube has the same conditions
    tube0_conditions = set(self.tubes[0].conditions)
    for tube in self.tubes:
        tube_conditions = set(tube.conditions)
        # a non-empty symmetric difference means the condition names differ
        if len(tube0_conditions ^ tube_conditions) > 0:
            raise util.CytoflowOpError(
                'tubes', "Tube {0} didn't have the same "
                "conditions as tube {1}".format(tube.file,
                                                self.tubes[0].file))

    # make sure experimental conditions are unique (every unordered pair of
    # tubes is compared exactly once)
    for idx, i in enumerate(self.tubes[0:-1]):
        for j in self.tubes[idx + 1:]:
            if i.conditions_equal(j):
                raise util.CytoflowOpError(
                    'tubes', "The same conditions specified for "
                    "tube {0} and tube {1}".format(i.file, j.file))

    experiment = Experiment()

    experiment.metadata["ignore_v"] = self.ignore_v

    for condition, dtype in list(self.conditions.items()):
        experiment.add_condition(condition, dtype)
        experiment.metadata[condition]['experiment'] = True

    try:
        # silence warnings about duplicate channels;
        # we'll figure that out below
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            tube0_meta = fcsparser.parse(self.tubes[0].file,
                                         data_set=self.data_set,
                                         meta_data_only=True,
                                         reformat_meta=True)
    except Exception as e:
        raise util.CytoflowOpError(
            'tubes', "FCS reader threw an error reading metadata "
            "for tube {}: {}".format(self.tubes[0].file, str(e))) from e

    meta_channels = tube0_meta["_channels_"]

    # an explicit name_metadata wins; otherwise it is autodetected from the
    # first tube
    if self.name_metadata:
        experiment.metadata["name_metadata"] = self.name_metadata
    else:
        experiment.metadata["name_metadata"] = autodetect_name_metadata(
            self.tubes[0].file, data_set=self.data_set)

    # keep the original row index as a column before re-indexing the channel
    # table by channel name
    meta_channels['Index'] = meta_channels.index
    meta_channels.set_index(experiment.metadata["name_metadata"],
                            inplace=True)

    # import only the channels named in self.channels if it is set,
    # otherwise every channel of the first tube
    channels = list(self.channels.keys()) if self.channels \
        else list(meta_channels.index.values)

    # make sure everything in self.channels is in the tube channels
    for channel in channels:
        if channel not in meta_channels.index:
            raise util.CytoflowOpError(
                'channels', "Channel {0} not in tube {1}".format(
                    channel, self.tubes[0].file))

    # now that we have the metadata, load it into experiment
    for channel in channels:
        experiment.add_channel(channel)

        experiment.metadata[channel]["fcs_name"] = channel

        # keep track of the channel's PMT voltage
        if ("$PnV" in meta_channels.loc[channel]):
            v = meta_channels.loc[channel]['$PnV']
            # only a truthy voltage is recorded
            if v:
                experiment.metadata[channel]["voltage"] = v

        # add the maximum possible value for this channel.
        data_range = meta_channels.loc[channel]['$PnR']
        data_range = float(data_range)
        experiment.metadata[channel]['range'] = data_range

    experiment.metadata['fcs_metadata'] = {}
    for tube in self.tubes:
        if metadata_only:
            tube_meta, tube_data = parse_tube(tube.file,
                                              experiment,
                                              data_set=self.data_set,
                                              metadata_only=True)
        else:
            tube_meta, tube_data = parse_tube(tube.file,
                                              experiment,
                                              data_set=self.data_set)

            # optionally draw self.events rows at random, without
            # replacement; a tube with fewer events is kept whole and a
            # warning is issued
            if self.events:
                if self.events <= len(tube_data):
                    tube_data = tube_data.loc[np.random.choice(
                        tube_data.index, self.events, replace=False)]
                else:
                    warnings.warn(
                        "Only {0} events in tube {1}".format(
                            len(tube_data), tube.file),
                        util.CytoflowWarning)

        experiment.add_events(tube_data[channels], tube.conditions)

        # extract the row and column from wells collected on a
        # BD HTS
        if 'WELL ID' in tube_meta:
            pos = tube_meta['WELL ID']
            # first character is stored as the row, the next two as the
            # integer column
            tube_meta['CF_Row'] = pos[0]
            tube_meta['CF_Col'] = int(pos[1:3])

        for i, channel in enumerate(channels):
            # remove the PnV tube metadata
            if '$P{}V'.format(i + 1) in tube_meta:
                del tube_meta['$P{}V'.format(i + 1)]

            # work around a bug where the PnR is sometimes not the detector range
            # but the data range.
            pnr = '$P{}R'.format(i + 1)
            if pnr in tube_meta and float(
                    tube_meta[pnr]
            ) > experiment.metadata[channel]['range']:
                experiment.metadata[channel]['range'] = float(
                    tube_meta[pnr])

        tube_meta['CF_File'] = Path(tube.file).stem

        experiment.metadata['fcs_metadata'][tube.file] = tube_meta

    for channel in channels:
        if self.channels and channel in self.channels:
            new_name = self.channels[channel]
            # a channel mapped onto its own name needs no renaming
            if channel == new_name:
                continue
            experiment.data.rename(columns={channel: new_name},
                                   inplace=True)
            experiment.metadata[new_name] = experiment.metadata[channel]
            experiment.metadata[new_name]["fcs_name"] = channel
            del experiment.metadata[channel]

        # NOTE(review): after a rename the data column is called `new_name`,
        # yet the code below still indexes experiment.data by `channel`; and
        # the `continue` above skips it entirely for self-mapped channels.
        # Confirm both are intended.

        # this catches an odd corner case where some instruments store
        # instrument-specific info in the "extra" bits.  we have to
        # clear them out.
        if tube0_meta['$DATATYPE'] == 'I':
            data_bits = int(meta_channels.loc[channel]['$PnB'])
            data_range = float(meta_channels.loc[channel]['$PnR'])
            range_bits = int(math.log(data_range, 2))

            if range_bits < data_bits:
                # build a mask with the low `range_bits` bits set
                mask = 1
                for _ in range(1, range_bits):
                    mask = mask << 1 | 1

                experiment.data[channel] = experiment.data[
                    channel].values.astype('int') & mask

        # re-scale the data to linear if it's recorded as log-scaled with
        # integer channels
        data_range = float(meta_channels.loc[channel]['$PnR'])
        f1 = float(meta_channels.loc[channel]['$PnE'][0])
        f2 = float(meta_channels.loc[channel]['$PnE'][1])

        if f1 > 0.0 and f2 == 0.0:
            warnings.warn(
                'Invalid $PnE = {},{} for channel {}, changing it to {},1.0'
                .format(f1, f2, channel, f1), util.CytoflowWarning)
            f2 = 1.0

        if f1 > 0.0 and f2 > 0.0 and tube0_meta['$DATATYPE'] == 'I':
            warnings.warn(
                'Converting channel {} from logarithmic to linear'.format(
                    channel), util.CytoflowWarning)

            # the conversion itself is disabled, so only the warning above
            # is emitted:
            # experiment.data[channel] = 10 ** (f1 * experiment.data[channel] / data_range) * f2

    return experiment
cats = set(self.data[meta_name].cat.categories) | set(new_data[meta_name].cat.categories) self.data[meta_name] = self.data[meta_name].cat.set_categories(cats) new_data[meta_name] = new_data[meta_name].cat.set_categories(cats) except (ValueError, TypeError): raise CytoflowError("Tube {0} had trouble converting conditions {1}" "(value = {2}) to type {3}" \ .format(tube_file, meta_name, meta_value, meta_type)) self._tube_conditions.add(frozenset(conditions.iteritems())) self.data = self.data.append(new_data, ignore_index = True) del new_data if __name__ == "__main__": import fcsparser ex = Experiment() ex.add_conditions({"time" : "category"}) tube1 = fcsparser.parse('../cytoflow/tests/data/Plate01/CFP_Well_A4.fcs') tube2 = fcsparser.parse('../cytoflow/tests/data/Plate01/RFP_Well_A3.fcs') ex.add_tube(tube1, {"time" : "one"}) ex.add_tube(tube2, {"time" : "two"}) print(ex.data)
n = mu.shape[0] Sigma_det = np.linalg.det(Sigma) Sigma_inv = np.linalg.inv(Sigma) N = np.sqrt((2*np.pi)**n * Sigma_det) # This einsum call calculates (x-mu)T.Sigma-1.(x-mu) in a vectorized # way across all the input variables. fac = np.einsum('...k,kl,...l->...', pos-mu, Sigma_inv, pos-mu) return np.exp(-fac / 2) / N for index, row in iters2: print("test")#shameless cherrypicking for example data_dir='../FCS/' example_file=row.filename#"Huseyin2019-09-26.0211.fcs"#"Huseyin2019-09-24.0072.fcs"# meta, data = fcsparser.parse(data_dir + example_file, meta_data_only=False, reformat_meta=True) data.columns=[x.strip().replace('-', '_') for x in data.columns] df2=row#df[df.filename==example_file].iloc[0] data["GFP_Decomposed"]=np.exp((np.log(data["GFP_H"])*df2["log_std_v"]-df2["log_std_gfp"]*np.log(data["FSC_H"])*df2["log_rho"]+df2["log_std_gfp"]*df2["log_mean_v_mean"]*df2["log_rho"])/df2["log_std_v"]) f, [[ax,ax1],[ax2,ax3]] = plt.subplots(ncols=2,nrows=2,figsize=(10, 10),sharey=True,sharex=True,gridspec_kw = {'wspace':0, 'hspace':0}) f.suptitle(row.filename) ax.set_ylim([2,4.5]) ax.set_xlim([1,4]) ax2.axvline(df2["log_mean_v_mean"]) ax2.text(df2["log_mean_v_mean"]+0.02,1.5+1+0.15,'Context average',rotation=90) ax.text(df2["log_mean_v_mean"]+0.02,1.5+1+0.15,'Context average',rotation=90) ax.axvline(df2["log_mean_v_mean"]) #ax1.set_ylim([-1,5])
def apply(self, experiment = None):
    """
    Load a new Experiment from ``self.tubes``.

    The first tube's metadata defines the channels together with their
    ``voltage`` (from ``$PnV``, when present and truthy) and ``range`` (from
    ``$PnR``) metadata. Every tube is then parsed with ``parse_tube`` and its
    events are added under the tube's conditions, optionally subsampled to
    ``self.coarse_events`` events.

    Parameters
    ----------
    experiment : Experiment
        Ignored; a fresh Experiment is always created.

    Returns
    -------
    Experiment
        The newly populated experiment.

    Raises
    ------
    util.CytoflowOpError
        If no tubes are given, tubes disagree on their condition names, two
        tubes have equal conditions, or the first tube's metadata cannot be
        read.
    """
    if not self.tubes or len(self.tubes) == 0:
        raise util.CytoflowOpError("Must specify some tubes!")

    # make sure each tube has the same conditions
    tube0_conditions = set(self.tubes[0].conditions)
    for tube in self.tubes:
        tube_conditions = set(tube.conditions)
        if len(tube0_conditions ^ tube_conditions) > 0:
            raise util.CytoflowOpError("Tube {0} didn't have the same "
                                       "conditions as tube {1}"
                                       .format(tube.file, self.tubes[0].file))

    # make sure experimental conditions are unique
    for idx, i in enumerate(self.tubes[0:-1]):
        for j in self.tubes[idx+1:]:
            if i.conditions_equal(j):
                raise util.CytoflowOpError("The same conditions specified for "
                                           "tube {0} and tube {1}"
                                           .format(i.file, j.file))

    experiment = Experiment()

    experiment.metadata["ignore_v"] = self.ignore_v

    for condition, dtype in self.conditions.items():
        experiment.add_condition(condition, dtype)

    try:
        # silence warnings about duplicate channels;
        # we'll figure that out below
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            tube0_meta = fcsparser.parse(self.tubes[0].file,
                                         meta_data_only = True,
                                         reformat_meta = True)
    except Exception as e:
        raise util.CytoflowOpError("FCS reader threw an error reading metadata "
                                   " for tube {0}: {1}"
                                   .format(self.tubes[0].file, str(e)))

    meta_channels = tube0_meta["_channels_"]

    if self.name_metadata:
        experiment.metadata["name_metadata"] = self.name_metadata
    else:
        # try to autodetect the metadata
        if "$PnN" in meta_channels and "$PnS" not in meta_channels:
            experiment.metadata["name_metadata"] = "$PnN"
        elif "$PnN" not in meta_channels and "$PnS" in meta_channels:
            experiment.metadata["name_metadata"] = "$PnS"
        else:
            PnN = meta_channels["$PnN"]
            PnS = meta_channels["$PnS"]

            # sometimes one is unique and the other isn't
            if (len(set(PnN)) == len(PnN) and
                    len(set(PnS)) != len(PnS)):
                experiment.metadata["name_metadata"] = "$PnN"
            elif (len(set(PnN)) != len(PnN) and
                    len(set(PnS)) == len(PnS)):
                experiment.metadata["name_metadata"] = "$PnS"
            else:
                # as per fcsparser.api, $PnN is the "short name" (like FL-1)
                # and $PnS is the "actual name" (like "FSC-H"). so let's
                # use $PnS.
                experiment.metadata["name_metadata"] = "$PnS"

    meta_channels.set_index(experiment.metadata["name_metadata"],
                            inplace = True)

    # now that we have the metadata, load it into experiment
    for channel in meta_channels.index:
        experiment.add_channel(channel)

        # label-based row lookup; .loc replaces the removed .ix indexer
        channel_row = meta_channels.loc[channel]

        # keep track of the channel's PMT voltage
        if "$PnV" in channel_row:
            v = channel_row['$PnV']
            if v:
                experiment.metadata[channel]["voltage"] = v

        # add the maximum possible value for this channel.
        data_range = channel_row['$PnR']
        data_range = float(data_range)
        experiment.metadata[channel]['range'] = data_range

    for tube in self.tubes:
        tube_data = parse_tube(tube.file, experiment, self.ignore_v)

        # optionally draw coarse_events rows at random, without replacement;
        # a tube with fewer events is kept whole and a warning is issued
        if self.coarse_events:
            if self.coarse_events <= len(tube_data):
                tube_data = tube_data.loc[np.random.choice(tube_data.index,
                                                           self.coarse_events,
                                                           replace = False)]
            else:
                warnings.warn("Only {0} events in tube {1}"
                              .format(len(tube_data), tube.file),
                              util.CytoflowWarning)

        experiment.add_events(tube_data, tube.conditions)

    return experiment
def _load_fcs_folder(folder):
    """Parse every regular file in `folder` (skipping ``.DS_Store``) and return the event tables concatenated into one DataFrame."""
    frames = []
    for entry in os.listdir(folder):
        pathname = os.path.join(folder, entry)
        if '.DS_Store' not in pathname and os.path.isfile(pathname):
            frames.append(fcsparser.parse(pathname)[1])
    return pd.concat(frames)


def SimpleOverlay(filebig, bluefolder, redfolder, greenfolder, title,
                  bluelabel, redlabel, greenlable, nonelabel, savespace):
    """Draw a UMAP embedding of all events, highlighting three subsets.

    All FCS files in `filebig` are pooled, transformed and embedded with UMAP.
    Events whose ``Time`` value also occurs in the pooled files of
    `bluefolder`, `redfolder` or `greenfolder` are drawn in blue, red or
    green; all others are drawn in grey.

    Parameters
    ----------
    filebig : str
        Folder with the FCS files holding the full population.
    bluefolder, redfolder, greenfolder : str
        Folders with the FCS files of the three highlighted subsets.
    title : str
        Plot title.
    bluelabel, redlabel, greenlable, nonelabel : str
        Legend labels for the blue, red, green and grey points
        (``greenlable`` keeps its historical spelling for compatibility).
    savespace
        Not used by this function.
    """
    dfbigcomb = _load_fcs_folder(filebig)

    # the embedded channels exclude the first four and the last two columns
    titles = list(dfbigcomb.columns)
    channels = titles[4:len(titles)-2]

    # arcsinh with a cofactor of 150, applied to the whole table at once
    dfbigadj = np.arcsinh(dfbigcomb[channels] / 150)
    dfbigadj['FSC-A'] = dfbigcomb['FSC-A']/dfbigcomb['FSC-A'].max() * 10
    # NOTE(review): SSC-A is scaled by the FSC-A maximum, not its own;
    # left unchanged, confirm whether this is intended
    dfbigadj['SSC-A'] = dfbigcomb['SSC-A']/dfbigcomb['FSC-A'].max() * 10

    dfbluecomb = _load_fcs_folder(bluefolder)
    dfredcomb = _load_fcs_folder(redfolder)
    dfgreencomb = _load_fcs_folder(greenfolder)

    # positional masks: which pooled events share a Time value with a subset
    cbtime = dfbigcomb['Time'].isin(dfbluecomb['Time']).values
    browind = list(np.where(cbtime)[0])
    bnotrowind = list(np.where(~cbtime)[0])

    crtime = dfbigcomb['Time'].isin(dfredcomb['Time']).values
    rrowind = list(np.where(crtime)[0])
    rnotrowind = list(np.where(~crtime)[0])

    cgtime = dfbigcomb['Time'].isin(dfgreencomb['Time']).values
    growind = list(np.where(cgtime)[0])
    gnotrowind = list(np.where(~cgtime)[0])

    e = umap.UMAP(random_state=0).fit_transform(dfbigadj)

    # grey background first so the coloured subsets are drawn on top
    plt.scatter(e[bnotrowind,0], e[bnotrowind,1], s=.1, c=('#ABB2B9'))
    plt.scatter(e[rnotrowind,0], e[rnotrowind,1], s=.1, c=('#ABB2B9'))
    plt.scatter(e[gnotrowind,0], e[gnotrowind,1], s=.1, c=('#ABB2B9'))
    plt.scatter(e[rrowind,0], e[rrowind,1], s=.1, c='r')
    plt.scatter(e[browind,0], e[browind,1], s=.1, c='b')
    plt.scatter(e[growind,0], e[growind,1], s=.1, c='g')

    plt.title(title)
    plt.xticks([])
    plt.yticks([])

    redp = mpatches.Patch(color='red', label=redlabel)
    bluep = mpatches.Patch(color='b', label=bluelabel)
    # the parameter is spelled `greenlable`; the body used to read the
    # undefined name `greenlabel` and raised NameError
    greenp = mpatches.Patch(color='g', label=greenlable)
    greyp = mpatches.Patch(color=('#ABB2B9'), label=nonelabel)
    plt.legend(handles=[redp, bluep, greenp, greyp])
atc_conc = 10 # in ng/mL RUN_NO = 2 promoter = '27yfp' gating_fraction = 0.4 ## Hardcoded arrangement garbage. xan_mgml = (0, 0, 0, 0.05, 0.1, 0.25, 0.5, 0.75, 1.0, 1.5, 2.0, 2.5, 3.5, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0) _strains = [['auto'], ['delta'], ['dilution'] * 18] strains = [l[i] for l in _strains for i in range(len(l))] # Define directories and search pattern src = '../../../data/flow/fcs/' dst = '../../../data/flow/csv/' pattern = f'RP{DATE[:4]}-{DATE[4:6]}-{DATE[6:]}_r{RUN_NO}' # Get the names of the files. files = np.sort(glob.glob(f'{src}{pattern}*.fcs')) # %%Iterate through each strain and concentration. for s, c, f in zip(strains, xan_mgml, files): # Define the new name. new_name = f'{DATE}_r{RUN_NO}_{promoter}_{s}_{atc_conc}ngmlATC_{c}mgmlXAN' # Load the data using fcs parser and save to csv. _, data = fcsparser.parse(f) data.to_csv(f'{dst}{new_name}.csv') # Rename the FCS file. os.rename(f, f'{src}{new_name}.fcs')
def apply(self, experiment=None):
    """
    Load a new :class:`.Experiment`.

    Parameters
    ----------
    experiment : Experiment
        Ignored; a fresh Experiment is always created.

    Returns
    -------
    Experiment
        The new :class:`.Experiment`.  New channels have the following
        metadata:

        - **voltage** - int
            The voltage that this channel was collected at.  Determined
            by the ``$PnV`` field from the first FCS file.

        - **range** - int
            The maximum range of this channel.  Determined by the ``$PnR``
            field from the first FCS file.

        New experimental conditions do not have **voltage** or **range**
        metadata, obviously.  Instead, they have **experiment** set to
        ``True``, to distinguish the experimental variables from the
        conditions that were added by gates, etc.

        If :attr:`ignore_v` is set, it is added as a key to the
        :class:`.Experiment`-wide metadata.

    Raises
    ------
    util.CytoflowOpError
        If no tubes are given, a renamed channel is not a valid Python
        identifier, tubes disagree on their condition names, two tubes have
        equal conditions, the first tube's metadata cannot be read, or a
        requested channel is missing from the first tube.
    """
    if not self.tubes or len(self.tubes) == 0:
        raise util.CytoflowOpError('tubes', "Must specify some tubes!")

    # if we have channel renaming, make sure the new names are valid
    # python identifiers
    if self.channels:
        for old_name, new_name in self.channels.items():
            if old_name != new_name and new_name != util.sanitize_identifier(
                    new_name):
                raise util.CytoflowOpError(
                    'channels', "Channel name {} must be a "
                    "valid Python identifier.".format(new_name))

    # make sure each tube has the same conditions
    tube0_conditions = set(self.tubes[0].conditions)
    for tube in self.tubes:
        tube_conditions = set(tube.conditions)
        if len(tube0_conditions ^ tube_conditions) > 0:
            raise util.CytoflowOpError(
                'tubes', "Tube {0} didn't have the same "
                "conditions as tube {1}".format(tube.file,
                                                self.tubes[0].file))

    # make sure experimental conditions are unique
    for idx, i in enumerate(self.tubes[0:-1]):
        for j in self.tubes[idx + 1:]:
            if i.conditions_equal(j):
                raise util.CytoflowOpError(
                    'tubes', "The same conditions specified for "
                    "tube {0} and tube {1}".format(i.file, j.file))

    experiment = Experiment()

    experiment.metadata["ignore_v"] = self.ignore_v

    for condition, dtype in list(self.conditions.items()):
        experiment.add_condition(condition, dtype)
        experiment.metadata[condition]['experiment'] = True

    try:
        # silence warnings about duplicate channels;
        # we'll figure that out below
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            tube0_meta = fcsparser.parse(self.tubes[0].file,
                                         meta_data_only=True,
                                         reformat_meta=True)
    except Exception as e:
        raise util.CytoflowOpError(
            'tubes', "FCS reader threw an error reading metadata "
            "for tube {}".format(self.tubes[0].file)) from e

    meta_channels = tube0_meta["_channels_"]

    if self.name_metadata:
        experiment.metadata["name_metadata"] = self.name_metadata
    else:
        # try to autodetect the metadata
        if "$PnN" in meta_channels and "$PnS" not in meta_channels:
            experiment.metadata["name_metadata"] = "$PnN"
        elif "$PnN" not in meta_channels and "$PnS" in meta_channels:
            experiment.metadata["name_metadata"] = "$PnS"
        else:
            PnN = meta_channels["$PnN"]
            PnS = meta_channels["$PnS"]

            # sometimes one is unique and the other isn't
            if (len(set(PnN)) == len(PnN) and len(set(PnS)) != len(PnS)):
                experiment.metadata["name_metadata"] = "$PnN"
            elif (len(set(PnN)) != len(PnN)
                  and len(set(PnS)) == len(PnS)):
                experiment.metadata["name_metadata"] = "$PnS"
            else:
                # as per fcsparser.api, $PnN is the "short name" (like FL-1)
                # and $PnS is the "actual name" (like "FSC-H"). so let's
                # use $PnS.
                experiment.metadata["name_metadata"] = "$PnS"

    meta_channels.set_index(experiment.metadata["name_metadata"],
                            inplace=True)

    # Default to the names in the re-indexed channel table.  The parser's
    # "_channel_names_" follow the parser's own naming choice, which need not
    # be the name_metadata column selected above; using them could make the
    # membership check below fail for every channel.
    channels = list(self.channels.keys()) if self.channels \
        else list(meta_channels.index.values)

    # make sure everything in self.channels is in the tube channels
    for channel in channels:
        if channel not in meta_channels.index:
            raise util.CytoflowOpError(
                'channels', "Channel {0} not in tube {1}".format(
                    channel, self.tubes[0].file))

    # now that we have the metadata, load it into experiment
    for channel in channels:
        experiment.add_channel(channel)

        experiment.metadata[channel]["fcs_name"] = channel

        # keep track of the channel's PMT voltage
        if ("$PnV" in meta_channels.loc[channel]):
            v = meta_channels.loc[channel]['$PnV']
            if v:
                experiment.metadata[channel]["voltage"] = v

        # add the maximum possible value for this channel.
        data_range = meta_channels.loc[channel]['$PnR']
        data_range = float(data_range)
        experiment.metadata[channel]['range'] = data_range

    experiment.metadata['fcs_metadata'] = {}
    for tube in self.tubes:
        tube_meta, tube_data = parse_tube(tube.file, experiment)

        # optionally draw self.events rows at random, without replacement;
        # a tube with fewer events is kept whole and a warning is issued
        if self.events:
            if self.events <= len(tube_data):
                tube_data = tube_data.loc[np.random.choice(tube_data.index,
                                                           self.events,
                                                           replace=False)]
            else:
                warnings.warn(
                    "Only {0} events in tube {1}".format(
                        len(tube_data), tube.file), util.CytoflowWarning)

        experiment.add_events(tube_data[channels], tube.conditions)
        experiment.metadata['fcs_metadata'][tube.file] = tube_meta

    # apply the requested renames to both the data columns and the metadata
    for channel in channels:
        if self.channels and channel in self.channels:
            new_name = self.channels[channel]
            if channel == new_name:
                continue
            experiment.data.rename(columns={channel: new_name},
                                   inplace=True)
            experiment.metadata[new_name] = experiment.metadata[channel]
            experiment.metadata[new_name]["fcs_name"] = channel
            del experiment.metadata[channel]

    return experiment
color='blue') self._cursor.connect_event('button_press_event', self._onclick) elif self._cursor: self._cursor.disconnect_events() self._cursor = None def _onclick(self, event): """Update the threshold location""" self.op.threshold = event.xdata if __name__ == '__main__': import cytoflow as flow import fcsparser tube1 = fcsparser.parse('../../cytoflow/tests/data/Plate01/RFP_Well_A3.fcs', reformat_meta = True) tube2 = fcsparser.parse('../../cytoflow/tests/data/Plate01/CFP_Well_A4.fcs', reformat_meta = True) ex = flow.Experiment() ex.add_conditions({"Dox" : "float"}) ex.add_tube(tube1, {"Dox" : 10.0}) ex.add_tube(tube2, {"Dox" : 1.0}) hlog = flow.HlogTransformOp() hlog.name = "Hlog transformation" hlog.channels = ['Y2-A'] ex2 = hlog.apply(ex)
def estimate(self, experiment, subset = None):
    """
    Estimate the calibration coefficients from the beads file.

    For every channel named in ``self.units`` the bead data is histogrammed
    on a log2 scale, smoothed, and searched for peaks; the peaks are then
    matched against the known bead values in ``self.beads`` for that
    channel's unit and the result is stored in ``self._coefficients``.

    Parameters
    ----------
    experiment
        Source of the per-channel ``'voltage'`` and ``'range'`` metadata
        (read through ``experiment.metadata[channel]``).
    subset
        Not used by this method.

    Raises
    ------
    CytoflowOpError
        If no experiment is given, the beads file cannot be read, a channel
        has no voltage or a different voltage than the experiment, the unit
        is unknown, or the number of detected peaks is unusable.
    """
    if not experiment:
        raise CytoflowOpError("No experiment specified")

    try:
        beads_meta, beads_data = fcsparser.parse(self.beads_file,
                                                 reformat_meta = True)
        beads_channels = beads_meta["_channels_"].set_index("$PnN")
    except Exception as e:
        # Arbitrary exceptions have no ``.value`` attribute; formatting the
        # exception itself keeps the original reader error visible.
        raise CytoflowOpError("FCS reader threw an error on tube {0}: {1}"
                              .format(self.beads_file, e))

    channels = list(self.units.keys())

    # make sure the voltages didn't change
    for channel in channels:
        exp_v = experiment.metadata[channel]['voltage']
        channel_meta = beads_channels.loc[channel]

        if "$PnV" not in channel_meta:
            raise CytoflowOpError("Didn't find a voltage for channel {0} "
                                  "in tube {1}".format(channel, self.beads_file))

        control_v = channel_meta['$PnV']
        if control_v != exp_v:
            raise CytoflowOpError("Voltage differs for channel {0} in tube {1}"
                                  .format(channel, self.beads_file))

    for channel in channels:
        data = beads_data[channel]

        # bin the data on a log scale
        data_range = experiment.metadata[channel]['range']
        hist_bins = np.logspace(1, math.log(data_range, 2), num = 256, base = 2)
        hist = np.histogram(data, bins = hist_bins)

        # mask off-scale values
        hist[0][0] = 0
        hist[0][-1] = 0

        # smooth it with a Savitzky-Golay filter
        hist_smooth = scipy.signal.savgol_filter(hist[0], 5, 1)

        # find peaks
        peak_bins = scipy.signal.find_peaks_cwt(hist_smooth,
                                                widths = np.arange(3, 20),
                                                max_distances = np.arange(3, 20) / 2)

        # filter by height and intensity
        peak_threshold = np.percentile(hist_smooth, self.bead_peak_quantile)
        peak_bins_filtered = \
            [x for x in peak_bins if hist_smooth[x] > peak_threshold
             and hist[1][x] > self.bead_brightness_threshold]

        peaks = [hist_bins[x] for x in peak_bins_filtered]

        mef_unit = self.units[channel]

        if mef_unit not in self.beads:
            raise CytoflowOpError("Invalid unit {0} specified for channel {1}".format(mef_unit, channel))

        # "mean equivalent fluorochrome"
        mef = self.beads[mef_unit]

        if len(peaks) == 0:
            raise CytoflowOpError("Didn't find any peaks; check the diagnostic plot")
        elif len(peaks) > len(mef):
            # Compare against the number of known bead values for this unit
            # (not the number of units): with more peaks than bead values
            # there is no contiguous subset of ``mef`` to fit against.
            raise CytoflowOpError("Found too many peaks; check the diagnostic plot")
        elif len(peaks) == 1:
            # if we only have one peak, assume it's the brightest peak
            self._coefficients[channel] = [mef[-1] / peaks[0]]
        elif len(peaks) == 2:
            # if we have only two peaks, assume they're the brightest two
            self._coefficients[channel] = \
                [(mef[-1] - mef[-2]) / (peaks[1] - peaks[0])]
        else:
            # if there are n > 2 peaks, check all the contiguous n-subsets
            # of mef for the one whose linear regression with the peaks
            # has the smallest (norm) sum-of-residuals.

            # do it in log10 space because otherwise the brightest peaks
            # have an outsized influence.

            best_lr = None
            best_resid = np.inf
            for start in range(len(mef) - len(peaks) + 1):
                mef_subset = mef[start:start + len(peaks)]

                # linear regression of the peak locations against mef subset
                lr = np.polyfit(np.log10(peaks),
                                np.log10(mef_subset),
                                deg = 1,
                                full = True)

                resid = lr[1][0]
                if resid < best_resid:
                    best_lr = lr[0]
                    best_resid = resid

            if best_lr is None:
                # every candidate fit produced a non-comparable residual
                raise CytoflowOpError("Couldn't fit the peaks to the bead values; "
                                      "check the diagnostic plot")

            self._coefficients[channel] = (best_lr[0], best_lr[1])
def main(samplesheet_path: str, output_path: str):
    """Flatten the FCS files listed in a sample sheet into one TSV file.

    The sample sheet is a tab-separated table; each row names one FCS file
    (``filepath``) together with its ``time``, ``plate``, ``row``,
    ``column`` and treatment columns.  Every file is parsed and its events
    are written to ``output_path`` (the first row creates the file with a
    header, later rows append).

    Warnings are issued when a file's ``WELL_ID`` metadata disagrees with
    the sample sheet, when a well is listed twice for a timepoint, and for
    every expected (timepoint, plate, well) combination that never appears.

    Args:
        samplesheet_path: Path of the tab-separated sample sheet.
        output_path: Path of the tab-separated output file.
    """
    samplesheet = pd.read_csv(samplesheet_path, sep="\t")

    # (output column, sample-sheet column), in output order.
    sheet_columns = (
        ("treatment_time", "time"),
        ("plate", "plate"),
        ("column", "column"),
        ("row", "row"),
        ("diamide", "diamide"),
        ("condition", "condition"),
        ("control", "control"),
        ("condition_fluor", "condition_fluor"),
        ("control_fluor", "control_fluor"),
        ("replicate", "replicate"),
        ("filepath", "filepath"),
    )
    # (output column, FCS channel); channels absent from a file become NA.
    optional_channels = (
        ("FSC_H", "FSC LinH"),
        ("FSC_A", "FSC LinA"),
        ("SSC_H", "SSC LinH"),
        ("SSC_A", "SSC LinA"),
        ("YFP_H", "FITC(530/30) LinH"),
        ("YFP_A", "FITC(530/30) LinA"),
        ("mCherry_H", "MCherry(615/30) LinH"),
        ("mCherry_A", "MCherry(615/30) LinA"),
    )

    seen_wells = set()
    # Track the row *position* so the header/overwrite decision does not
    # depend on what labels the sample sheet's index happens to carry.
    for position, (_, well) in enumerate(samplesheet.iterrows()):
        print(well["filepath"])
        metadata, data = fcsparser.parse(well.filepath,
                                         meta_data_only=False,
                                         reformat_meta=True)

        intended_well = well.row + str(well.column).zfill(2)
        if intended_well != metadata["WELL_ID"]:
            warnings.warn(
                "The file {filepath} for plate {plate}, "
                "well {iwell} reports that it comes from well {awell}.".format(
                    filepath=well["filepath"],
                    plate=well["plate"],
                    iwell=intended_well,
                    awell=metadata["WELL_ID"]))

        well_identifier = (well.time, well.plate, well.row, well.column)
        if well_identifier in seen_wells:
            warnings.warn(
                "Plate {plate}, well {well} was listed more than "
                "once for timepoint {time} in the sample sheet.".format(
                    plate=well["plate"],
                    well=intended_well,
                    time=well["time"]))
        else:
            seen_wells.add(well_identifier)

        columns = {out_name: well[sheet_name]
                   for out_name, sheet_name in sheet_columns}
        for out_name, channel in optional_channels:
            columns[out_name] = data[channel] if channel in data.columns else nan
        columns["width"] = data["Width"]
        columns["cytometer_time"] = data["Time"]

        is_first = position == 0
        pd.DataFrame(columns).to_csv(output_path,
                                     sep="\t",
                                     index=False,
                                     mode="w" if is_first else "a",
                                     header=is_first,
                                     na_rep="NA")

    # Timepoints 0-2; plate 1 uses rows A-H, plate 2 rows A-D, columns 1-12.
    timepoints = [0, 1, 2]
    plate_columns = range(1, 13)
    expected_wells = set(itertools.product(
        timepoints, [1], string.ascii_uppercase[:8], plate_columns))
    expected_wells |= set(itertools.product(
        timepoints, [2], string.ascii_uppercase[:4], plate_columns))

    # Sorted so the warnings come out in a reproducible order.
    for time, plate, row, column in sorted(expected_wells - seen_wells):
        warnings.warn(
            "No data provided for timepoint {time}, plate {plate}, well {well}."
            .format(time=time,
                    plate=plate,
                    # zero-padded, matching the well names used in the
                    # other warnings of this function
                    well=row + str(column).zfill(2)))
def _on_add_tubes(self):
    """
    Handle "Add tubes..." button.  Add tubes to the experiment.

    Opens a file dialog and, for each chosen file, reads its FCS metadata,
    checks it against the model's dummy experiment (creating that experiment
    from the first file if there is none yet), builds a ``Tube`` carrying
    the model's tube traits, and appends it to ``self.model.tubes``.
    Selected metadata keywords found in the file are copied onto the tube.

    Raises
    ------
    RuntimeError
        If the FCS reader fails on one of the selected files.
    """

    # TODO - adding a set of files, then a condition, then another
    # set doesn't work.

    file_dialog = FileDialog()
    file_dialog.wildcard = "Flow cytometry files (*.fcs)|*.fcs|"
    file_dialog.action = "open files"
    file_dialog.open()

    if file_dialog.return_code != PyfaceOK:
        return

    # FCS keywords that are copied onto the tube when the file has them.
    metadata_keys = ("$SRC", "TUBE NAME", "$SMNO")

    for path in file_dialog.paths:
        try:
            tube_meta = fcsparser.parse(path,
                                        meta_data_only=True,
                                        reformat_meta=True)
            # tube_channels = tube_meta["_channels_"].set_index("$PnN")
        except Exception as e:
            # Arbitrary exceptions have no ``.value``; format the exception
            # itself so the reader's message is not lost.
            raise RuntimeError("FCS reader threw an error on tube {0}: {1}".format(path, e))

        # if we're the first tube loaded, create a dummy experiment
        if not self.model.dummy_experiment:
            self.model.dummy_experiment = ImportOp(tubes=[CytoflowTube(file=path)],
                                                   coarse_events=1).apply()

        # check the next tube against the dummy experiment
        try:
            check_tube(path, self.model.dummy_experiment)
        except util.CytoflowError as e:
            error(None, str(e), "Error importing tube")
            return

        tube = Tube()

        for trait_name, trait in self.model.tube_traits.items():
            # TODO - do we still need to check for transient?
            tube.add_trait(trait_name, trait)

            # this magic makes sure the trait is actually defined
            # in tube.__dict__, so it shows up in trait_names etc.
            tube.trait_set(**{trait_name: trait.default_value})
            if trait.condition:
                tube.on_trait_change(self._try_multiedit, trait_name)

        tube.trait_set(file=path, parent=self.model)

        for key in metadata_keys:
            if key in tube_meta:
                self._add_metadata(key, key, Str(condition=False))
                # read the value under the same key that was tested above
                # (the "$SMNO" case used to look up "SMNO" and fail)
                tube.trait_set(**{key: tube_meta[key]})

        self.model.tubes.append(tube)
        # NOTE(review): the original layout was lost; enabling the button
        # once a tube has actually been added is assumed to be the intent.
        self.btn_add_cond.setEnabled(True)
def test_parse(self):
    """Check parse() metadata, column names and a data sample for the reference file."""
    self.maxDiff = None

    # Channel names in parameter order ($P1 ... $P16).
    expected_columns = [u'HDR-T',
                        u'FSC-A', u'FSC-H', u'FSC-W',
                        u'SSC-A', u'SSC-H', u'SSC-W',
                        u'V2-A', u'V2-H', u'V2-W',
                        u'Y2-A', u'Y2-H', u'Y2-W',
                        u'B1-A', u'B1-H', u'B1-W']

    # Keywords that are not tied to a particular parameter.
    expected_meta = {
        u'$BEGINANALYSIS': u'0',
        u'$BEGINDATA': u'1892',
        u'$BEGINSTEXT': u'0',
        u'$BTIM': u'11:47:24',
        u'$BYTEORD': u'1,2,3,4',
        u'$CELLS': u'PID_101_MG1655_Transformants_D01',
        u'$CYT': u'MACSQuant',
        u'$CYTSN': u'3057',
        u'$DATATYPE': u'F',
        u'$DATE': u'2013-Jul-19',
        u'$ENDANALYSIS': u'0',
        u'$ENDDATA': u'641891',
        u'$ENDSTEXT': u'0',
        u'$ETIM': u'11:47:46',
        u'$FIL': u'EY_2013-07-19_PID_101_MG1655_Transformants_D01_Well_A3.001.fcs',
        u'$MODE': u'L',
        u'$NEXTDATA': 0,
        u'$OP': u'Eugene',
        u'$PAR': 16,
        u'$SRC': u'A3',
        u'$SYS': u'MACSQuantify,2.4.1247.1dev',
        u'$TOT': 10000,
        '__header__': {'FCS format': b'FCS3.0',
                       'analysis end': 0,
                       'analysis start': 0,
                       'data end': 641891,
                       'data start': 1892,
                       'text end': 1824,
                       'text start': 256},
    }

    # Every parameter shares the same bit width, amplification, gain and
    # range; only the short ($PnN) and long ($PnS) names vary, and in this
    # file they are identical to each other.
    for number, channel_name in enumerate(expected_columns, 1):
        prefix = u'$P{0}'.format(number)
        expected_meta.update({
            prefix + u'B': 32,
            prefix + u'E': u'0.000000,0.000000',
            prefix + u'G': u'1',
            prefix + u'N': channel_name,
            prefix + u'R': u'262144',
            prefix + u'S': channel_name,
        })

    # Metadata-only parse.
    meta_only = parse(test_data_file, meta_data_only=True)
    self.assertEqual(meta_only, expected_meta)

    # Full parse must report the same metadata plus the event table.
    full_meta, df = parse(test_data_file, meta_data_only=False)
    self.assertEqual(full_meta, expected_meta)
    self.assertListEqual(df.columns.tolist(), expected_columns)

    # Verify that a few selected values of the data resolve to their
    # expected values.
    expected_values = np.array([[2.0185113, 459.96298, 437.35455],
                                [27.451754, -267.17465, 365.35455],
                                [32.043865, -201.58234, 501.35455]],
                               dtype=np.float32)
    top_left_corner = df.iloc[:3, :3].values
    assert_array_almost_equal(top_left_corner, expected_values)
index = new_data.index, dtype = meta_type) # if we're categorical, merge the categories if meta_type == "category" and meta_name in self.data.columns: cats = set(self.data[meta_name].cat.categories) | set(new_data[meta_name].cat.categories) self.data[meta_name] = self.data[meta_name].cat.set_categories(cats) new_data[meta_name] = new_data[meta_name].cat.set_categories(cats) except (ValueError, TypeError): raise util.CytoflowError("Had trouble converting conditions {1}" "(value = {2}) to type {3}" \ .format(meta_name, meta_value, meta_type)) self.data = self.data.append(new_data, ignore_index = True) del new_data if __name__ == "__main__": import fcsparser ex = Experiment() ex.add_conditions({"time" : "category"}) tube0, _ = fcsparser.parse('../cytoflow/tests/data/tasbe/BEADS-1_H7_H07_P3.fcs') tube1, _ = fcsparser.parse('../cytoflow/tests/data/tasbe/beads.fcs') tube2, _ = fcsparser.parse('../cytoflow/tests/data/Plate01/RFP_Well_A3.fcs') ex.add_tube(tube1, {"time" : "one"}) ex.add_tube(tube2, {"time" : "two"})
hue = (self.huefacet if self.huefacet else None), col_order = (np.sort(data[self.xfacet].unique()) if self.xfacet else None), row_order = (np.sort(data[self.yfacet].unique()) if self.yfacet else None), hue_order = (np.sort(data[self.huefacet].unique()) if self.huefacet else None), # something buggy here. #orient = ("h" if self.orientation == "horizontal" else "v"), estimator = self.function, ci = None, kind = "bar") if __name__ == '__main__': import cytoflow as flow import fcsparser tube1 = fcsparser.parse('../../cytoflow/tests/data/Plate01/RFP_Well_A3.fcs', reformat_meta = True, channel_naming = "$PnN") tube2 = fcsparser.parse('../../cytoflow/tests/data/Plate01/CFP_Well_A4.fcs', reformat_meta = True, channel_naming = "$PnN") tube3 = fcsparser.parse('../../cytoflow/tests/data/Plate01/RFP_Well_A3.fcs', reformat_meta = True, channel_naming = "$PnN") tube4 = fcsparser.parse('../../cytoflow/tests/data/Plate01/CFP_Well_A4.fcs', reformat_meta = True, channel_naming = "$PnN") ex = flow.Experiment()
pd.Series(data = [meta_value] * len(new_data), index = new_data.index, dtype = meta_type) # if we're categorical, merge the categories if is_categorical_dtype(meta_type) and meta_name in self.data: cats = set(self.data[meta_name].cat.categories) | set( new_data[meta_name].cat.categories) self.data[meta_name] = self.data[meta_name].cat.set_categories( cats) new_data[meta_name] = new_data[meta_name].cat.set_categories( cats) self.data = self.data.append(new_data, ignore_index=True) del new_data if __name__ == "__main__": import fcsparser ex = Experiment() ex.add_conditions({"time": "category"}) tube0, _ = fcsparser.parse( '../cytoflow/tests/data/tasbe/BEADS-1_H7_H07_P3.fcs') tube1, _ = fcsparser.parse('../cytoflow/tests/data/tasbe/beads.fcs') tube2, _ = fcsparser.parse( '../cytoflow/tests/data/Plate01/RFP_Well_A3.fcs') ex.add_tube(tube1, {"time": "one"}) ex.add_tube(tube2, {"time": "two"})
n_samples, n_groups, n_dim) data, labels = generate_test_data(n_samples, n_dim, n_groups=n_clusters) sample_names = [str(l) for l in labels] else: analysis_title = pathlib.Path(filenames[0]).stem dataset = [] labelset = [] sample_names = [] for filename in filenames: if verbose: sys.stderr.write("{} loading file : {}\n".format( datetime.datetime.now().strftime("[%H:%M:%S]"), filename)) if filename.endswith(".fcs"): import fcsparser meta, data = fcsparser.parse(filename) data = data.T color2name = {} nondata_columns = ['EQBeads', 'Time', 'Width', 'Event'] for key, val in meta.items(): if isinstance(val, str) is False: continue m = re.match("\\$(\\w\\d+)N", key) if m: color_code = m.group(1) color_name_code = "$" + color_code + "S" if color_name_code in meta: name = meta[color_name_code] if name in nondata_columns: continue color2name[val] = meta[color_name_code]
def estimate(self, experiment, subset = None):
    """
    Estimate the bleedthrough from the single-channel controls in `controls`

    Each control file is first checked to have the same channel voltages as
    the experiment.  Then, for every control channel, a linear (k = 1)
    least-squares spline is fitted from that channel to each other channel,
    with knots spaced evenly on the hlog scale.  Finally the splines are
    evaluated over a mesh (also spaced on the hlog scale) and the corrected
    values are stored as one grid interpolator per channel in
    ``self._interpolators``.

    Parameters
    ----------
    experiment
        Source of the per-channel ``'voltage'``, ``'range'``, ``'af_stdev'``
        and (optional) ``'af_median'`` metadata.
    subset
        Not used by this method.

    Raises
    ------
    CytoflowOpError
        If no experiment is given, fewer than 3 knots are allowed, a control
        file cannot be read, or a control's voltage is missing or differs
        from the experiment's.
    """
    if not experiment:
        raise CytoflowOpError("No experiment specified")

    if self.num_knots < 3:
        raise CytoflowOpError("Need to allow at least 3 knots in the spline")

    self._channels = list(self.controls.keys())

    # make sure the voltages in every control tube match the experiment.
    # The inner loop uses its own variable so the tube named in an error
    # message is the one that was actually inspected.
    for control_channel in self._channels:
        control_file = self.controls[control_channel]
        try:
            tube_meta = fcsparser.parse(control_file,
                                        meta_data_only = True,
                                        reformat_meta = True)
            tube_channels = tube_meta["_channels_"].set_index("$PnN")
        except Exception as e:
            # arbitrary exceptions have no ``.value`` attribute
            raise CytoflowOpError("FCS reader threw an error on tube {0}: {1}"
                                  .format(control_file, e))

        for channel in self._channels:
            exp_v = experiment.metadata[channel]['voltage']
            channel_meta = tube_channels.loc[channel]

            if "$PnV" not in channel_meta:
                raise CytoflowOpError("Didn't find a voltage for channel {0} "
                                      "in tube {1}".format(channel, control_file))

            control_v = channel_meta["$PnV"]
            if control_v != exp_v:
                raise CytoflowOpError("Voltage differs for channel {0} in tube {1}"
                                      .format(channel, control_file))

    self._splines = {}
    mesh_axes = []

    for channel in self._channels:
        self._splines[channel] = {}
        control_file = self.controls[channel]

        try:
            _, tube_data = fcsparser.parse(control_file,
                                           reformat_meta = True)
        except Exception as e:
            raise CytoflowOpError("FCS reader threw an error on tube {0}: {1}"
                                  .format(control_file, e))

        # the spline fit below is given this channel's values as its x
        # input, so order the events by them
        data = tube_data.sort_values(by = channel)

        for af_channel in self._channels:
            if 'af_median' in experiment.metadata[af_channel]:
                data[af_channel] = data[af_channel] - \
                    experiment.metadata[af_channel]['af_median']

        channel_min = data[channel].min()
        channel_max = data[channel].max()

        # we're going to set the knots and splines evenly across the hlog-
        # transformed data, so as to capture both the "linear" aspect
        # of near-0 and negative values, and the "log" aspect of large
        # values

        # parameterize the hlog transform
        r = experiment.metadata[channel]['range']  # instrument range
        d = np.log10(r)  # maximum display scale, in decades

        # the transition point from linear --> log scale
        # use half of the log-transformed scale as "linear".
        b = 2 ** (np.log2(r) / 2)

        # the splines' knots
        knot_min = channel_min
        knot_max = channel_max

        hlog_knot_min, hlog_knot_max = \
            hlog((knot_min, knot_max), b = b, r = r, d = d)
        hlog_knots = np.linspace(hlog_knot_min, hlog_knot_max, self.num_knots)
        knots = hlog_inv(hlog_knots, b = b, r = r, d = d)

        # only keep the interior knots
        knots = knots[1:-1]

        # the interpolators' mesh
        # NOTE(review): 'af_stdev' is read unconditionally, unlike
        # 'af_median' above -- confirm it is always present.
        mesh_min = -3 * experiment.metadata[channel]['af_stdev']
        mesh_max = r

        hlog_mesh_min, hlog_mesh_max = \
            hlog((mesh_min, mesh_max), b = b, r = r, d = d)
        hlog_mesh_axis = \
            np.linspace(hlog_mesh_min, hlog_mesh_max, self.mesh_size)

        mesh_axis = hlog_inv(hlog_mesh_axis, b = b, r = r, d = d)
        mesh_axes.append(mesh_axis)

        from_channel = channel
        for to_channel in self._channels:
            if from_channel == to_channel:
                continue

            self._splines[from_channel][to_channel] = \
                scipy.interpolate.LSQUnivariateSpline(data[from_channel].values,
                                                      data[to_channel].values,
                                                      t = knots,
                                                      k = 1)

    channel_names = list(self._channels)
    mesh = pandas.DataFrame(cartesian(mesh_axes),
                            columns = channel_names)

    mesh_corrected = mesh.apply(_correct_bleedthrough,
                                axis = 1,
                                args = (channel_names, self._splines))

    mesh_shape = [len(x) for x in mesh_axes]
    for channel in self._channels:
        # reshape the plain array, one grid dimension per mesh axis
        chan_values = np.reshape(mesh_corrected[channel].values, mesh_shape)

        self._interpolators[channel] = \
            scipy.interpolate.RegularGridInterpolator(mesh_axes, chan_values)