Example #1
    def setUp(self):
        import os

        cwd = os.path.dirname(os.path.abspath(__file__))
        self.ex = flow.Experiment()
        self.ex.add_conditions({"time": "float"})
        self.tube1 = fcsparser.parse(cwd + "/data/Plate01/RFP_Well_A3.fcs", reformat_meta=True)
        self.tube2 = fcsparser.parse(cwd + "/data/Plate01/CFP_Well_A4.fcs", reformat_meta=True)
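A quick note on the return value, as a minimal sketch (assuming only the sample file that ships with fcsparser; the Plate01 paths above belong to the cytoflow test data): fcsparser.parse returns a (metadata, data) pair, and reformat_meta=True adds a "_channels_" DataFrame and a "_channel_names_" list to the metadata dict.

import fcsparser

# Parse the sample file bundled with fcsparser.
meta, data = fcsparser.parse(fcsparser.test_sample_path, reformat_meta=True)

print(meta["_channel_names_"])    # channel names from the TEXT segment
print(meta["_channels_"].head())  # per-channel keywords ($PnN, $PnS, $PnR, ...)
print(data.shape)                 # events x channels, as a pandas DataFrame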
Example #2
    def load_well(label):
        # Short-circuit the case where the well has already been loaded, which 
        # is triggered by the "from" external reference machinery.

        if isinstance(label, Well):
            return label

        # Parse well and plate names from the given label.  The plate name is 
        # optional, because often there is only one.

        plate, well = parse_well_label(label)

        # Find the *.fcs file referenced by the given label.

        if plate not in plates:
            raise UsageError(
                    "Plate '{}' not defined.".format(plate)
                    if plate is not None else
                    "No default plate defined.")

        plate_path = plates[plate]
        well_paths = list(plate_path.glob(well_glob.format(well)))
        if len(well_paths) == 0:
            raise UsageError("No *.fcs files found for well '{}'".format(label))
        if len(well_paths) > 1:
            raise UsageError("Multiple *.fcs files found for well '{}'".format(label))
        well_path = well_paths[0]

        # Load the cell data for the given well.
        
        logging.info('Loading {}'.format(well_path.name))
        meta, data = fcsparser.parse(str(well_path))
        return Well(label, meta, data)
Example #3
    def default_view(self):
        """
        Returns a diagnostic plot to see if the bleedthrough spline estimation
        is working.
        
        Returns
        -------
            IView : An IView, call plot() to see the diagnostic plots
        """
        
        if set(self.controls.keys()) != set(self._splines.keys()):
            raise CytoflowOpError("Must have both the controls and bleedthrough to plot")
 
        channels = self.controls.keys()
        
        # make sure we can get the control tubes to plot the diagnostic
        for channel in channels:       
            try:
                _ = fcsparser.parse(self.controls[channel], 
                                    meta_data_only = True, 
                                    reformat_meta = True)
            except Exception as e:
                raise CytoflowOpError("FCS reader threw an error on tube {0}: {1}"\
                                   .format(self.controls[channel], e.value))

        return BleedthroughPiecewiseDiagnostic(op = self)
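The loop above only verifies that each control tube is readable. A standalone sketch of the same check with hypothetical file paths (meta_data_only=True returns just the metadata dict, so no event data is loaded):

import fcsparser

# Hypothetical control files; substitute real paths.
controls = {"FITC-A": "controls/fitc.fcs", "PE-A": "controls/pe.fcs"}

for channel, path in controls.items():
    try:
        # Only the metadata is parsed when meta_data_only=True.
        fcsparser.parse(path, meta_data_only=True, reformat_meta=True)
    except Exception as e:
        raise RuntimeError("Could not read control for {}: {}".format(channel, e))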
Example #4
def check_tube(filename, experiment, ignore_v = False):
    try:
        tube_meta = fcsparser.parse( filename, 
                                     channel_naming = experiment.metadata["name_metadata"],
                                     meta_data_only = True,
                                     reformat_meta = True)
    except Exception as e:
        raise util.CytoflowOpError("FCS reader threw an error reading metadata "
                              " for tube {0}: {1}"
                              .format(filename, str(e)))
    
    # first make sure the tube has the right channels    
    if set(tube_meta["_channel_names_"]) != set(experiment.channels):
        raise util.CytoflowError("Tube {0} doesn't have the same channels "
                           "as the first tube added".format(filename))
     
    tube_channels = tube_meta["_channels_"]
    tube_channels.set_index(experiment.metadata["name_metadata"], 
                            inplace = True)
     
    # next check the per-channel parameters
    for channel in experiment.channels:        
        # first check voltage
        if "voltage" in experiment.metadata[channel]:    
            if not "$PnV" in tube_channels.ix[channel]:
                raise util.CytoflowError("Didn't find a voltage for channel {0}" \
                                   "in tube {1}".format(channel, filename))
            
            old_v = experiment.metadata[channel]["voltage"]
            new_v = tube_channels.ix[channel]['$PnV']
            
            if old_v != new_v and not ignore_v:
                raise util.CytoflowError("Tube {0} doesn't have the same voltages"
                                    .format(filename))
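check_tube compares per-channel keywords such as $PnV through the reformatted "_channels_" table. A minimal sketch of reading that table directly, using the bundled sample file (a $PnV column only exists if the acquisition software recorded voltages):

import fcsparser

meta = fcsparser.parse(fcsparser.test_sample_path,
                       meta_data_only=True,
                       reformat_meta=True)

channels = meta["_channels_"].set_index("$PnN")
print(channels.index.tolist())      # short channel names
if "$PnV" in channels.columns:      # detector voltages, when recorded
    print(channels["$PnV"])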
Example #5
def get_one_fcs(track_nextdata):
    meta, data = fcsparser.parse(path, dataset_start=track_nextdata, reformat_meta=False)
    print "sampleID:", meta["GTI$SAMPLEID"]

    if meta["$NEXTDATA"] == 0:
        return -1
    else:
        return track_nextdata + meta["$NEXTDATA"]
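get_one_fcs walks chained data sets via the $NEXTDATA keyword; the dataset_start argument it passes looks fork-specific, while the fcsparser used elsewhere on this page exposes a data_set index instead (see Examples #17 and #21). A hedged sketch, assuming that keyword, which iterates data sets until $NEXTDATA is 0:

import fcsparser

def iter_data_sets(path):
    """Yield (meta, data) for each chained data set in an FCS file."""
    index = 0
    while True:
        meta, data = fcsparser.parse(path, data_set=index)
        yield meta, data
        # $NEXTDATA == 0 marks the last data set in the file.
        if int(meta.get("$NEXTDATA", 0)) == 0:
            break
        index += 1

for meta, data in iter_data_sets(fcsparser.test_sample_path):
    print(data.shape)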
Example #6
    def init_model(self, op):
        
        dtype_to_trait = {"category" : Str,
                          "float" : Float,
                          "bool" : Bool,
                          "int" : Int}
        
        for op_tube in op.tubes:
            tube = Tube(file = op_tube.file,
                        parent = self)
            
            # first load the tube's metadata and set special columns
            try:
                tube_meta = fcsparser.parse(op_tube.file, 
                                            meta_data_only = True, 
                                            reformat_meta = True)
                #tube_channels = tube_meta["_channels_"].set_index("$PnN")
            except Exception as e:
                error(None, "FCS reader threw an error on tube {0}: {1}"\
                            .format(op_tube.file, e.value),
                      "Error reading FCS file")
                return
            
            # if we're the first tube loaded, create a dummy experiment
            if not self.dummy_experiment:
                self.dummy_experiment = ImportOp(tubes = [op_tube],
                                                 conditions = op.conditions,
                                                 coarse_events = 1).apply()
                
            if '$SRC' in tube_meta:    
                self.tube_traits["$SRC"] = Str(condition = False)
                tube.add_trait("$SRC", Str(condition = False))
                tube.trait_set(**{"$SRC" : tube_meta['$SRC']})
                
            if 'TUBE NAME' in tube_meta:
                #self._add_metadata("TUBE NAME", "TUBE NAME", Str(condition = False))
                self.tube_traits["TUBE NAME"] = Str(condition = False)
                tube.add_trait("TUBE NAME", Str(condition = False))
                tube.trait_set(**{"TUBE NAME" : tube_meta['TUBE NAME']})
                
            if '$SMNO' in tube_meta:
                #self._add_metadata("$SMNO", "$SMNO", Str(condition = False))
                self.tube_traits["$SMNO"] = Str(condition = False)
                tube.add_trait("$SMNO", Str(condition = False))
                tube.trait_set(**{"$SMNO" : tube_meta['SMNO']})

            # next set conditions
            for condition in op_tube.conditions:
                condition_dtype = op.conditions[condition]
                condition_trait = \
                    dtype_to_trait[condition_dtype](condition = True)
                tube.add_trait(condition, condition_trait)
                if not condition in self.tube_traits:
                    self.tube_traits[condition] = condition_trait
            tube.trait_set(**op_tube.conditions)
            
            self.tubes.append(tube)
Example #7
    def init_model(self, op):

        # I DON'T KNOW WHY THIS STICKS AROUND ACROSS DIALOG INVOCATIONS.
        del self.tubes[:]

        dtype_to_trait = {"category": Str, "float": Float, "log": LogFloat, "bool": Bool, "int": Int}

        for op_tube in op.tubes:
            tube = Tube(file=op_tube.file, parent=self)

            # first load the tube's metadata and set special columns
            try:
                tube_meta = fcsparser.parse(op_tube.file, meta_data_only=True, reformat_meta=True)
                # tube_channels = tube_meta["_channels_"].set_index("$PnN")
            except Exception as e:
                error(
                    None,
                    "FCS reader threw an error on tube {0}: {1}".format(op_tube.file, e.value),
                    "Error reading FCS file",
                )
                return

            if "$SRC" in tube_meta:
                self.tube_traits["$SRC"] = Str(condition=False)
                tube.add_trait("$SRC", Str(condition=False))
                tube.trait_set(**{"$SRC": tube_meta["$SRC"]})

            if "TUBE NAME" in tube_meta:
                # self._add_metadata("TUBE NAME", "TUBE NAME", Str(condition = False))
                self.tube_traits["TUBE NAME"] = Str(condition=False)
                tube.add_trait("TUBE NAME", Str(condition=False))
                tube.trait_set(**{"TUBE NAME": tube_meta["TUBE NAME"]})

            if "$SMNO" in tube_meta:
                # self._add_metadata("$SMNO", "$SMNO", Str(condition = False))
                self.tube_traits["$SMNO"] = Str(condition=False)
                tube.add_trait("$SMNO", Str(condition=False))
                tube.trait_set(**{"$SMNO": tube_meta["SMNO"]})

            # next set conditions
            for condition in op_tube.conditions:
                condition_dtype = op.conditions[condition]
                condition_trait = dtype_to_trait[condition_dtype](condition=True)
                tube.add_trait(condition, condition_trait)
                if not condition in self.tube_traits:
                    self.tube_traits[condition] = condition_trait
            tube.trait_set(**op_tube.conditions)

            # if we're the first tube loaded, create a dummy experiment
            # to validate voltage, etc for later tubes
            if not self.dummy_experiment:
                self.model.dummy_experiment = ImportOp(tubes=[CytoflowTube(file=op_tube.file)], coarse_events=1).apply()

            self.tubes.append(tube)
Example #8
def parse_tube(filename, experiment, ignore_v = False):   
    
    check_tube(filename, experiment, ignore_v)
         
    try:
        _, tube_data = fcsparser.parse(
                            filename, 
                            channel_naming = experiment.metadata["name_metadata"])
    except Exception as e:
        raise util.CytoflowOpError("FCS reader threw an error reading data for tube "
                              "{0}: {1}".format(filename, str(e)))
            
    return tube_data
Example #9
 def _on_add_tubes(self):
     """
     Handle "Add tubes..." button.  Add tubes to the experiment.
     """
     
     # TODO - adding a set of files, then a condition, then another
     # set doesn't work.
     
     file_dialog = FileDialog()
     file_dialog.wildcard = "Flow cytometry files (*.fcs)|*.fcs|"
     file_dialog.action = 'open files'
     file_dialog.open()
     
     if file_dialog.return_code != PyfaceOK:
         return
     
     for path in file_dialog.paths:
         try:
             tube_meta = fcsparser.parse(path, 
                                         meta_data_only = True, 
                                         reformat_meta = True)
             tube_channels = tube_meta["_channels_"].set_index("$PnN")
         except Exception as e:
             raise RuntimeError("FCS reader threw an error on tube {0}: {1}"\
                                .format(path, e.value))
             
         tube = Tube()
         
         for trait_name, trait in self.model.tube_traits.items():
             # TODO - do we still need to check for transient?
             tube.add_trait(trait_name, trait)
             
             # this magic makes sure the trait is actually defined
             # in tube.__dict__, so it shows up in trait_names etc.
             tube.trait_set(**{trait_name : trait.default_value})
             if trait.condition:
                 tube.on_trait_change(self._try_multiedit, trait_name)
             
         tube.trait_set(Source = tube_meta['$SRC'],
                        _file = path,
                        _parent = self.model)
         
         if 'TUBE NAME' in tube_meta:
             tube.Tube = tube_meta['TUBE NAME']
         elif '$SMNO' in tube_meta:
             tube.Tube = tube_meta['$SMNO']
         
         self.model.tubes.append(tube)
Example #10
def test_fcs():
    path = fcsparser.test_sample_path
    meta, data = fcsparser.parse(path)
    _, _, X = scprep.io.load_fcs(path)
    assert "Time" not in X.columns
    assert len(set(X.columns).difference(data.columns)) == 0
    np.testing.assert_array_equal(X.index, data.index)
    np.testing.assert_array_equal(X.to_numpy(), data[X.columns].to_numpy())
    _, _, X = scprep.io.load_fcs(path, sparse=True)
    assert "Time" not in X.columns
    assert len(set(X.columns).difference(data.columns)) == 0
    np.testing.assert_array_equal(X.index, data.index)
    np.testing.assert_array_equal(X.sparse.to_dense().to_numpy(),
                                  data[X.columns].to_numpy())

    X_meta, _, X = scprep.io.load_fcs(path, reformat_meta=False, override=True)
    _assert_fcs_meta_equal(meta, X_meta, reformat_meta=False)
Example #11
    def plot(self, experiment = None, **kwargs):
        """Plot a faceted histogram view of a channel"""
      
        try:
            beads_meta, beads_data = fcsparser.parse(self.op.beads_file, 
                                            reformat_meta = True)
            beads_channels = beads_meta["_channels_"].set_index("$PnN")
        except Exception as e:
            raise CytoflowOpError("FCS reader threw an error on tube {0}: {1}"\
                               .format(self.op.beads_file, e.value))

        plt.figure()
        
        channels = self.op.units.keys()
        
        for idx, channel in enumerate(channels):
            data = beads_data[channel]
            
            # bin the data on a log scale
            data_range = float(beads_channels.ix[channel]['$PnR'])
            hist_bins = np.logspace(1, math.log(data_range, 2), num = 256, base = 2)
            hist = np.histogram(data, bins = hist_bins)
            
            # mask off-scale values
            hist[0][0] = 0
            hist[0][-1] = 0
            
            hist_smooth = scipy.signal.savgol_filter(hist[0], 5, 1)
            
            # find peaks
            peak_bins = scipy.signal.find_peaks_cwt(hist_smooth, 
                                                    widths = np.arange(3, 20),
                                                    max_distances = np.arange(3, 20) / 2)
            
            # filter by height and intensity
            peak_threshold = np.percentile(hist_smooth, self.op.bead_peak_quantile)
            peak_bins_filtered = \
                [x for x in peak_bins if hist_smooth[x] > peak_threshold
                 and hist[1][x] > self.op.bead_brightness_threshold]
                
            plt.subplot(len(channels), 1, idx+1)
            plt.xscale('log')
            plt.xlabel(channel)
            plt.plot(hist_bins[1:], hist_smooth)
            for peak in peak_bins_filtered:
                plt.axvline(hist_bins[peak], color = 'r')
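The heart of the plot above is a log-spaced histogram of one bead channel, followed by smoothing and peak detection. A condensed sketch of that pipeline on synthetic placeholder data (in the real code, data is beads_data[channel] and data_range comes from the channel's $PnR keyword):

import numpy as np
import scipy.signal

# Placeholder standing in for one bead channel.
data = np.random.lognormal(mean=5, sigma=1, size=10000)
data_range = 2 ** 18   # in the example, float of the channel's $PnR keyword

hist_bins = np.logspace(1, np.log2(data_range), num=256, base=2)
counts, _ = np.histogram(data, bins=hist_bins)
counts[0] = counts[-1] = 0                          # mask off-scale values
smoothed = scipy.signal.savgol_filter(counts, 5, 1)
peaks = scipy.signal.find_peaks_cwt(smoothed, widths=np.arange(3, 20))
print(hist_bins[peaks])                             # approximate peak positions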
Example #12
def concatenate_fcs(input_dir):
    txt_filelist = [f for f in os.listdir(input_dir) if f.endswith(".txt")]
    fcs_filelist = [f for f in os.listdir(input_dir) if f.endswith(".fcs")]
    filelist = txt_filelist + fcs_filelist
    if len(filelist) == 0:
        sys.exit(f"ERROR: There are no files in {input_dir}!")
    no_arc = pd.DataFrame()
    #Add counter to keep track of the number of files in input ->
    # -> cell ID will be a mix of these (Filenumber | filename.txt)
    fcounter = 0
    for i in filelist:
        file_path = f"{input_dir}/{i}"
        name = i.split('.')[0]
        fcounter += 1
        if i in txt_filelist:
            print(i)
            df = pd.read_csv(file_path, sep='\t')
        else:
            try:  #Use fcsparser to read the fcs data files
                print(i)
                df = fcsparser.parse(file_path, meta_data_only=False)[1]
                reg_pnn = re.compile(r"(\d+Di$)")  #Detect if, despite flag
                pnn_extracted = []  #columns match PnN pattern
                for n in df.columns.values.tolist():
                    if reg_pnn.search(n):
                        pnn_extracted.append(n)
                if len(pnn_extracted) != 0:
                    raise fcsparser.api.ParserFeatureNotImplementedError
            except fcsparser.api.ParserFeatureNotImplementedError:
                print("WARNING: Non-standard .fcs file detected: ", i)
                #use rpy2 to read the files and load into python
                df = read_rFCS(file_path)[0]

        # add a new column of 'file_origin' that will be used to separate each file after umap calculation
        df["file_identifier"] = name
        df["file_origin"] = str(fcounter) + " | " + name
        #File+ID #This way the cell-index will be preserved after Cytobank upload
        try:
            df["Sample_ID-Cell_Index"] = df["Cell_Index"].apply(
                lambda x: str(fcounter) + "-" + str(x))
        except KeyError:
            sys.exit(
                "ERROR: Cell_Index missing from data. Have you preprocessed it?"
            )
        no_arc = no_arc.append(df, ignore_index=True)
    return no_arc, filelist
Example #13
def get_clusters(sample_name):
    #all_neighbors = list(reversed(range(5, 100+1)))
    all_neighbors = list(reversed(range(5, 500 + 1, 5)))
    nn_nums = []
    num_clusters = []
    for nn in all_neighbors:
        try:
            _, data = fcsparser.parse(data_dir + '/' + sample_name + '/' +
                                      str(nn) + '/out/' + sample_name + '.fcs')
            clusters = len(np.unique(data['cluster_id']))
            if clusters < 300:  # hacky method right now to reduce size of tree
                num_clusters.append(clusters)
                nn_nums.append(nn)
        except:
            continue

    return num_clusters, nn_nums
Example #14
def load_fcs(filename,
             gene_names=True,
             cell_names=True,
             sparse=None,
             metadata_channels=[
                 'Time', 'Event_length', 'DNA1', 'DNA2', 'Cisplatin',
                 'beadDist', 'bead1'
             ]):
    """Load a fcs file

    Parameters
    ----------
    filename : str
        The name of the fcs file to be loaded
    gene_names : `bool`, `str`, array-like, or `None` (default: True)
        If `True`, we assume gene names are contained in the file. Otherwise
        expects a filename or an array containing a list of gene symbols or ids
    cell_names : `bool`, `str`, array-like, or `None` (default: True)
        If `True`, we assume cell names are contained in the file. Otherwise
        expects a filename or an array containing a list of cell barcodes.
    sparse : bool, optional (default: None)
        If True, loads the data as a pd.SparseDataFrame. This uses less memory
        but more CPU.
    metadata_channels : list-like, optional (default: ['Time', 'Event_length', 'DNA1', 'DNA2', 'Cisplatin', 'beadDist', 'bead1'])
        Channels to be excluded from the data

    Returns
    -------
    metadata : pd.DataFrame
        The metadata channels excluded from the data
    data : pd.DataFrame
    """
    if cell_names is True:
        cell_names = None
    if gene_names is True:
        gene_names = None
    # Parse the fcs file
    meta, data = fcsparser.parse(filename)
    metadata_channels = data.columns.intersection(metadata_channels)
    data_channels = data.columns.difference(metadata_channels)
    metadata = data[metadata_channels]
    data = data[data_channels]
    data = _matrix_to_data_frame(data,
                                 gene_names=gene_names,
                                 cell_names=cell_names,
                                 sparse=sparse)
    return metadata, data
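A hedged usage sketch of the loader above ("sample.fcs" is a placeholder, and the internal _matrix_to_data_frame helper it calls is assumed to be available in the same module):

# Returns the excluded metadata channels plus the remaining data matrix.
metadata, data = load_fcs("sample.fcs", sparse=False)
print(metadata.columns.tolist())   # e.g. Time, DNA1, ... (whichever are present)
print(data.shape)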
Example #15
def get_data(fn, sample=0, return_rawfile=False):
    """Return DataFrame of an FCS file."""
    meta, x = fcsparser.parse(fn)
    if return_rawfile:
        return x

    x = x.iloc[:, args.cols]

    newvals = asinh(x)
    x = pd.DataFrame(newvals, columns=x.columns)

    if sample:
        r = list(range(x.shape[0]))
        np.random.shuffle(r)
        r = r[:sample]
        x = x.iloc[r, :]

    return x
Example #16
    def apply(self, experiment = None):
        
        if not self.tubes or len(self.tubes) == 0:
            raise CytoflowOpError("Must specify some tubes!")
        
        # make sure each tube has the same conditions
        tube0_conditions = set(self.tubes[0].conditions)
        for tube in self.tubes:
            tube_conditions = set(tube.conditions)
            if len(tube0_conditions ^ tube_conditions) > 0:
                raise CytoflowOpError("Tube {0} didn't have the same "
                                      "conditions as tube {1}"
                                      .format(tube.file, self.tubes[0].file))

        # make sure experimental conditions are unique
        for idx, i in enumerate(self.tubes[0:-1]):
            for j in self.tubes[idx+1:]:
                if i.conditions_equal(j):
                    raise CytoflowOpError("The same conditions specified for "
                                          "tube {0} and tube {1}"
                                          .format(i.file, j.file))
        
        experiment = Experiment()
            
        for condition, dtype in self.conditions.items():
            is_log = False
            if dtype == "log":
                is_log = True
                dtype = "float"
            experiment.add_conditions({condition : dtype})
            if is_log:
                experiment.metadata[condition]["repr"] = "log"
        
        for tube in self.tubes:
            tube_fc = fcsparser.parse(tube.file, reformat_meta = True)
            if self.coarse:
                tube_meta, tube_data = tube_fc
                tube_data = tube_data.loc[np.random.choice(tube_data.index,
                                                           self.coarse_events,
                                                           replace = False)]
                tube_fc = (tube_meta, tube_data)
            experiment.add_tube(tube_fc, tube.conditions, ignore_v = self.ignore_v)
            
        return experiment
Example #17
def check_tube(filename, experiment, data_set=0):

    if experiment is None:
        raise util.CytoflowError("No experiment specified")

    ignore_v = experiment.metadata['ignore_v']

    try:
        tube_meta = fcsparser.parse(
            filename,
            channel_naming=experiment.metadata["name_metadata"],
            data_set=data_set,
            meta_data_only=True,
            reformat_meta=True)
    except Exception as e:
        raise util.CytoflowError("FCS reader threw an error reading metadata "
                                 "for tube {0}".format(filename)) from e

    # first make sure the tube has the right channels
    if not set(
        [experiment.metadata[c]["fcs_name"]
         for c in experiment.channels]) <= set(tube_meta["_channel_names_"]):
        raise util.CytoflowError(
            "Tube {0} doesn't have the same channels".format(filename))

    tube_channels = tube_meta["_channels_"]
    tube_channels.set_index(experiment.metadata["name_metadata"], inplace=True)

    # next check the per-channel parameters
    for channel in experiment.channels:
        fcs_name = experiment.metadata[channel]["fcs_name"]
        # first check voltage
        if "voltage" in experiment.metadata[channel]:
            if not "$PnV" in tube_channels.loc[fcs_name]:
                raise util.CytoflowError("Didn't find a voltage for channel {0}" \
                                   "in tube {1}".format(channel, filename))

            old_v = experiment.metadata[channel]["voltage"]
            new_v = tube_channels.loc[fcs_name]['$PnV']

            if old_v != new_v and not channel in ignore_v:
                raise util.CytoflowError(
                    "Tube {0} doesn't have the same voltages for channel ".
                    format(filename) + str(channel))
Example #18
 def plot(self, experiment = None, **kwargs):
     """Plot a faceted histogram view of a channel"""
     
     kwargs.setdefault('histtype', 'stepfilled')
     kwargs.setdefault('alpha', 0.5)
     kwargs.setdefault('antialiased', True)
      
     plt.figure()
     
     channels = self.op._splines.keys()
     num_channels = len(channels)
     
     for from_idx, from_channel in enumerate(channels):
         for to_idx, to_channel in enumerate(channels):
             if from_idx == to_idx:
                 continue
             
             try:
                 _, tube_data = fcsparser.parse(self.op.controls[from_channel], 
                                                reformat_meta = True)
             except Exception as e:
                 raise CytoflowOpError("FCS reader threw an error on tube {0}: {1}"\
                                       .format(self.op.controls[from_channel], e.value))
          
             plt.subplot(num_channels, 
                         num_channels, 
                         from_idx + (to_idx * num_channels) + 1)
             plt.xscale('log', nonposx='mask')
             plt.yscale('log', nonposy='mask')
             plt.xlabel(from_channel)
             plt.ylabel(to_channel)
             plt.scatter(tube_data[from_channel],
                         tube_data[to_channel],
                         alpha = 0.1,
                         s = 1,
                         marker = 'o')
             
             spline = self.op._splines[from_channel][to_channel]
             xs = np.logspace(-1, math.log(tube_data[from_channel].max(), 10))
         
             plt.plot(xs, 
                      spline(xs), 
                      'g-', 
                      lw=3)
Example #19
def loadDeepCyTOFData(dataPath,
                      dataIndex,
                      relevantMarkers,
                      mode,
                      skip_header=0):
    if mode == 'CSV.GZ':
        data_filename = dataPath + "/" + str(
            dataIndex)  # I'm just going to give it the file name
        X = pd.read_csv(os.path.join(io.DeepLearningRoot(),
                                     data_filename)).to_numpy()
        # print(np.shape(X))
        actual = pd.read_csv(
            os.path.join(io.DeepLearningRoot(),
                         data_filename.replace("/x/", "/y/")))
        labels = pd.DataFrame([0] * len(actual))
        for aci in range(len(actual.columns)):
            labels[actual[actual.columns[aci]] == 1] = aci + 1
        labels = [
            item for sublist in labels.values.tolist() for item in sublist
        ]

    else:
        if mode == 'CSV':
            data_filename = dataPath + '/sample' + str(dataIndex) + '.csv'
            X = genfromtxt(os.path.join(io.DeepLearningRoot(), data_filename),
                           delimiter=',',
                           skip_header=skip_header)
        if mode == 'FCS':
            data_filename = dataPath + '/sample' + str(dataIndex) + '.fcs'
            _, X = fcsparser.parse(os.path.join(io.DeepLearningRoot(),
                                                data_filename),
                                   reformat_meta=True)
            X = X.as_matrix()
        label_filename = dataPath + '/labels' + str(dataIndex) + '.csv'
        labels = genfromtxt(os.path.join(io.DeepLearningRoot(),
                                         label_filename),
                            delimiter=',')
    labels = np.int_(labels)

    X = X[:, relevantMarkers]
    sample = Sample(X, labels)

    return sample
Example #20
    def default_view(self):
        """
        Returns a diagnostic plot to see if the bleedthrough spline estimation
        is working.
        
        Returns
        -------
            IView : An IView, call plot() to see the diagnostic plots
        """
        
        try:
            _ = fcsparser.parse(self.beads_file, 
                                meta_data_only = True, 
                                reformat_meta = True)
        except Exception as e:
            raise CytoflowOpError("FCS reader threw an error on tube {0}: {1}"\
                               .format(self.beads_file, e.value))

        return BeadCalibrationDiagnostic(op = self)
Example #21
def autodetect_name_metadata(filename, data_set = 0):

    try:
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            metadata = fcsparser.parse(filename,
                                       data_set = data_set,
                                       meta_data_only = True,
                                       reformat_meta = True)
    except Exception as e:
        warnings.warn("Trouble getting metadata from {}: {}".format(filename, str(e)),
                      util.CytoflowWarning)
        return '$PnS'
    
    meta_channels = metadata["_channels_"]
    
    if "$PnN" in meta_channels and not "$PnS" in meta_channels:
        name_metadata = "$PnN"
    elif "$PnN" not in meta_channels and "$PnS" in meta_channels:
        name_metadata = "$PnS"
    else:
        PnN = meta_channels["$PnN"]
        PnS = meta_channels["$PnS"]
        
        # sometimes not all of the channels have a $PnS.  all the channels must 
        # have a $PnN to be compliant with the spec
        if None in PnS:
            name_metadata = "$PnN"
        
        # sometimes one is unique and the other isn't
        if (len(set(PnN)) == len(PnN) and 
            len(set(PnS)) != len(PnS)):
            name_metadata = "$PnN"
        elif (len(set(PnN)) != len(PnN) and 
              len(set(PnS)) == len(PnS)):
            name_metadata = "$PnS"
        else:
            # as per fcsparser.api, $PnN is the "short name" (like FL-1)
            # and $PnS is the "actual name" (like "FSC-H").  so let's
            # use $PnS.
            name_metadata = "$PnS"
            
    return name_metadata
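A hedged usage sketch: the detected keyword can be fed straight back into fcsparser.parse through its channel_naming argument, which is what the cytoflow snippets above do via experiment.metadata["name_metadata"]:

import fcsparser

path = fcsparser.test_sample_path                  # any readable FCS file
name_metadata = autodetect_name_metadata(path)     # "$PnN" or "$PnS"
meta, data = fcsparser.parse(path,
                             channel_naming=name_metadata,
                             reformat_meta=True)
print(data.columns.tolist())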
Example #22
def from_fcs(
    cls,
    fcs_file,
    cofactor=5,
    metadata_channels=[
        "Time",
        "Event_length",
        "DNA1",
        "DNA2",
        "Cisplatin",
        "beadDist",
        "bead1",
    ],
):

    # Parse the fcs file
    text, data = fcsparser.parse(fcs_file)
    data = data.astype(np.float64)

    # Extract the S and N features (Indexing assumed to start from 1)
    # Assumes channel names are in S
    no_channels = text["$PAR"]
    channel_names = [""] * no_channels
    for i in range(1, no_channels + 1):
        # S name
        try:
            channel_names[i - 1] = text["$P%dS" % i]
        except KeyError:
            channel_names[i - 1] = text["$P%dN" % i]
    data.columns = channel_names

    # Metadata and data
    metadata_channels = data.columns.intersection(metadata_channels)
    data_channels = data.columns.difference(metadata_channels)
    # metadata = data[metadata_channels]
    data = data[data_channels]

    # Transform if necessary
    if cofactor is not None and cofactor > 0:
        data = np.arcsinh(np.divide(data, cofactor))

    return data
Example #23
 def plot(self, experiment = None, **kwargs):
     """Plot a faceted histogram view of a channel"""
     
     import matplotlib.pyplot as plt
     import seaborn as sns
     
     kwargs.setdefault('histtype', 'stepfilled')
     kwargs.setdefault('alpha', 0.5)
     kwargs.setdefault('antialiased', True)
     
     _, blank_data = fcsparser.parse(self.op.blank_file, reformat_meta=True)    
     plt.figure()
     
     for idx, channel in enumerate(self.op.channels):
         d = blank_data[channel]
         plt.subplot(len(self.op.channels), 1, idx+1)
         plt.title(channel)
         plt.hist(d, bins = 200, **kwargs)
         
         plt.axvline(self.op._af_median[channel], color = 'r')
Example #24
def load_data(data_path, data_index, relevant_markers, mode, skip_header=0):
    if mode == 'CSV':
        data_filename = data_path + '/sample' + str(data_index) + '.csv'
        x = genfromtxt(os.path.join(io.deep_learning_root(), data_filename),
                       delimiter=',',
                       skip_header=skip_header)
    if mode == 'FCS':
        files = [file for file in os.listdir(data_path) if '.fcs' in file]
        data_filename = os.path.join(data_path, files[data_index])
        _, x = fcsparser.parse(os.path.join(io.deep_learning_root(),
                                            data_filename),
                               reformat_meta=True)
        x = x.as_matrix()
    x = x[:, relevant_markers]
    #label_filename = data_path + '/labels' + str(data_index) + '.csv'
    #labels = genfromtxt(os.path.join(io.deep_learning_root(), label_filename), delimiter=',')
    #labels = np.int_(labels)
    sample = Sample(x)

    return sample
Example #25
def query_specimen_fcsfile_data(request):
    try:
        params = json.loads(request.body)
        filename = params['filename']
        specimenid = params['specimenid']
        specimen = Specimen.objects.get(specimenid=specimenid)
        if specimen is None:
            logger.error('param vaild')

        querysubdir = specimen.specimendir
        cols = {}
        meta, df = fcsparser.parse(get_fcsfilepath(querysubdir, filename))
        for col in numpy.array(df.columns).tolist():
            cols[col] = df[col].tolist()
        cols['filename'] = filename
        cols['specimenid'] = specimenid
        return em.create_sucess_response(cols)
    except Exception as e:
        logger.exception(e)
        return em.create_fail_response(e, em.FAIL)
Example #26
    def draw(self, specimengates):
        result = None
        for specimengate in specimengates:
            filename = specimengate.fcsfilename
            filepath = get_fcsfilepath(self.fcsfiledir, filename)
            meta, df = fcsparser.parse(filepath)
            normal_gates = None
            vetx_gate = None
            if specimengate.gatetype == 0:
                normal_gates = json.loads(specimengate.gates)
            elif specimengate.gatetype == 1:
                vetx_gate = json.loads(specimengate.gates)

            if normal_gates is not None:
                for gate in normal_gates:
                    self.draw_normal_gate(filename, gate, df)

            if vetx_gate is not None:
                result = self.draw_vetx_gate(filename, vetx_gate, df)
        self.copy_last_plot()
        return self.imgs, result
Example #27
def load_cll_data_1p_fcs(diagnosis_filename, cytometry_dir, features):
    X, y = [], []
    diagnosis_df = pd.read_csv(diagnosis_filename, sep='\t')
    for filename in sorted(os.listdir(cytometry_dir)):
        if os.path.isdir(os.path.join(cytometry_dir, filename)):
            continue

        # filter out PB1 samples that we do not have diagnosis information about
        file_path = os.path.join(cytometry_dir, filename)
        if filename in diagnosis_df['FileName'].values:
            meta_data, file_df = fcsparser.parse(file_path,
                                                 meta_data_only=False,
                                                 reformat_meta=True)
            print(list(file_df))
            X.append(file_df[features].values)
            y.append(diagnosis_df.loc[diagnosis_df['FileName'] == filename]
                     ['Diagnosis'].values[0])
    d = {'no': 0, 'yes': 1}
    y = [d[_] for _ in y]
    print(y)
    return X, y
Example #28
def cluster_fcs(filename,
                model,
                features=RELEVANT_FEATURES,
                return_type='array'):
    '''Trains specified cluster model on FCS file

    KMeans is trained with n_clusters=4.

    DBSCAN is trained with min_samples=100 and eps=5e4.

    Args:
        filename (str): filename of fcs data to be clustered
        model (str): 'kmeans' or 'dbscan'
        return_type (str): 'dataframe' or 'array'

    Returns:
        Either pd.DataFrame with new 'cluster_label' column or an np.array with
        cluster labels
    '''
    # validate input
    if model not in {'kmeans', 'dbscan'}:
        raise Exception('model must be "kmeans" or "dbscan".')
    if return_type not in {'dataframe', 'array'}:
        raise Exception('return_type must be "dataframe" or "array".')

    # train cluster model
    if model.lower() == 'kmeans':
        model = KMeans(n_clusters=4)
    elif model.lower() == 'dbscan':
        model = DBSCAN(min_samples=100, eps=5e4)
    _, data = fcs.parse(filename)
    model.fit(data[features])
    labels = model.labels_

    if return_type == 'dataframe':
        data['cluster_label'] = labels
        return data
    elif return_type == 'array':
        return labels
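A hedged usage sketch of cluster_fcs ("sample.fcs" is a placeholder, and RELEVANT_FEATURES must name columns that actually exist in the file):

# K-means labels as a bare array.
labels = cluster_fcs("sample.fcs", model="kmeans", return_type="array")

# Or keep the events and attach the labels as a new column.
clustered = cluster_fcs("sample.fcs", model="dbscan", return_type="dataframe")
print(clustered["cluster_label"].value_counts())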
Example #29
def cell_stat(request):
    try:
        params = json.loads(request.body)
        filename = params['fcsfilename']
        specimenid = params['specimenid']
        polygons = params['polygongate']
        specimen = Specimen.objects.get(specimenid=specimenid)
        meta, df = fcsparser.parse(
            get_fcsfilepath(specimen.specimendir, filename))
        actual_x = df[SSC_A]
        actual_y = df[PerCP_A]
        gate = Gate()
        gate.load(polygons)
        x = actual_x.values.reshape(actual_x.values.size, 1)
        y = actual_y.values.reshape(actual_y.values.size, 1)
        points = numpy.concatenate((x, y), axis=1)
        result = gate.stat(points)
        result['detail'] = {}
        return em.create_sucess_response(result)
    except Exception as e:
        logger.exception(e)
        return em.create_fail_response(e, em.FAIL)
Example #30
def fcs_to_csv(path, file_name, save_metadata=False, gate=True, alpha=0.4):
    R"""
    Reads in a Flow Cytometry Standard (FCS) file and exports all content
    directly to an easily parseable csv file.

    Parameters
    ----------
    path : str
        Path to .fcs file
    file_name : str
        Path to save file to .csv
    save_metadata : bool
        If True, a metadata file will also be saved. It will have the name of
        `path` with `_metadata.csv`
    gate : bool
        If True, the provided data will be gated.
    alpha : float [0, 1]
        The highest-density fraction of the data desired.
    """

    # Ensure provided file is actually .fcs
    if path.split('.')[-1] != 'fcs':
        raise RuntimeError("`path` is not an FCS file.")

    meta, data = fcsparser.parse(path)
    if gate == True:
        gated = gaussian_gate(data, alpha=alpha)
    else:
        data['gate'] = 0
        gated = data.copy()

    gated = gated.loc[:, ['FSC-A', 'SSC-A', 'FITC-A', 'gate']]
    gated.to_csv(file_name, index=False)

    if save_metadata:
        meta_df = pd.DataFrame(meta)
        meta_name = '{0}_metadata.csv'.format(path[:-4])
        meta_df.to_csv(meta_name, index=False)
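A hedged usage sketch (placeholder paths; gate=False sidesteps the gaussian_gate dependency, and the input must contain the FSC-A, SSC-A and FITC-A channels the function keeps):

# Convert one file; save_metadata=True also writes data/sample_metadata.csv.
fcs_to_csv("data/sample.fcs", "data/sample.csv", save_metadata=True, gate=False)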
Example #31
    def from_fcs(cls,
                 fcs_file,
                 cofactor=5,
                 metadata_channels=[
                     'Time', 'Event_length', 'DNA1', 'DNA2', 'Cisplatin',
                     'beadDist', 'bead1'
                 ]):

        # Parse the fcs file
        text, data = fcsparser.parse(fcs_file)
        data = data.astype(np.float64)

        # Extract the S and N features (Indexing assumed to start from 1)
        # Assumes channel names are in S
        no_channels = text['$PAR']
        channel_names = [''] * no_channels
        for i in range(1, no_channels + 1):
            # S name
            try:
                channel_names[i - 1] = text['$P%dS' % i]
            except KeyError:
                channel_names[i - 1] = text['$P%dN' % i]
        data.columns = channel_names

        # Metadata and data
        metadata_channels = data.columns.intersection(metadata_channels)
        data_channels = data.columns.difference(metadata_channels)
        metadata = data[metadata_channels]
        data = data[data_channels]

        # Transform if necessary
        if cofactor is not None and cofactor > 0:
            data = np.arcsinh(np.divide(data, cofactor))

        # Create and return scdata object
        scdata = cls(data, 'masscyt', metadata)
        return scdata
Example #32
def test_fcs_header_error():
    path = fcsparser.test_sample_path
    meta, data = fcsparser.parse(path,
                                 reformat_meta=True,
                                 channel_naming="$PnN")
    meta_bad = copy.deepcopy(meta)
    meta_bad["$DATASTART"] = meta_bad["__header__"]["data start"]
    meta_bad["$DATAEND"] = meta_bad["__header__"]["data end"]
    meta_bad["__header__"]["data start"] = 0
    meta_bad["__header__"]["data end"] = 0
    assert (scprep.io.fcs._parse_fcs_header(meta_bad)["$DATASTART"] ==
            scprep.io.fcs._parse_fcs_header(meta)["$DATASTART"])
    assert (scprep.io.fcs._parse_fcs_header(meta_bad)["$DATAEND"] ==
            scprep.io.fcs._parse_fcs_header(meta)["$DATAEND"])

    meta_bad = copy.deepcopy(meta)
    meta_bad["$DATATYPE"] = "invalid"
    utils.assert_raises_message(
        ValueError,
        "Expected $DATATYPE in ['F', 'D']. "
        "Got 'invalid'",
        scprep.io.fcs._parse_fcs_header,
        meta_bad,
    )

    meta_bad = copy.deepcopy(meta)
    for byteord, endian in zip(["4,3,2,1", "1,2,3,4"], [">", "<"]):
        meta_bad["$BYTEORD"] = byteord
        assert scprep.io.fcs._parse_fcs_header(meta_bad)["$ENDIAN"] == endian
    meta_bad["$BYTEORD"] = "invalid"
    utils.assert_raises_message(
        ValueError,
        "Expected $BYTEORD in ['1,2,3,4', '4,3,2,1']. "
        "Got 'invalid'",
        scprep.io.fcs._parse_fcs_header,
        meta_bad,
    )
Example #33
def test_fcs_reformat_meta():
    path = fcsparser.test_sample_path
    meta, data = fcsparser.parse(path, reformat_meta=True)
    X_meta, _, X = scprep.io.load_fcs(path, reformat_meta=True, override=True)
    assert set(meta.keys()) == set(X_meta.keys())
    for key in meta.keys():
        try:
            np.testing.assert_array_equal(meta[key], X_meta[key], key)
        except AssertionError:
            if key == "$NEXTDATA" or (key.startswith("$P")
                                      and key.endswith("B")):
                np.testing.assert_array_equal(meta[key], int(X_meta[key]), key)
            elif key == "_channels_":
                for column in meta[key].columns:
                    X_column = X_meta[key][column].astype(
                        meta[key][column].dtype)
                    np.testing.assert_array_equal(meta[key][column], X_column,
                                                  key + column)
            else:
                raise
    assert 'Time' not in X.columns
    assert len(set(X.columns).difference(data.columns)) == 0
    np.testing.assert_array_equal(X.index, data.index)
    np.testing.assert_array_equal(X.values, data[X.columns].values)
Example #34
def loadDeepCyTOFData(dataPath,
                      dataIndex,
                      relevantMarkers,
                      mode,
                      skip_header=0):
    if mode == 'CSV':
        data_filename = dataPath + '/sample' + str(dataIndex) + '.csv'
        X = genfromtxt(os.path.join(io.DeepLearningRoot(), data_filename),
                       delimiter=',',
                       skip_header=skip_header)
    if mode == 'FCS':
        data_filename = dataPath + '/sample' + str(dataIndex) + '.fcs'
        _, X = fcsparser.parse(os.path.join(io.DeepLearningRoot(),
                                            data_filename),
                               reformat_meta=True)
        X = X.as_matrix()
    X = X[:, relevantMarkers]
    label_filename = dataPath + '/labels' + str(dataIndex) + '.csv'
    labels = genfromtxt(os.path.join(io.DeepLearningRoot(), label_filename),
                        delimiter=',')
    labels = np.int_(labels)
    sample = Sample(X, labels)

    return sample
Example #35
def check_tube(filename, experiment):

    ignore_v = experiment.metadata['ignore_v']

    try:
        tube_meta = fcsparser.parse(
            filename,
            channel_naming=experiment.metadata["name_metadata"],
            meta_data_only=True,
            reformat_meta=True)
    except Exception as e:
        raise util.CytoflowOpError("FCS reader threw an error reading metadata"
                                   " for tube {0}: {1}".format(
                                       filename, str(e)))

    # first make sure the tube has the right channels
    if not set(experiment.channels) <= set(tube_meta["_channel_names_"]):
        raise util.CytoflowOpError(
            "Tube {0} doesn't have the same channels".format(filename))

    tube_channels = tube_meta["_channels_"]
    tube_channels.set_index(experiment.metadata["name_metadata"], inplace=True)
    # next check the per-channel parameters
    for channel in experiment.channels:
        # first check voltage
        if "voltage" in experiment.metadata[channel]:
            if not "$PnV" in tube_channels.ix[channel]:
                raise util.CytoflowOpError("Didn't find a voltage for channel {0}" \
                                   "in tube {1}".format(channel, filename))

            old_v = experiment.metadata[channel]["voltage"]
            new_v = tube_channels.ix[channel]['$PnV']

            if old_v != new_v and not channel in ignore_v:
                raise util.CytoflowOpError(
                    "Tube {0} doesn't have the same voltages".format(filename))
Example #36
def fcs_csv(file, outDir):
    """Convert fcs file to csv.
    
    Args:
        file (str): Path to the directory containing the fcs file.
        outDir (str): Path to save the output csv file.
        
    Returns:
        Converted csv file.
        
    """
    file_name = Path(file).stem
    logger.info('Started converting the fcs file ' + file_name)
    meta, data = fcsparser.parse(file,
                                 meta_data_only=False,
                                 reformat_meta=True)
    logger.info('Saving csv file ' + file_name)
    #Export the file as csv
    os.chdir(outDir)
    export_csv = data.to_csv(r'%s.csv' % file_name,
                             index=None,
                             header=True,
                             encoding='utf-8-sig')
    return export_csv
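A hedged usage sketch (placeholder paths; note that fcs_csv changes the working directory to outDir before writing, and the module-level logger it uses is assumed to be configured):

from pathlib import Path

out_dir = Path("converted")
out_dir.mkdir(exist_ok=True)
fcs_csv("data/sample.fcs", str(out_dir))   # writes converted/sample.csv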
Example #37
    def estimate(self, experiment, subset = None): 
        """
        Estimate the autofluorescence from *blank_file*
        """
        if not experiment:
            raise CytoflowOpError("No experiment specified")
        
        if not set(self.channels) <= set(experiment.channels):
            raise CytoflowOpError("Specified channels that weren't found in "
                               "the experiment.")

        # don't have to validate that blank_file exists; should crap out on 
        # trying to set a bad value
        
        try:
            blank_meta, blank_data = \
                fcsparser.parse(self.blank_file, reformat_meta = True)  
            blank_channels = blank_meta["_channels_"].set_index("$PnN")     
        except Exception as e:
            raise CytoflowOpError("FCS reader threw an error: " + e.value)
        
        for channel in self.channels:
            v = experiment.metadata[channel]['voltage']
            
            if not "$PnV" in blank_channels.ix[channel]:
                raise CytoflowOpError("Didn't find a voltage for channel {0}" \
                                   "in tube {1}".format(channel, self.blank_file))
            
            blank_v = blank_channels.ix[channel]['$PnV']
            
            if blank_v != v:
                raise CytoflowOpError("Voltage differs for channel {0}".format(channel)) 
       
        for channel in self.channels:
            self._af_median[channel] = np.median(blank_data[channel])
            self._af_stdev[channel] = np.std(blank_data[channel])    
Example #38
    def estimate(self, experiment, subset = None): 
        """
        Estimate the mapping from the two-channel controls
        """

        if not experiment:
            raise CytoflowOpError("No experiment specified")
        
        tubes = {}

        for from_channel, to_channel in self.translation.iteritems():
            
            if (from_channel, to_channel) not in self.controls:
                raise CytoflowOpError("Control file for {0} --> {1} "
                                      "not specified"
                                      .format(from_channel, to_channel))
                
            tube_file = self.controls[(from_channel, to_channel)]
            
            if tube_file not in tubes: 
                try:
                    tube_meta, tube_data = fcsparser.parse(tube_file, 
                                                           reformat_meta = True)
                    tube_channels = tube_meta["_channels_"].set_index("$PnN")
                except Exception as e:
                    raise CytoflowOpError("FCS reader threw an error on tube "
                                          "{0}: {1}"
                                          .format(tube_file, e.value))

                # check voltages
                for channel in [from_channel, to_channel]:
                    exp_v = experiment.metadata[channel]['voltage']
                
                    if not "$PnV" in tube_channels.ix[channel]:
                        raise CytoflowOpError("Didn't find a voltage for "
                                              "channel {0} in tube {1}"
                                              .format(channel, 
                                                      self.controls[channel]))
                    
                    control_v = tube_channels.ix[channel]["$PnV"]
                    
                    if control_v != exp_v:
                        raise CytoflowOpError("Voltage differs for channel "
                                              "{0} in tube {1}"
                                              .format(channel, 
                                                      self.controls[channel]))

                # autofluorescence correction
                af = [(channel, (experiment.metadata[channel]['af_median'],
                                 experiment.metadata[channel]['af_stdev'])) 
                      for channel in experiment.channels 
                      if 'af_median' in experiment.metadata[channel]]
                
                for af_channel, (af_median, af_stdev) in af:
                    tube_data[af_channel] = tube_data[af_channel] - af_median
                    tube_data = tube_data[tube_data[af_channel] > -3 * af_stdev]
                    
                tube_data.reset_index(drop = True, inplace = True)
                    
                # bleedthrough correction
                old_tube_data = tube_data.copy()
                bleedthrough = \
                    {channel: experiment.metadata[channel]['piecewise_bleedthrough']
                     for channel in experiment.channels
                     if 'piecewise_bleedthrough' in experiment.metadata[channel]} 

                for channel, (interp_channels, interpolator) in bleedthrough.iteritems():
                    interp_data = old_tube_data[interp_channels]
                    tube_data[channel] = interpolator(interp_data)
        
                # bead calibration
                beads = [(channel, experiment.metadata[channel]['bead_calibration_fn'])
                         for channel in experiment.channels
                         if 'bead_calibration_fn' in experiment.metadata[channel]]
                
                for channel, calibration_fn in beads:
                    tube_data[channel] = calibration_fn(tube_data[channel])

                tubes[tube_file] = tube_data

                
            data = tubes[tube_file][[from_channel, to_channel]]
            data = data[data[from_channel] > 0]
            data = data[data[to_channel] > 0]
            _ = data.reset_index(drop = True, inplace = True)

            if self.mixture_model:    
                gmm = sklearn.mixture.GMM(n_components=2)
                fit = gmm.fit(np.log10(data[from_channel][:, np.newaxis]))
    
                mu_idx = 0 if fit.means_[0][0] > fit.means_[1][0] else 1
                weights = [x[mu_idx] for x in fit.predict_proba(np.log10(data[from_channel][:, np.newaxis]))]
            else:
                weights = [1] * len(data.index)
                
            lr = np.polyfit(np.log10(data[from_channel]), 
                            np.log10(data[to_channel]), 
                            deg = 1, 
                            w = weights)
            
            self._coefficients[(from_channel, to_channel)] = lr
Example #39
    def plot(self, experiment, **kwargs):
        """
        Plot the plots
        """
        
        if not experiment:
            raise CytoflowViewError("No experiment specified")
        
        tubes = {}
        
        plt.figure()
        num_plots = len(self.op.translation.keys())
        plt_idx = 0
        
        for from_channel, to_channel in self.op.translation.iteritems():
            
            if (from_channel, to_channel) not in self.op.controls:
                raise CytoflowOpError("Control file for {0} --> {1} not specified"
                                   .format(from_channel, to_channel))
            tube_file = self.op.controls[(from_channel, to_channel)]
            
            if tube_file not in tubes: 
                
                try:
                    _, tube_data = fcsparser.parse(tube_file,
                                                   reformat_meta = True)
                except Exception as e:
                    raise CytoflowOpError("FCS reader threw an error on tube {0}: {1}"\
                                       .format(tube_file, e.value))
                
                # autofluorescence correction
                af = [(channel, (experiment.metadata[channel]['af_median'],
                                 experiment.metadata[channel]['af_stdev'])) 
                      for channel in experiment.channels 
                      if 'af_median' in experiment.metadata[channel]]
                
                for af_channel, (af_median, af_stdev) in af:
                    tube_data[af_channel] = tube_data[af_channel] - af_median
                    tube_data = tube_data[tube_data[af_channel] > -3 * af_stdev]
                    
                tube_data.reset_index(drop = True, inplace = True)
                    
                # bleedthrough correction
                old_tube_data = tube_data.copy()
                bleedthrough = \
                    {channel: experiment.metadata[channel]['piecewise_bleedthrough']
                     for channel in experiment.channels
                     if 'piecewise_bleedthrough' in experiment.metadata[channel]} 

                for channel, (interp_channels, interpolator) in bleedthrough.iteritems():
                    interp_data = old_tube_data[interp_channels]
                    tube_data[channel] = interpolator(interp_data)
                    
                # bead calibration
                beads = [(channel, experiment.metadata[channel]['bead_calibration_fn'])
                         for channel in experiment.channels
                         if 'bead_calibration_fn' in experiment.metadata[channel]]
                
                for channel, calibration_fn in beads:
                    tube_data[channel] = calibration_fn(tube_data[channel])

                tubes[tube_file] = tube_data
                
            from_range = experiment.metadata[from_channel]['range']
            to_range = experiment.metadata[to_channel]['range']
            data = tubes[tube_file][[from_channel, to_channel]]
            data = data[data[from_channel] > 0]
            data = data[data[to_channel] > 0]
            _ = data.reset_index(drop = True, inplace = True)

            if self.op.mixture_model:    
                plt.subplot(num_plots, 2, plt_idx * 2 + 2)
                plt.xscale('log', nonposx='mask')
                hist_bins = np.logspace(1, math.log(from_range, 2), num = 128, base = 2)
                _ = plt.hist(data[from_channel],
                             bins = hist_bins,
                             histtype = 'stepfilled',
                             antialiased = True)
                plt.xlabel(from_channel)
                
                gmm = sklearn.mixture.GMM(n_components=2)
                fit = gmm.fit(np.log10(data[from_channel][:, np.newaxis]))
    
                mu_idx = 0 if fit.means_[0][0] > fit.means_[1][0] else 1
                weights = [x[mu_idx] for x in fit.predict_proba(np.log10(data[from_channel][:, np.newaxis]))]
                
                plt.axvline(10 ** fit.means_[0][0], color = 'r')
                plt.axvline(10 ** fit.means_[1][0], color = 'r')
            else:
                weights = [1] * len(data.index)
                
            lr = np.polyfit(np.log10(data[from_channel]), 
                            np.log10(data[to_channel]), 
                            deg = 1, 
                            w = weights)
            
            num_cols = 2 if self.op.mixture_model else 1
            plt.subplot(num_plots, num_cols, plt_idx * num_cols + 1)
            plt.xscale('log', nonposx = 'mask')
            plt.yscale('log', nonposy = 'mask')
            plt.xlabel(from_channel)
            plt.ylabel(to_channel)
            plt.xlim(1, from_range)
            plt.ylim(1, to_range)
            
            kwargs.setdefault('alpha', 0.2)
            kwargs.setdefault('s', 1)
            kwargs.setdefault('marker', 'o')
            
            plt.scatter(data[from_channel],
                        data[to_channel],
                        **kwargs)          

            xs = np.logspace(1, math.log(from_range, 2), num = 256, base = 2)
            p = np.poly1d(lr)
            plt.plot(xs, 10 ** p(np.log10(xs)), "--g")
            
            plt_idx = plt_idx + 1
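
A minimal, self-contained sketch of the fit drawn above: a first-degree polynomial in log10 space, overlaid on a log-log scatter. The synthetic channel values and the ~5% bleedthrough slope are assumptions for illustration only, not taken from any real tube.

import numpy as np
import matplotlib.pyplot as plt

rng = np.random.default_rng(0)
from_vals = 10 ** rng.uniform(1, 4, size=1000)                      # synthetic "donor" channel
to_vals = 0.05 * from_vals * 10 ** rng.normal(0, 0.05, size=1000)   # ~5% bleedthrough plus noise

# fit y = slope * x + intercept in log10-log10 space
lr = np.polyfit(np.log10(from_vals), np.log10(to_vals), deg=1)
p = np.poly1d(lr)

xs = np.logspace(1, 4, num=256)
plt.xscale('log')
plt.yscale('log')
plt.scatter(from_vals, to_vals, s=1, alpha=0.2)
plt.plot(xs, 10 ** p(np.log10(xs)), '--g')
plt.show()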
Exemple #40
0
    def apply(self, experiment=None, metadata_only=False):
        """
        Load a new :class:`.Experiment`.  
        
        Parameters
        ----------
        experiment : Experiment
            Ignored
            
        metadata_only : bool (default = False)
            Only "import" the metadata, creating an Experiment with all the
            expected metadata and structure but 0 events.
        
        Returns
        -------
        Experiment
            The new :class:`.Experiment`.  New channels have the following
            metadata:
            
            - **voltage** - int
                The voltage that this channel was collected at.  Determined
                by the ``$PnV`` field from the first FCS file.
                
            - **range** - int
                The maximum range of this channel.  Determined by the ``$PnR``
                field from the first FCS file.
                
            New experimental conditions do not have **voltage** or **range**
            metadata, obviously.  Instead, they have **experiment** set to 
            ``True``, to distinguish the experimental variables from the
            conditions that were added by gates, etc.
            
            If :attr:`ignore_v` is set, it is added as a key to the 
            :class:`.Experiment`-wide metadata.
            
        """

        if not self.tubes or len(self.tubes) == 0:
            raise util.CytoflowOpError('tubes', "Must specify some tubes!")

        # if we have channel renaming, make sure the new names are valid
        # python identifiers
        if self.channels:
            for old_name, new_name in self.channels.items():
                if old_name != new_name and new_name != util.sanitize_identifier(
                        new_name):
                    raise util.CytoflowOpError(
                        'channels', "Channel name {} must be a "
                        "valid Python identifier.".format(new_name))

        # make sure each tube has the same conditions
        tube0_conditions = set(self.tubes[0].conditions)
        for tube in self.tubes:
            tube_conditions = set(tube.conditions)
            if len(tube0_conditions ^ tube_conditions) > 0:
                raise util.CytoflowOpError(
                    'tubes', "Tube {0} didn't have the same "
                    "conditions as tube {1}".format(tube.file,
                                                    self.tubes[0].file))

        # make sure experimental conditions are unique
        for idx, i in enumerate(self.tubes[0:-1]):
            for j in self.tubes[idx + 1:]:
                if i.conditions_equal(j):
                    raise util.CytoflowOpError(
                        'tubes', "The same conditions specified for "
                        "tube {0} and tube {1}".format(i.file, j.file))

        experiment = Experiment()

        experiment.metadata["ignore_v"] = self.ignore_v

        for condition, dtype in list(self.conditions.items()):
            experiment.add_condition(condition, dtype)
            experiment.metadata[condition]['experiment'] = True

        try:
            # silence warnings about duplicate channels;
            # we'll figure that out below
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                tube0_meta = fcsparser.parse(self.tubes[0].file,
                                             data_set=self.data_set,
                                             meta_data_only=True,
                                             reformat_meta=True)
        except Exception as e:
            raise util.CytoflowOpError(
                'tubes', "FCS reader threw an error reading metadata "
                "for tube {}: {}".format(self.tubes[0].file, str(e))) from e

        meta_channels = tube0_meta["_channels_"]

        if self.name_metadata:
            experiment.metadata["name_metadata"] = self.name_metadata
        else:
            experiment.metadata["name_metadata"] = autodetect_name_metadata(
                self.tubes[0].file, data_set=self.data_set)

        meta_channels['Index'] = meta_channels.index
        meta_channels.set_index(experiment.metadata["name_metadata"],
                                inplace=True)

        channels = list(self.channels.keys()) if self.channels \
                   else list(meta_channels.index.values)

        # make sure everything in self.channels is in the tube channels
        for channel in channels:
            if channel not in meta_channels.index:
                raise util.CytoflowOpError(
                    'channels', "Channel {0} not in tube {1}".format(
                        channel, self.tubes[0].file))

        # now that we have the metadata, load it into experiment

        for channel in channels:
            experiment.add_channel(channel)

            experiment.metadata[channel]["fcs_name"] = channel

            # keep track of the channel's PMT voltage
            if ("$PnV" in meta_channels.loc[channel]):
                v = meta_channels.loc[channel]['$PnV']
                if v: experiment.metadata[channel]["voltage"] = v

            # add the maximum possible value for this channel.
            data_range = meta_channels.loc[channel]['$PnR']
            data_range = float(data_range)
            experiment.metadata[channel]['range'] = data_range

        experiment.metadata['fcs_metadata'] = {}
        for tube in self.tubes:
            if metadata_only:
                tube_meta, tube_data = parse_tube(tube.file,
                                                  experiment,
                                                  data_set=self.data_set,
                                                  metadata_only=True)
            else:
                tube_meta, tube_data = parse_tube(tube.file,
                                                  experiment,
                                                  data_set=self.data_set)

                if self.events:
                    if self.events <= len(tube_data):
                        tube_data = tube_data.loc[np.random.choice(
                            tube_data.index, self.events, replace=False)]
                    else:
                        warnings.warn(
                            "Only {0} events in tube {1}".format(
                                len(tube_data), tube.file),
                            util.CytoflowWarning)

                experiment.add_events(tube_data[channels], tube.conditions)

            # extract the row and column from wells collected on a
            # BD HTS
            if 'WELL ID' in tube_meta:
                pos = tube_meta['WELL ID']
                tube_meta['CF_Row'] = pos[0]
                tube_meta['CF_Col'] = int(pos[1:3])

            for i, channel in enumerate(channels):
                # remove the PnV tube metadata

                if '$P{}V'.format(i + 1) in tube_meta:
                    del tube_meta['$P{}V'.format(i + 1)]

                # work around a bug where the PnR is sometimes not the detector range
                # but the data range.
                pnr = '$P{}R'.format(i + 1)
                if pnr in tube_meta and float(
                        tube_meta[pnr]
                ) > experiment.metadata[channel]['range']:
                    experiment.metadata[channel]['range'] = float(
                        tube_meta[pnr])

            tube_meta['CF_File'] = Path(tube.file).stem

            experiment.metadata['fcs_metadata'][tube.file] = tube_meta

        for channel in channels:
            if self.channels and channel in self.channels:
                new_name = self.channels[channel]
                if channel == new_name:
                    continue
                experiment.data.rename(columns={channel: new_name},
                                       inplace=True)
                experiment.metadata[new_name] = experiment.metadata[channel]
                experiment.metadata[new_name]["fcs_name"] = channel
                del experiment.metadata[channel]

            # this catches an odd corner case where some instruments store
            # instrument-specific info in the "extra" bits.  we have to
            # clear them out.
            if tube0_meta['$DATATYPE'] == 'I':
                data_bits = int(meta_channels.loc[channel]['$PnB'])
                data_range = float(meta_channels.loc[channel]['$PnR'])
                range_bits = int(math.log(data_range, 2))

                if range_bits < data_bits:
                    mask = 1
                    for _ in range(1, range_bits):
                        mask = mask << 1 | 1

                    experiment.data[channel] = experiment.data[
                        channel].values.astype('int') & mask

            # re-scale the data to linear if it's recorded as log-scaled with
            # integer channels
            data_range = float(meta_channels.loc[channel]['$PnR'])
            f1 = float(meta_channels.loc[channel]['$PnE'][0])
            f2 = float(meta_channels.loc[channel]['$PnE'][1])

            if f1 > 0.0 and f2 == 0.0:
                warnings.warn(
                    'Invalid $PnE = {},{} for channel {}, changing it to {},1.0'
                    .format(f1, f2, channel, f1), util.CytoflowWarning)
                f2 = 1.0

            if f1 > 0.0 and f2 > 0.0 and tube0_meta['$DATATYPE'] == 'I':
                warnings.warn(
                    'Converting channel {} from logarithmic to linear'.format(
                        channel), util.CytoflowWarning)


#                 experiment.data[channel] = 10 ** (f1 * experiment.data[channel] / data_range) * f2

        return experiment
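
A hedged usage sketch for an apply() like the one above, assuming the surrounding class is cytoflow's ImportOp; the file paths and the "Dox" condition values are placeholders for illustration.

import cytoflow as flow

# Hypothetical tubes; replace the paths and conditions with real ones.
tube1 = flow.Tube(file = "RFP_Well_A3.fcs", conditions = {"Dox" : 10.0})
tube2 = flow.Tube(file = "CFP_Well_A4.fcs", conditions = {"Dox" : 1.0})

import_op = flow.ImportOp(conditions = {"Dox" : "float"},
                          tubes = [tube1, tube2],
                          events = 10000)        # optional random subsampling
ex = import_op.apply()
print(ex.channels, ex.data.shape)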
Exemple #41
0
                    cats = set(self.data[meta_name].cat.categories) | set(new_data[meta_name].cat.categories)
                    self.data[meta_name] = self.data[meta_name].cat.set_categories(cats)
                    new_data[meta_name] = new_data[meta_name].cat.set_categories(cats)
            except (ValueError, TypeError):
                raise CytoflowError("Tube {0} had trouble converting conditions {1} "
                                   "(value = {2}) to type {3}" \
                                   .format(tube_file,
                                           meta_name,
                                           meta_value,
                                           meta_type))
        
        self._tube_conditions.add(frozenset(conditions.items()))
        self.data = self.data.append(new_data, ignore_index = True)
        del new_data


if __name__ == "__main__":
    import fcsparser
    ex = Experiment()
    ex.add_conditions({"time" : "category"})
    
    tube1 = fcsparser.parse('../cytoflow/tests/data/Plate01/CFP_Well_A4.fcs')
    
    tube2 = fcsparser.parse('../cytoflow/tests/data/Plate01/RFP_Well_A3.fcs')
    
    ex.add_tube(tube1, {"time" : "one"})
    ex.add_tube(tube2, {"time" : "two"})
    
    print(ex.data)

Exemple #42
0
    n = mu.shape[0]
    Sigma_det = np.linalg.det(Sigma)
    Sigma_inv = np.linalg.inv(Sigma)
    N = np.sqrt((2*np.pi)**n * Sigma_det)
    # This einsum call calculates (x-mu)T.Sigma-1.(x-mu) in a vectorized
    # way across all the input variables.
    fac = np.einsum('...k,kl,...l->...', pos-mu, Sigma_inv, pos-mu)

    return np.exp(-fac / 2) / N
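
The preceding lines look like the body of a multivariate normal PDF helper; a self-contained sketch under that assumption (the function name and the evaluation grid below are illustrative, not from the original script).

import numpy as np

def multivariate_gaussian(pos, mu, Sigma):
    # Density of N(mu, Sigma) evaluated at every point in `pos`, an (..., k) array.
    n = mu.shape[0]
    Sigma_det = np.linalg.det(Sigma)
    Sigma_inv = np.linalg.inv(Sigma)
    N = np.sqrt((2 * np.pi) ** n * Sigma_det)
    fac = np.einsum('...k,kl,...l->...', pos - mu, Sigma_inv, pos - mu)
    return np.exp(-fac / 2) / N

# Evaluate a 2-D standard normal on a small grid.
xs, ys = np.meshgrid(np.linspace(-2, 2, 5), np.linspace(-2, 2, 5))
pos = np.dstack((xs, ys))
print(multivariate_gaussian(pos, mu=np.zeros(2), Sigma=np.eye(2)))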
for index, row in iters2:
    print("test")#shameless cherrypicking for example


    data_dir = '../FCS/'
    example_file = row.filename  # e.g. "Huseyin2019-09-26.0211.fcs" or "Huseyin2019-09-24.0072.fcs"
    meta, data = fcsparser.parse(data_dir + example_file,
                                 meta_data_only=False,
                                 reformat_meta=True)
    data.columns = [x.strip().replace('-', '_') for x in data.columns]

    df2 = row  # equivalent to df[df.filename == example_file].iloc[0]
    data["GFP_Decomposed"] = np.exp(
        (np.log(data["GFP_H"]) * df2["log_std_v"]
         - df2["log_std_gfp"] * np.log(data["FSC_H"]) * df2["log_rho"]
         + df2["log_std_gfp"] * df2["log_mean_v_mean"] * df2["log_rho"])
        / df2["log_std_v"])
    f, [[ax, ax1], [ax2, ax3]] = plt.subplots(ncols=2, nrows=2, figsize=(10, 10),
                                              sharey=True, sharex=True,
                                              gridspec_kw={'wspace': 0, 'hspace': 0})

    f.suptitle(row.filename)
    ax.set_ylim([2,4.5])
    ax.set_xlim([1,4])
    ax2.axvline(df2["log_mean_v_mean"])
    ax2.text(df2["log_mean_v_mean"]+0.02,1.5+1+0.15,'Context average',rotation=90)
    ax.text(df2["log_mean_v_mean"]+0.02,1.5+1+0.15,'Context average',rotation=90)
    ax.axvline(df2["log_mean_v_mean"])

    #ax1.set_ylim([-1,5])
Exemple #43
0
    def apply(self, experiment = None):
        
        if not self.tubes or len(self.tubes) == 0:
            raise util.CytoflowOpError("Must specify some tubes!")
        
        # make sure each tube has the same conditions
        tube0_conditions = set(self.tubes[0].conditions)
        for tube in self.tubes:
            tube_conditions = set(tube.conditions)
            if len(tube0_conditions ^ tube_conditions) > 0:
                raise util.CytoflowOpError("Tube {0} didn't have the same "
                                      "conditions as tube {1}"
                                      .format(tube.file, self.tubes[0].file))

        # make sure experimental conditions are unique
        for idx, i in enumerate(self.tubes[0:-1]):
            for j in self.tubes[idx+1:]:
                if i.conditions_equal(j):
                    raise util.CytoflowOpError("The same conditions specified for "
                                          "tube {0} and tube {1}"
                                          .format(i.file, j.file))
        
        experiment = Experiment()
        
        experiment.metadata["ignore_v"] = self.ignore_v
            
        for condition, dtype in self.conditions.items():
            experiment.add_condition(condition, dtype)

        try:
            # silence warnings about duplicate channels;
            # we'll figure that out below
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                tube0_meta = fcsparser.parse(self.tubes[0].file,
                                             meta_data_only = True,
                                             reformat_meta = True)
        except Exception as e:
            raise util.CytoflowOpError("FCS reader threw an error reading metadata "
                                       " for tube {0}: {1}"
                                       .format(self.tubes[0].file, str(e)))
              
        meta_channels = tube0_meta["_channels_"]
        
        if self.name_metadata:
            experiment.metadata["name_metadata"] = self.name_metadata
        else:
            # try to autodetect the metadata
            if "$PnN" in meta_channels and not "$PnS" in meta_channels:
                experiment.metadata["name_metadata"] = "$PnN"
            elif "$PnN" not in meta_channels and "$PnS" in meta_channels:
                experiment.metadata["name_metadata"] = "$PnS"
            else:
                PnN = meta_channels["$PnN"]
                PnS = meta_channels["$PnS"]
                
                # sometimes one is unique and the other isn't
                if (len(set(PnN)) == len(PnN) and 
                    len(set(PnS)) != len(PnS)):
                    experiment.metadata["name_metadata"] = "$PnN"
                elif (len(set(PnN)) != len(PnN) and 
                      len(set(PnS)) == len(PnS)):
                    experiment.metadata["name_metadata"] = "$PnS"
                else:
                    # as per fcsparser.api, $PnN is the "short name" (like FL-1)
                    # and $PnS is the "actual name" (like "FSC-H").  so let's
                    # use $PnS.
                    experiment.metadata["name_metadata"] = "$PnS"

        meta_channels.set_index(experiment.metadata["name_metadata"], 
                                inplace = True)
        
        # now that we have the metadata, load it into experiment

        for channel in meta_channels.index:
            experiment.add_channel(channel)
            
            # keep track of the channel's PMT voltage
            if("$PnV" in meta_channels.ix[channel]):
                v = meta_channels.ix[channel]['$PnV']
                if v: experiment.metadata[channel]["voltage"] = v
            
            # add the maximum possible value for this channel.
            data_range = meta_channels.loc[channel]['$PnR']
            data_range = float(data_range)
            experiment.metadata[channel]['range'] = data_range
        
        for tube in self.tubes:
            tube_data = parse_tube(tube.file, experiment, self.ignore_v)

            if self.coarse_events:
                if self.coarse_events <= len(tube_data):
                    tube_data = tube_data.loc[np.random.choice(tube_data.index,
                                                               self.coarse_events,
                                                               replace = False)]
                else:
                    warnings.warn("Only {0} events in tube {1}"
                                  .format(len(tube_data), tube.file),
                                  util.CytoflowWarning)

            experiment.add_events(tube_data, tube.conditions)
            
        return experiment
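
The $PnN/$PnS autodetection above can be reproduced directly with fcsparser. A small sketch that inspects both keywords and keeps whichever set of names is unique; the file path is a placeholder, and it assumes both keywords are present in the metadata.

import fcsparser

meta = fcsparser.parse("some_tube.fcs",            # placeholder path
                       meta_data_only=True,
                       reformat_meta=True)
channels = meta["_channels_"]

pnn = list(channels["$PnN"])
pns = list(channels["$PnS"])
if len(set(pnn)) == len(pnn) and len(set(pns)) != len(pns):
    name_metadata = "$PnN"
else:
    name_metadata = "$PnS"   # the descriptive "stain" name, as the fallback above
print("Using", name_metadata, "->", list(channels[name_metadata]))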
def SimpleOverlay(filebig, bluefolder, redfolder, greenfolder, title, bluelabel, redlabel, greenlabel, nonelabel, savespace):
    dfbig = []
    for files in os.listdir(filebig):
        pathname = os.path.join(filebig, files)
        if '.DS_Store' not in pathname:
            if os.path.isfile(pathname):
                dfbig.append(fcsparser.parse(pathname)[1])
    dfbigcomb = pd.concat(dfbig)

    titles = list(dfbigcomb.columns)
    channels = titles[4:len(titles)-2]

    dfbigadj = dfbigcomb[channels].applymap(lambda x:np.arcsinh(x/150))
    dfbigadj['FSC-A'] = dfbigcomb['FSC-A']/dfbigcomb['FSC-A'].max() * 10
    dfbigadj['SSC-A'] = dfbigcomb['SSC-A']/dfbigcomb['FSC-A'].max() * 10
    
    dfblue = []
    for files in os.listdir(bluefolder):
        pathname = os.path.join(bluefolder, files)
        if '.DS_Store' not in pathname:
            if os.path.isfile(pathname):
                dfblue.append(fcsparser.parse(pathname)[1])
    dfbluecomb = pd.concat(dfblue)
    
    dfred = []
    for files in os.listdir(redfolder):
        pathname = os.path.join(redfolder, files)
        if '.DS_Store' not in pathname:
            if os.path.isfile(pathname):
                dfred.append(fcsparser.parse(pathname)[1])
    dfredcomb = pd.concat(dfred)
    
    dfgreen = []
    for files in os.listdir(greenfolder):
        pathname = os.path.join(greenfolder, files)
        if '.DS_Store' not in pathname:
            if os.path.isfile(pathname):
                dfgreen.append(fcsparser.parse(pathname)[1])
    dfgreencomb = pd.concat(dfgreen)
    

    cbtime = dfbigcomb['Time'].isin(dfbluecomb['Time'])
    browtime = np.where(cbtime == True)[0]
    browind = list(browtime)
    bnotrowtime = np.where(cbtime == False)[0]
    bnotrowind = list(bnotrowtime)
    
    crtime = dfbigcomb['Time'].isin(dfredcomb['Time'])
    rrowtime = np.where(crtime == True)[0]
    rrowind = list(rrowtime)
    rnotrowtime = np.where(crtime == False)[0]
    rnotrowind = list(rnotrowtime)
    
    cgtime = dfbigcomb['Time'].isin(dfgreencomb['Time'])
    growtime = np.where(cgtime == True)[0]
    growind = list(growtime)
    gnotrowtime = np.where(cgtime == False)[0]
    gnotrowind = list(gnotrowtime)

    e = umap.UMAP(random_state=0).fit_transform(dfbigadj)

    plt.scatter(e[bnotrowind,0], e[bnotrowind,1], s=.1, c=('#ABB2B9'))
    plt.scatter(e[rnotrowind,0], e[rnotrowind,1], s=.1, c=('#ABB2B9'))
    plt.scatter(e[gnotrowind,0], e[gnotrowind,1], s=.1, c=('#ABB2B9'))
    plt.scatter(e[rrowind,0], e[rrowind,1], s=.1, c='r')
    plt.scatter(e[browind,0], e[browind,1], s=.1, c='b')
    plt.scatter(e[growind,0], e[growind,1], s=.1, c='g')
    plt.title(title)
    plt.xticks([])
    plt.yticks([])
    redp = mpatches.Patch(color='red', label=redlabel)
    bluep = mpatches.Patch(color='b', label=bluelabel)
    greenp = mpatches.Patch(color='g', label=greenlabel)
    greyp = mpatches.Patch(color=('#ABB2B9'), label=nonelabel)
    plt.legend(handles=[redp, bluep, greenp, greyp])
atc_conc = 10  # in ng/mL
RUN_NO = 2
promoter = '27yfp'
gating_fraction = 0.4

## Hardcoded arrangement garbage.
xan_mgml = (0, 0, 0, 0.05, 0.1, 0.25, 0.5, 0.75, 1.0, 1.5, 2.0, 2.5, 3.5, 4.0,
            5.0, 6.0, 7.0, 8.0, 9.0, 10.0)
_strains = [['auto'], ['delta'], ['dilution'] * 18]
strains = [l[i] for l in _strains for i in range(len(l))]

# Define directories and search pattern
src = '../../../data/flow/fcs/'
dst = '../../../data/flow/csv/'
pattern = f'RP{DATE[:4]}-{DATE[4:6]}-{DATE[6:]}_r{RUN_NO}'

# Get the names of the files.
files = np.sort(glob.glob(f'{src}{pattern}*.fcs'))

# %%Iterate through each strain and concentration.
for s, c, f in zip(strains, xan_mgml, files):
    # Define the new name.
    new_name = f'{DATE}_r{RUN_NO}_{promoter}_{s}_{atc_conc}ngmlATC_{c}mgmlXAN'

    # Load the data using fcs parser and save to csv.
    _, data = fcsparser.parse(f)
    data.to_csv(f'{dst}{new_name}.csv')

    # Rename the FCS file.
    os.rename(f, f'{src}{new_name}.fcs')
Exemple #46
0
    def apply(self, experiment=None):
        """
        Load a new :class:`.Experiment`.  
        
        Returns
        -------
        Experiment
            The new :class:`.Experiment`.  New channels have the following
            metadata:
            
            - **voltage** - int
                The voltage that this channel was collected at.  Determined
                by the ``$PnV`` field from the first FCS file.
                
            - **range** - int
                The maximum range of this channel.  Determined by the ``$PnR``
                field from the first FCS file.
                
            New experimental conditions do not have **voltage** or **range**
            metadata, obviously.  Instead, they have **experiment** set to 
            ``True``, to distinguish the experimental variables from the
            conditions that were added by gates, etc.
            
            If :attr:`ignore_v` is set, it is added as a key to the 
            :class:`.Experiment`-wide metadata.
            
        """

        if not self.tubes or len(self.tubes) == 0:
            raise util.CytoflowOpError('tubes', "Must specify some tubes!")

        # if we have channel renaming, make sure the new names are valid
        # python identifiers
        if self.channels:
            for old_name, new_name in self.channels.items():
                if old_name != new_name and new_name != util.sanitize_identifier(
                        new_name):
                    raise util.CytoflowOpError(
                        'channels', "Channel name {} must be a "
                        "valid Python identifier.".format(new_name))

        # make sure each tube has the same conditions
        tube0_conditions = set(self.tubes[0].conditions)
        for tube in self.tubes:
            tube_conditions = set(tube.conditions)
            if len(tube0_conditions ^ tube_conditions) > 0:
                raise util.CytoflowOpError(
                    'tubes', "Tube {0} didn't have the same "
                    "conditions as tube {1}".format(tube.file,
                                                    self.tubes[0].file))

        # make sure experimental conditions are unique
        for idx, i in enumerate(self.tubes[0:-1]):
            for j in self.tubes[idx + 1:]:
                if i.conditions_equal(j):
                    raise util.CytoflowOpError(
                        'tubes', "The same conditions specified for "
                        "tube {0} and tube {1}".format(i.file, j.file))

        experiment = Experiment()

        experiment.metadata["ignore_v"] = self.ignore_v

        for condition, dtype in list(self.conditions.items()):
            experiment.add_condition(condition, dtype)
            experiment.metadata[condition]['experiment'] = True

        try:
            # silence warnings about duplicate channels;
            # we'll figure that out below
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                tube0_meta = fcsparser.parse(self.tubes[0].file,
                                             meta_data_only=True,
                                             reformat_meta=True)
        except Exception as e:
            raise util.CytoflowOpError(
                'tubes', "FCS reader threw an error reading metadata "
                "for tube {}".format(self.tubes[0].file)) from e

        meta_channels = tube0_meta["_channels_"]

        if self.name_metadata:
            experiment.metadata["name_metadata"] = self.name_metadata
        else:
            # try to autodetect the metadata
            if "$PnN" in meta_channels and not "$PnS" in meta_channels:
                experiment.metadata["name_metadata"] = "$PnN"
            elif "$PnN" not in meta_channels and "$PnS" in meta_channels:
                experiment.metadata["name_metadata"] = "$PnS"
            else:
                PnN = meta_channels["$PnN"]
                PnS = meta_channels["$PnS"]

                # sometimes one is unique and the other isn't
                if (len(set(PnN)) == len(PnN) and len(set(PnS)) != len(PnS)):
                    experiment.metadata["name_metadata"] = "$PnN"
                elif (len(set(PnN)) != len(PnN) and len(set(PnS)) == len(PnS)):
                    experiment.metadata["name_metadata"] = "$PnS"
                else:
                    # as per fcsparser.api, $PnN is the "short name" (like FL-1)
                    # and $PnS is the "actual name" (like "FSC-H").  so let's
                    # use $PnS.
                    experiment.metadata["name_metadata"] = "$PnS"

        meta_channels.set_index(experiment.metadata["name_metadata"],
                                inplace=True)

        channels = list(self.channels.keys()) if self.channels \
                   else list(tube0_meta["_channel_names_"])

        # make sure everything in self.channels is in the tube channels

        for channel in channels:
            if channel not in meta_channels.index:
                raise util.CytoflowOpError(
                    'channels', "Channel {0} not in tube {1}".format(
                        channel, self.tubes[0].file))

        # now that we have the metadata, load it into experiment

        for channel in channels:
            experiment.add_channel(channel)

            experiment.metadata[channel]["fcs_name"] = channel

            # keep track of the channel's PMT voltage
            if ("$PnV" in meta_channels.loc[channel]):
                v = meta_channels.loc[channel]['$PnV']
                if v: experiment.metadata[channel]["voltage"] = v

            # add the maximum possible value for this channel.
            data_range = meta_channels.loc[channel]['$PnR']
            data_range = float(data_range)
            experiment.metadata[channel]['range'] = data_range

        experiment.metadata['fcs_metadata'] = {}
        for tube in self.tubes:
            tube_meta, tube_data = parse_tube(tube.file, experiment)

            if self.events:
                if self.events <= len(tube_data):
                    tube_data = tube_data.loc[np.random.choice(tube_data.index,
                                                               self.events,
                                                               replace=False)]
                else:
                    warnings.warn(
                        "Only {0} events in tube {1}".format(
                            len(tube_data), tube.file), util.CytoflowWarning)

            experiment.add_events(tube_data[channels], tube.conditions)
            experiment.metadata['fcs_metadata'][tube.file] = tube_meta

        for channel in channels:
            if self.channels and channel in self.channels:
                new_name = self.channels[channel]
                if channel == new_name:
                    continue
                experiment.data.rename(columns={channel: new_name},
                                       inplace=True)
                experiment.metadata[new_name] = experiment.metadata[channel]
                experiment.metadata[new_name]["fcs_name"] = channel
                del experiment.metadata[channel]

        return experiment
Exemple #47
0
                                  color='blue')
            self._cursor.connect_event('button_press_event', self._onclick)
            
        elif self._cursor:
            self._cursor.disconnect_events()
            self._cursor = None
            
    def _onclick(self, event):
        """Update the threshold location"""
        self.op.threshold = event.xdata
        
if __name__ == '__main__':
    import cytoflow as flow
    import fcsparser

    tube1 = fcsparser.parse('../../cytoflow/tests/data/Plate01/RFP_Well_A3.fcs',
                            reformat_meta = True)

    tube2 = fcsparser.parse('../../cytoflow/tests/data/Plate01/CFP_Well_A4.fcs',
                            reformat_meta = True)
    
    ex = flow.Experiment()
    ex.add_conditions({"Dox" : "float"})
    
    ex.add_tube(tube1, {"Dox" : 10.0})
    ex.add_tube(tube2, {"Dox" : 1.0})
    
    hlog = flow.HlogTransformOp()
    hlog.name = "Hlog transformation"
    hlog.channels = ['Y2-A']
    ex2 = hlog.apply(ex)
    
Exemple #48
0
    def estimate(self, experiment, subset = None): 
        """
        Estimate the calibration coefficients from the beads file.
        """
        if not experiment:
            raise CytoflowOpError("No experiment specified")
        
        try:
            beads_meta, beads_data = fcsparser.parse(self.beads_file, 
                                                     reformat_meta = True)
            beads_channels = beads_meta["_channels_"].set_index("$PnN")
        except Exception as e:
            raise CytoflowOpError("FCS reader threw an error on tube {0}: {1}"\
                               .format(self.beads_file, str(e)))
        
        channels = self.units.keys()

        # make sure the voltages didn't change
        
        for channel in channels:
            exp_v = experiment.metadata[channel]['voltage']
        
            if not "$PnV" in beads_channels.ix[channel]:
                raise CytoflowOpError("Didn't find a voltage for channel {0}" \
                                   "in tube {1}".format(channel, self.beads_file))
            
            control_v = beads_channels.ix[channel]['$PnV']
            
            if control_v != exp_v:
                raise CytoflowOpError("Voltage differs for channel {0} in tube {1}"
                                   .format(channel, self.beads_file))
    

        for channel in channels:
            data = beads_data[channel]
            
            # bin the data on a log scale
            data_range = experiment.metadata[channel]['range']
            hist_bins = np.logspace(1, math.log(data_range, 2), num = 256, base = 2)
            hist = np.histogram(data, bins = hist_bins)
            
            # mask off-scale values
            hist[0][0] = 0
            hist[0][-1] = 0
            
            # smooth it with a Savitzky-Golay filter
            hist_smooth = scipy.signal.savgol_filter(hist[0], 5, 1)
            
            # find peaks
            peak_bins = scipy.signal.find_peaks_cwt(hist_smooth, 
                                                    widths = np.arange(3, 20),
                                                    max_distances = np.arange(3, 20) / 2)
            
            # filter by height and intensity
            peak_threshold = np.percentile(hist_smooth, self.bead_peak_quantile)
            peak_bins_filtered = \
                [x for x in peak_bins if hist_smooth[x] > peak_threshold 
                 and hist[1][x] > self.bead_brightness_threshold]
            
            peaks = [hist_bins[x] for x in peak_bins_filtered]
            
            mef_unit = self.units[channel]
            
            if not mef_unit in self.beads:
                raise CytoflowOpError("Invalid unit {0} specified for channel {1}".format(mef_unit, channel))
            
            # "mean equivalent fluorochrome"
            mef = self.beads[mef_unit]
            
            if len(peaks) == 0:
                raise CytoflowOpError("Didn't find any peaks; check the diagnostic plot")
            elif len(peaks) > len(self.beads):
                raise CytoflowOpError("Found too many peaks; check the diagnostic plot")
            elif len(peaks) == 1:
                # if we only have one peak, assume it's the brightest peak
                self._coefficients[channel] = [mef[-1] / peaks[0]] 
            elif len(peaks) == 2:
                # if we have only two peaks, assume they're the brightest two
                self._coefficients[channel] = \
                    [(mef[-1] - mef[-2]) / (peaks[1] - peaks[0])]
            else:
                # if there are n > 2 peaks, check all the contiguous n-subsets
                # of mef for the one whose linear regression with the peaks
                # has the smallest (norm) sum-of-residuals.
                
                # do it in log10 space because otherwise the brightest peaks
                # have an outsized influence.
                
                best_resid = np.inf
                for start, end in [(x, x+len(peaks)) for x in range(len(mef) - len(peaks) + 1)]:
                    mef_subset = mef[start:end]
                    
                    # linear regression of the peak locations against mef subset
                    lr = np.polyfit(np.log10(peaks), 
                                    np.log10(mef_subset), 
                                    deg = 1, 
                                    full = True)
                    
                    resid = lr[1][0]
                    if resid < best_resid:
                        best_lr = lr[0]
                        best_resid = resid
                        
                self._coefficients[channel] = (best_lr[0], best_lr[1])
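
A sketch of the contiguous-subset search above on made-up numbers: with more known MEF values than detected peaks, try every window of len(peaks) consecutive values and keep the log-log fit with the smallest residual. The bead values and peak locations here are illustrative assumptions, not real calibration data.

import numpy as np

mef = [792, 2079, 6588, 16471, 47497, 137049, 271647]   # illustrative bead MEF values
peaks = [210.0, 1450.0, 9800.0]                         # illustrative detected peaks

best_resid, best_lr = np.inf, None
for start in range(len(mef) - len(peaks) + 1):
    subset = mef[start:start + len(peaks)]
    # linear regression of log-peaks against the log-MEF window
    lr, resid, *_ = np.polyfit(np.log10(peaks), np.log10(subset), deg=1, full=True)
    if resid[0] < best_resid:
        best_resid, best_lr = resid[0], lr

slope, intercept = best_lr
print("calibration: mef ~ 10**{:.3f} * peak**{:.3f}".format(intercept, slope))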
def main(samplesheet_path: str, output_path: str):
    samplesheet = pd.read_csv(samplesheet_path, sep="\t")

    seen_wells = set()
    for index, well in samplesheet.iterrows():
        print(well["filepath"])
        metadata, data = fcsparser.parse(well.filepath,
                                         meta_data_only=False,
                                         reformat_meta=True)
        intended_well = well.row + str(well.column).zfill(2)
        if intended_well != metadata["WELL_ID"]:
            warnings.warn(
                "The file {filepath} for plate {plate}, "
                "well {iwell} reports that it comes from well {awell}.".format(
                    filepath=well["filepath"],
                    plate=well["plate"],
                    iwell=intended_well,
                    awell=metadata["WELL_ID"]))
        well_identifier = (well.time, well.plate, well.row, well.column)
        if well_identifier in seen_wells:
            warnings.warn(
                "Plate {plate}, well {well} was listed more than "
                "once for timepoint {time} in the sample sheet.".format(
                    plate=well["plate"], well=intended_well,
                    time=well["time"]))
        else:
            seen_wells.add(well_identifier)

        df = pd.DataFrame({
            "treatment_time": well["time"],
            "plate": well["plate"],
            "column": well["column"],
            "row": well["row"],
            "diamide": well["diamide"],
            "condition": well["condition"],
            "control": well["control"],
            "condition_fluor": well["condition_fluor"],
            "control_fluor": well["control_fluor"],
            "replicate": well["replicate"],
            "filepath": well["filepath"],
            "FSC_H": (data["FSC LinH"] if "FSC LinH" in data.columns else nan),
            "FSC_A": (data["FSC LinA"] if "FSC LinA" in data.columns else nan),
            "SSC_H": (data["SSC LinH"] if "SSC LinH" in data.columns else nan),
            "SSC_A": (data["SSC LinA"] if "SSC LinA" in data.columns else nan),
            "YFP_H": (data["FITC(530/30) LinH"]
                      if "FITC(530/30) LinH" in data.columns else nan),
            "YFP_A": (data["FITC(530/30) LinA"]
                      if "FITC(530/30) LinA" in data.columns else nan),
            "mCherry_H": (data["MCherry(615/30) LinH"]
                          if "MCherry(615/30) LinH" in data.columns else nan),
            "mCherry_A": (data["MCherry(615/30) LinA"]
                          if "MCherry(615/30) LinA" in data.columns else nan),
            "width": data["Width"],
            "cytometer_time": data["Time"]
        })
        df.to_csv(output_path,
                  sep="\t",
                  index=False,
                  mode="w" if index == 0 else "a",
                  header=True if index == 0 else False,
                  na_rep="NA")

    expected_wells = set(itertools.product([0,1,2], [1], string.ascii_uppercase[:8], range(1, 13))).union( \
            set(itertools.product([0,1,2], [2], string.ascii_uppercase[:4], range(1, 13))))

    missing_wells = expected_wells - seen_wells

    for missing_well in missing_wells:
        warnings.warn(
            "No data provided for timepoint {time}, plate {plate}, well {well}."
            .format(time=missing_well[0],
                    plate=missing_well[1],
                    well=missing_well[2] + str(missing_well[3])))
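
A minimal way to drive main() from the command line; the argparse wiring below is an assumption for illustration, and the original script may invoke it differently.

if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Flatten FCS wells into one TSV")
    parser.add_argument("samplesheet", help="tab-separated sample sheet")
    parser.add_argument("output", help="path of the combined TSV to write")
    args = parser.parse_args()

    main(args.samplesheet, args.output)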
Exemple #50
0
    def _on_add_tubes(self):
        """
        Handle "Add tubes..." button.  Add tubes to the experiment.
        """

        # TODO - adding a set of files, then a condition, then another
        # set doesn't work.

        file_dialog = FileDialog()
        file_dialog.wildcard = "Flow cytometry files (*.fcs)|*.fcs|"
        file_dialog.action = "open files"
        file_dialog.open()

        if file_dialog.return_code != PyfaceOK:
            return

        for path in file_dialog.paths:
            try:
                tube_meta = fcsparser.parse(path, meta_data_only=True, reformat_meta=True)
                # tube_channels = tube_meta["_channels_"].set_index("$PnN")
            except Exception as e:
                raise RuntimeError("FCS reader threw an error on tube {0}: {1}".format(path, str(e)))

            # if we're the first tube loaded, create a dummy experiment
            if not self.model.dummy_experiment:
                self.model.dummy_experiment = ImportOp(tubes=[CytoflowTube(file=path)], coarse_events=1).apply()

            # check the next tube against the dummy experiment
            try:
                check_tube(path, self.model.dummy_experiment)
            except util.CytoflowError as e:
                error(None, e.__str__(), "Error importing tube")
                return

            tube = Tube()

            for trait_name, trait in self.model.tube_traits.items():
                # TODO - do we still need to check for transient?
                tube.add_trait(trait_name, trait)

                # this magic makes sure the trait is actually defined
                # in tube.__dict__, so it shows up in trait_names etc.
                tube.trait_set(**{trait_name: trait.default_value})
                if trait.condition:
                    tube.on_trait_change(self._try_multiedit, trait_name)

            tube.trait_set(file=path, parent=self.model)

            if "$SRC" in tube_meta:
                self._add_metadata("$SRC", "$SRC", Str(condition=False))
                tube.trait_set(**{"$SRC": tube_meta["$SRC"]})

            if "TUBE NAME" in tube_meta:
                self._add_metadata("TUBE NAME", "TUBE NAME", Str(condition=False))
                tube.trait_set(**{"TUBE NAME": tube_meta["TUBE NAME"]})

            if "$SMNO" in tube_meta:
                self._add_metadata("$SMNO", "$SMNO", Str(condition=False))
                tube.trait_set(**{"$SMNO": tube_meta["SMNO"]})

            self.model.tubes.append(tube)
            self.btn_add_cond.setEnabled(True)
    def test_parse(self):
        """Verify that the fcs parser behaves as expected."""
        self.maxDiff = None
        meta = parse(test_data_file, meta_data_only=True)

        expected_meta = {
            u'$BEGINANALYSIS': u'0',
            u'$BEGINDATA': u'1892',
            u'$BEGINSTEXT': u'0',
            u'$BTIM': u'11:47:24',
            u'$BYTEORD': u'1,2,3,4',
            u'$CELLS': u'PID_101_MG1655_Transformants_D01',
            u'$CYT': u'MACSQuant',
            u'$CYTSN': u'3057',
            u'$DATATYPE': u'F',
            u'$DATE': u'2013-Jul-19',
            u'$ENDANALYSIS': u'0',
            u'$ENDDATA': u'641891',
            u'$ENDSTEXT': u'0',
            u'$ETIM': u'11:47:46',
            u'$FIL': u'EY_2013-07-19_PID_101_MG1655_Transformants_D01_Well_A3.001.fcs',
            u'$MODE': u'L',
            u'$NEXTDATA': 0,
            u'$OP': u'Eugene',
            u'$P10B': 32,
            u'$P10E': u'0.000000,0.000000',
            u'$P10G': u'1',
            u'$P10N': u'V2-W',
            u'$P10R': u'262144',
            u'$P10S': u'V2-W',
            u'$P11B': 32,
            u'$P11E': u'0.000000,0.000000',
            u'$P11G': u'1',
            u'$P11N': u'Y2-A',
            u'$P11R': u'262144',
            u'$P11S': u'Y2-A',
            u'$P12B': 32,
            u'$P12E': u'0.000000,0.000000',
            u'$P12G': u'1',
            u'$P12N': u'Y2-H',
            u'$P12R': u'262144',
            u'$P12S': u'Y2-H',
            u'$P13B': 32,
            u'$P13E': u'0.000000,0.000000',
            u'$P13G': u'1',
            u'$P13N': u'Y2-W',
            u'$P13R': u'262144',
            u'$P13S': u'Y2-W',
            u'$P14B': 32,
            u'$P14E': u'0.000000,0.000000',
            u'$P14G': u'1',
            u'$P14N': u'B1-A',
            u'$P14R': u'262144',
            u'$P14S': u'B1-A',
            u'$P15B': 32,
            u'$P15E': u'0.000000,0.000000',
            u'$P15G': u'1',
            u'$P15N': u'B1-H',
            u'$P15R': u'262144',
            u'$P15S': u'B1-H',
            u'$P16B': 32,
            u'$P16E': u'0.000000,0.000000',
            u'$P16G': u'1',
            u'$P16N': u'B1-W',
            u'$P16R': u'262144',
            u'$P16S': u'B1-W',
            u'$P1B': 32,
            u'$P1E': u'0.000000,0.000000',
            u'$P1G': u'1',
            u'$P1N': u'HDR-T',
            u'$P1R': u'262144',
            u'$P1S': u'HDR-T',
            u'$P2B': 32,
            u'$P2E': u'0.000000,0.000000',
            u'$P2G': u'1',
            u'$P2N': u'FSC-A',
            u'$P2R': u'262144',
            u'$P2S': u'FSC-A',
            u'$P3B': 32,
            u'$P3E': u'0.000000,0.000000',
            u'$P3G': u'1',
            u'$P3N': u'FSC-H',
            u'$P3R': u'262144',
            u'$P3S': u'FSC-H',
            u'$P4B': 32,
            u'$P4E': u'0.000000,0.000000',
            u'$P4G': u'1',
            u'$P4N': u'FSC-W',
            u'$P4R': u'262144',
            u'$P4S': u'FSC-W',
            u'$P5B': 32,
            u'$P5E': u'0.000000,0.000000',
            u'$P5G': u'1',
            u'$P5N': u'SSC-A',
            u'$P5R': u'262144',
            u'$P5S': u'SSC-A',
            u'$P6B': 32,
            u'$P6E': u'0.000000,0.000000',
            u'$P6G': u'1',
            u'$P6N': u'SSC-H',
            u'$P6R': u'262144',
            u'$P6S': u'SSC-H',
            u'$P7B': 32,
            u'$P7E': u'0.000000,0.000000',
            u'$P7G': u'1',
            u'$P7N': u'SSC-W',
            u'$P7R': u'262144',
            u'$P7S': u'SSC-W',
            u'$P8B': 32,
            u'$P8E': u'0.000000,0.000000',
            u'$P8G': u'1',
            u'$P8N': u'V2-A',
            u'$P8R': u'262144',
            u'$P8S': u'V2-A',
            u'$P9B': 32,
            u'$P9E': u'0.000000,0.000000',
            u'$P9G': u'1',
            u'$P9N': u'V2-H',
            u'$P9R': u'262144',
            u'$P9S': u'V2-H',
            u'$PAR': 16,
            u'$SRC': u'A3',
            u'$SYS': u'MACSQuantify,2.4.1247.1dev',
            u'$TOT': 10000,
            '__header__': {'FCS format': b'FCS3.0',
                           'analysis end': 0,
                           'analysis start': 0,
                           'data end': 641891,
                           'data start': 1892,
                           'text end': 1824,
                           'text start': 256}
        }
        self.assertEqual(meta, expected_meta)

        meta, df = parse(test_data_file, meta_data_only=False)

        self.assertEqual(meta, expected_meta)

        expected_columns = [u'HDR-T', u'FSC-A', u'FSC-H', u'FSC-W', u'SSC-A', u'SSC-H',
                            u'SSC-W', u'V2-A', u'V2-H', u'V2-W', u'Y2-A', u'Y2-H', u'Y2-W',
                            u'B1-A', u'B1-H', u'B1-W']
        self.assertListEqual(df.columns.tolist(), expected_columns)

        # Verify that a few selected value fo the data resolve to their expected values.
        subset_of_data = df.iloc[:3, :3].values

        expected_values = np.array([[2.0185113, 459.96298, 437.35455],
                                    [27.451754, -267.17465, 365.35455],
                                    [32.043865, -201.58234, 501.35455]], dtype=np.float32)

        assert_array_almost_equal(subset_of_data, expected_values)
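
With reformat_meta=True, fcsparser also collects the per-parameter keywords into a "_channels_" DataFrame and the detected names into "_channel_names_", which is what the cytoflow examples above rely on. A quick sketch; the file path is a placeholder.

import fcsparser

meta, data = fcsparser.parse("some_plate_well.fcs",   # placeholder path
                             reformat_meta=True)
print(meta["_channel_names_"])                      # detected channel names
print(meta["_channels_"][["$PnN", "$PnR"]].head())  # per-parameter keywords as a DataFrame
print(data.shape)                                   # (events, channels)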
Exemple #52
0
                              index = new_data.index,
                              dtype = meta_type)
                
                # if we're categorical, merge the categories
                if meta_type == "category" and meta_name in self.data.columns:
                    cats = set(self.data[meta_name].cat.categories) | set(new_data[meta_name].cat.categories)
                    self.data[meta_name] = self.data[meta_name].cat.set_categories(cats)
                    new_data[meta_name] = new_data[meta_name].cat.set_categories(cats)
            except (ValueError, TypeError):
                raise util.CytoflowError("Had trouble converting conditions {0} "
                                         "(value = {1}) to type {2}" \
                                         .format(meta_name,
                                                 meta_value,
                                                 meta_type))
        
        self.data = self.data.append(new_data, ignore_index = True)
        del new_data

if __name__ == "__main__":
    import fcsparser
    ex = Experiment()
    ex.add_conditions({"time" : "category"})

    tube0, _ = fcsparser.parse('../cytoflow/tests/data/tasbe/BEADS-1_H7_H07_P3.fcs')
    tube1, _ = fcsparser.parse('../cytoflow/tests/data/tasbe/beads.fcs')
    tube2, _ = fcsparser.parse('../cytoflow/tests/data/Plate01/RFP_Well_A3.fcs')
    
    ex.add_tube(tube1, {"time" : "one"})
    ex.add_tube(tube2, {"time" : "two"})
    
Exemple #53
0
                       hue = (self.huefacet if self.huefacet else None),
                       col_order = (np.sort(data[self.xfacet].unique()) if self.xfacet else None),
                       row_order = (np.sort(data[self.yfacet].unique()) if self.yfacet else None),
                       hue_order = (np.sort(data[self.huefacet].unique()) if self.huefacet else None),
                       # something buggy here.
                       #orient = ("h" if self.orientation == "horizontal" else "v"),
                       estimator = self.function,
                       ci = None,
                       kind = "bar")

if __name__ == '__main__':
    import cytoflow as flow
    import fcsparser
    
    tube1 = fcsparser.parse('../../cytoflow/tests/data/Plate01/RFP_Well_A3.fcs',
                            reformat_meta = True,
                            channel_naming = "$PnN")

    tube2 = fcsparser.parse('../../cytoflow/tests/data/Plate01/CFP_Well_A4.fcs',
                            reformat_meta = True,
                            channel_naming = "$PnN")
    
    tube3 = fcsparser.parse('../../cytoflow/tests/data/Plate01/RFP_Well_A3.fcs',
                            reformat_meta = True,
                            channel_naming = "$PnN")

    tube4 = fcsparser.parse('../../cytoflow/tests/data/Plate01/CFP_Well_A4.fcs',
                            reformat_meta = True,
                            channel_naming = "$PnN")
    
    ex = flow.Experiment()
Exemple #54
0
                pd.Series(data = [meta_value] * len(new_data),
                          index = new_data.index,
                          dtype = meta_type)

            # if we're categorical, merge the categories
            if is_categorical_dtype(meta_type) and meta_name in self.data:
                cats = set(self.data[meta_name].cat.categories) | set(
                    new_data[meta_name].cat.categories)
                self.data[meta_name] = self.data[meta_name].cat.set_categories(
                    cats)
                new_data[meta_name] = new_data[meta_name].cat.set_categories(
                    cats)

        self.data = self.data.append(new_data, ignore_index=True)
        del new_data


if __name__ == "__main__":
    import fcsparser
    ex = Experiment()
    ex.add_conditions({"time": "category"})

    tube0, _ = fcsparser.parse(
        '../cytoflow/tests/data/tasbe/BEADS-1_H7_H07_P3.fcs')
    tube1, _ = fcsparser.parse('../cytoflow/tests/data/tasbe/beads.fcs')
    tube2, _ = fcsparser.parse(
        '../cytoflow/tests/data/Plate01/RFP_Well_A3.fcs')

    ex.add_tube(tube1, {"time": "one"})
    ex.add_tube(tube2, {"time": "two"})
Exemple #55
0
        n_samples, n_groups, n_dim)

    data, labels = generate_test_data(n_samples, n_dim, n_groups=n_clusters)
    sample_names = [str(l) for l in labels]
else:
    analysis_title = pathlib.Path(filenames[0]).stem
    dataset = []
    labelset = []
    sample_names = []
    for filename in filenames:
        if verbose:
            sys.stderr.write("{} loading file : {}\n".format(
                datetime.datetime.now().strftime("[%H:%M:%S]"), filename))
        if filename.endswith(".fcs"):
            import fcsparser
            meta, data = fcsparser.parse(filename)
            data = data.T
            color2name = {}
            nondata_columns = ['EQBeads', 'Time', 'Width', 'Event']
            for key, val in meta.items():
                if isinstance(val, str) is False:
                    continue
                m = re.match("\\$(\\w\\d+)N", key)
                if m:
                    color_code = m.group(1)
                    color_name_code = "$" + color_code + "S"
                    if color_name_code in meta:
                        name = meta[color_name_code]
                        if name in nondata_columns:
                            continue
                        color2name[val] = meta[color_name_code]
    def estimate(self, experiment, subset = None): 
        """
        Estimate the bleedthrough from the single-channel controls in `controls`
        """
        if not experiment:
            raise CytoflowOpError("No experiment specified")
        
        if self.num_knots < 3:
            raise CytoflowOpError("Need to allow at least 3 knots in the spline")
        
        self._channels = self.controls.keys()
    
        for channel in self._channels:
            try:
                tube_meta = fcsparser.parse(self.controls[channel], 
                                            meta_data_only = True, 
                                            reformat_meta = True)
                tube_channels = tube_meta["_channels_"].set_index("$PnN")
            except Exception as e:
                raise CytoflowOpError("FCS reader threw an error on tube {0}: {1}"\
                                   .format(self.controls[channel], str(e)))

            for channel in self._channels:
                exp_v = experiment.metadata[channel]['voltage']
            
                if not "$PnV" in tube_channels.ix[channel]:
                    raise CytoflowOpError("Didn't find a voltage for channel {0}" 
                                          "in tube {1}".format(channel, self.controls[channel]))
                
                control_v = tube_channels.ix[channel]["$PnV"]
                
                if control_v != exp_v:
                    raise CytoflowOpError("Voltage differs for channel {0} in tube {1}"
                                          .format(channel, self.controls[channel]))

        self._splines = {}
        mesh_axes = []

        for channel in self._channels:
            self._splines[channel] = {}

            try:
                tube_meta, tube_data = fcsparser.parse(self.controls[channel], 
                                                       reformat_meta = True)
                tube_channels = tube_meta["_channels_"].set_index("$PnN")
            except Exception as e:
                raise CytoflowOpError("FCS reader threw an error on tube {0}: {1}"\
                                   .format(self.controls[channel], str(e)))
            
            data = tube_data.sort_values(channel)

            for af_channel in self._channels:
                if 'af_median' in experiment.metadata[af_channel]:
                    data[af_channel] = data[af_channel] - \
                                    experiment.metadata[af_channel]['af_median']

            channel_min = data[channel].min()
            channel_max = data[channel].max()
            
            # we're going to set the knots and splines evenly across the hlog-
            # transformed data, so as to capture both the "linear" aspect
            # of near-0 and negative values, and the "log" aspect of large
            # values

            # parameterize the hlog transform
            r = experiment.metadata[channel]['range']  # instrument range
            d = np.log10(r)  # maximum display scale, in decades
            
            # the transition point from linear --> log scale
            # use half of the log-transformed scale as "linear".
            b = 2 ** (np.log2(r) / 2)
            
            # the splines' knots
            knot_min = channel_min
            knot_max = channel_max
            
            hlog_knot_min, hlog_knot_max = \
                hlog((knot_min, knot_max), b = b, r = r, d = d)
            hlog_knots = np.linspace(hlog_knot_min, hlog_knot_max, self.num_knots)
            knots = hlog_inv(hlog_knots, b = b, r = r, d = d)
            
            # only keep the interior knots
            knots = knots[1:-1] 
            
            # the interpolators' mesh            
            mesh_min = -3 * experiment.metadata[channel]['af_stdev']
            mesh_max = r
                
            hlog_mesh_min, hlog_mesh_max = \
                hlog((mesh_min, mesh_max), b = b, r = r, d = d)
            hlog_mesh_axis = \
                np.linspace(hlog_mesh_min, hlog_mesh_max, self.mesh_size)
            
            mesh_axis = hlog_inv(hlog_mesh_axis, b = b, r = r, d = d)
            mesh_axes.append(mesh_axis)
            
            for to_channel in self._channels:
                from_channel = channel
                if from_channel == to_channel:
                    continue
                
                self._splines[from_channel][to_channel] = \
                    scipy.interpolate.LSQUnivariateSpline(data[from_channel].values,
                                                          data[to_channel].values,
                                                          t = knots,
                                                          k = 1)
         
        
        mesh = pandas.DataFrame(cartesian(mesh_axes), 
                                columns = [x for x in self._channels])
         
        mesh_corrected = mesh.apply(_correct_bleedthrough,
                                    axis = 1,
                                    args = ([[x for x in self._channels], 
                                             self._splines]))
        
        for channel in self._channels:
            chan_values = np.reshape(mesh_corrected[channel], [len(x) for x in mesh_axes])
            self._interpolators[channel] = \
                scipy.interpolate.RegularGridInterpolator(mesh_axes, chan_values)
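
The knot-placement idea above (spread knots evenly on a scale that is roughly linear near zero and logarithmic for large values, then keep only the interior knots) can be sketched with numpy's arcsinh as a stand-in for the hlog transform used in the code. The range, transition point, and data extremes below are illustrative assumptions.

import numpy as np

r = 262144.0                 # instrument range, as in the $PnR metadata above
b = 2 ** (np.log2(r) / 2)    # linear-to-log transition point, as computed above
num_knots = 7

def asinh_tf(x):
    # forward transform: linear near zero, logarithmic for |x| >> b
    return np.arcsinh(np.asarray(x, dtype=float) / b)

def asinh_inv(y):
    return np.sinh(y) * b

channel_min, channel_max = -50.0, 250000.0               # illustrative data extremes
lo, hi = asinh_tf([channel_min, channel_max])
knots = asinh_inv(np.linspace(lo, hi, num_knots))[1:-1]  # interior knots only
print(np.round(knots, 1))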