Beispiel #1
0
def pre_process(cFile, points=500, channels=('FITC-A', 'PE-Texas Red-A'), fcs=True):
	"""Load a dataset and return a random subsample of its channel values.

	When ``fcs`` is true, ``cFile`` is read as an FCS file, the requested
	channels are validated against the file's channel names and dumped to a
	tab-delimited text file next to it. That text file (which must already
	exist when ``fcs`` is false) is then parsed and ``points`` rows are drawn
	at random without replacement.

	Parameters
	----------
	cFile : path of the input file; the text file name is derived from the
		part of ``cFile`` before its first '.' plus ".txt".
	points : number of rows to sample.
	channels : channel names to extract; also fixes the expected column count.
	fcs : whether ``cFile`` is an FCS file that must be converted first.

	Returns
	-------
	numpy array of shape (points, len(channels)).

	Raises
	------
	ValueError : a requested channel is not in the FCS file, or ``points``
		exceeds the number of rows (raised by ``random.sample``).
	IOError : the text file is empty or its first row does not have one
		column per channel.
	"""

	text_name = cFile.split('.')[0] + ".txt"

	if fcs:
		sample = FCMeasurement(ID="sample", datafile=cFile)
		allowed = list(sample.meta['_channel_names_'])

		for chan in channels:
			if chan not in allowed:
				raise ValueError('%s is not a valid channel. Valid channels for %s include:\n%s' % (chan, cFile, str(allowed)))

		# A real sequence is required here: numpy's stacking helpers reject generators.
		data_columns = [sample.data[meas].values for meas in channels]
		np.savetxt(text_name, np.column_stack(data_columns), delimiter='\t', fmt="%.2f")

	# Open file and check for conformance to the expected format.
	with open(text_name, 'r') as f:
		fData = f.readlines()

	if not fData or len(fData[0].strip('\n').split('\t')) != len(channels):
		raise IOError('Input files must be %s-column, tab-delimited text files.' % str(len(channels)))

	# Parse the data file into one array per channel column.
	rows = [line.strip('\n').split('\t') for line in fData]
	signals = [np.array([float(row[n]) for row in rows]) for n in range(len(channels))]

	# Draw the same random row indices for every channel so rows stay aligned.
	indices = random.sample(range(signals[0].size), points)

	# Stack the sampled columns side by side: one row per sampled event.
	darray = np.vstack([column[indices] for column in signals]).T

	return darray
Beispiel #2
0
def get_PCA_FCS(temp_file_path):
    """Run the PCA pipeline on the FCS file at ``temp_file_path``.

    The file is loaded as an ``FCMeasurement``, passed through
    ``get_compensated_array`` and the result is handed to ``get_PCA``.

    Returns the two values produced by ``get_PCA`` as a tuple.
    """
    measurement = FCMeasurement(ID='', datafile=temp_file_path)
    compensated = get_compensated_array(measurement)
    fractions, weights = get_PCA(compensated)
    return fractions, weights
 def _load(self):
     """Load every matching ``.fcs`` file in ``self.datadir``.

     A directory entry matches when its name ends with ".fcs" and contains
     ``self.pattern``. Each match is printed, loaded as an ``FCMeasurement``
     and appended to ``self.samples``.

     Returns ``self.samples``.
     """
     for entry in os.listdir(self.datadir):
         is_match = entry.endswith(".fcs") and self.pattern in entry
         if not is_match:
             continue
         print(entry)
         measurement = FCMeasurement(ID=entry,
                                     datafile=self.datadir + "/" + entry)
         self.samples.append(measurement)
     return self.samples
def path_2_sample(path, id_name, transform=False):
    """Load the FCS file at ``path`` as a measurement named ``id_name``.

    When ``transform`` is truthy the measurement is passed through
    ``transform_sample`` before being returned; otherwise the raw
    measurement is returned.
    """
    measurement = FCMeasurement(ID=id_name, datafile=path)
    return transform_sample(measurement) if transform else measurement
Beispiel #5
0
    def __init__(self,
                 file_address,
                 if_fcs=True,
                 if_drop=True,
                 drop_col=['Time']):
        """
        Read an FCS or CSV file into a pandas DataFrame stored on ``self.df``.

        Parameters
        ----------
        file_address : string
                       e.g. r'#40 Ab.fcs' or 'flowmetry.csv'
        if_fcs : bool
                 whether the input file is an FCS file. If not, it is read
                 as a CSV file. For FCS input the measurement object is also
                 kept on ``self.info``.
        if_drop : bool
                  whether the columns in ``drop_col`` should be removed
        drop_col : list of strings
                   list of column names to be dropped
        """
        if if_fcs:
            self.info = FCMeasurement(ID='Train', datafile=file_address)
            frame = self.info.data
        else:
            frame = pd.read_csv(file_address)

        # Keep the full frame unless the caller asked for columns to be dropped.
        self.df = frame
        if if_drop:
            self.df = frame.drop(drop_col, axis=1)
    def predict(self, file_id):
        """Classify the events of one FCS file with every trained model.

        ``file_id`` is the file name under ``SHARED_RAW_DIR``. The data is
        pre-processed with ``self.pre_process``, the first
        ``len(PRINCIPAL_COMPONENTS)`` columns are standard-scaled, and each
        model in ``self.models_array`` predicts a label per event.

        Returns ``self.response`` updated with 'ts', 'file_name' and
        'predictions' (per-model label counts under 'counts').
        """
        # NOTE(review): only the microsecond component of the current time is stored.
        self.response['ts'] = datetime.now().microsecond
        self.response['file_name'] = file_id

        fcs_path = os.path.join(SHARED_RAW_DIR, file_id)
        measurement = FCMeasurement(ID='Test Sample', datafile=fcs_path)
        frame = self.pre_process(measurement.data)
        features = frame.iloc[:, 0:len(PRINCIPAL_COMPONENTS)].values

        # Feature scaling is fitted on this file's own data.
        features = StandardScaler().fit_transform(features)

        model_names = ['Logistic Regression', 'Decision Tree', 'Random Forest']
        predictions = {}
        counts = {}
        for position, model in enumerate(self.models_array):
            labels, label_counts = np.unique(model.predict(features),
                                             return_counts=True)
            labels = labels.tolist()
            label_counts = label_counts.tolist()
            # NOTE(review): overwritten on every pass, so only the last
            # model's labels survive under 'elements'.
            predictions['elements'] = labels
            counts[model_names[position]] = dict(zip(labels, label_counts))

        predictions['counts'] = counts
        self.response['predictions'] = predictions
        return self.response
def prep_fcs(file_path, mosaic_object):
    """
    Load an FCS file and gate it on the empirical quantiles of its scatter channels.

    :param file_path: Path to FCS file
    :param mosaic_object: object providing the column names ``fsc``, ``ssc``
        and ``fl1``, the ``amplification`` flag and the ``fsc_filt`` /
        ``ssc_filt`` (lower, upper) bounds used below
    :return: prepped FCS data, as a pandas dataframe
    """
    fsc = mosaic_object.fsc
    ssc = mosaic_object.ssc
    fl1 = mosaic_object.fl1

    # data import
    all_fcs = FCMeasurement(ID='A1l', datafile=file_path)
    data = all_fcs.data[[fsc, ssc, fl1]]

    # remove zero (and negative) elements; take an explicit copy so the column
    # assignments below write to our own frame rather than to a slice of the
    # measurement's data
    data = data.loc[data[fl1] > 0].copy()

    # toggle for linear and log data
    if mosaic_object.amplification:
        data[fl1] = data[fl1].apply(math.log)

    # replace both scatter channels by their empirical CDF value
    data[fsc] = ECDF(data[fsc])(data[fsc])
    data[ssc] = ECDF(data[ssc])(data[ssc])

    # keep events whose forward scatter lies inside the configured bounds
    fsc_low, fsc_high = mosaic_object.fsc_filt[0], mosaic_object.fsc_filt[1]
    sub_data = data.loc[(data[fsc] >= fsc_low) & (data[fsc] <= fsc_high)]

    # then apply the side scatter bounds to the remaining events
    ssc_low, ssc_high = mosaic_object.ssc_filt[0], mosaic_object.ssc_filt[1]
    sub_data = sub_data.loc[(sub_data[ssc] >= ssc_low) & (sub_data[ssc] <= ssc_high)]

    return sub_data
Beispiel #8
0
 def conv_append(self, tube, caseNum, tubeNum):
     """Copy one tube's events into ``self.conv_dataset``.

     Loads ``tube`` as an FCS file, sets ``self.channel_length`` to the
     number of channels minus one, and copies ``self.cells_per_tube`` rows
     (starting at row ``self.buffer``) into
     ``self.conv_dataset[caseNum][channel]`` at the slot reserved for
     ``tubeNum``.
     """
     measurement = FCMeasurement(ID='Test Sample', datafile=tube)
     values = measurement.data.values
     self.channel_length = len(measurement.channels) - 1
     for cell in range(self.cells_per_tube):
         # Each tube owns a contiguous run of cells_per_tube columns.
         target_col = tubeNum * self.cells_per_tube + cell
         source_row = self.buffer + cell
         for chan in range(self.channel_length):
             self.conv_dataset[caseNum][chan][target_col] = values[source_row][chan]
def load_file(File_name):
    """Load an FCS file and return its scatter/fluorescence matrix and sample id.

    Returns a tuple ``(current_data, sample_id)`` where ``current_data`` is an
    array holding the 'FSC-H', 'FL1-H' and 'SSC-H' columns (in that order)
    and ``sample_id`` is the file's 'SAMPLE ID' metadata entry.
    """
    sample = FCMeasurement(ID='Test Sample', datafile=File_name)
    # One conversion is enough: the per-channel arrays the original built
    # separately were never used.
    current_data = NP.array(sample.data[['FSC-H', 'FL1-H', 'SSC-H']])
    sample_id = sample.meta[u'SAMPLE ID']
    return current_data, sample_id
Beispiel #10
0
def compile_untreated(date, cellFrac):
    """Combine every untreated-well FCS file recorded on ``date``.

    All ``.fcs`` files found recursively under the date's "Untreated"
    folder are loaded and handed to ``combineWells`` together with
    ``cellFrac`` and ``date``; its result is returned.
    """
    untreated_dir = join(path_here,
                         "ckine/data/Flow_Data_Meyer/" + date + "/Untreated/")
    fcs_paths = Path(r"" + str(untreated_dir)).glob("**/*.fcs")
    measurements = [
        FCMeasurement(ID="All Data", datafile=fcs_path)
        for fcs_path in fcs_paths
    ]
    return combineWells(measurements, cellFrac, date)
Beispiel #11
0
def importFlow(directory):
    """Load every entry of ``directory`` whose name ends in '.fcs'.

    Returns a list of ``FCMeasurement`` objects, one per matching entry,
    each using the file name as its ID.
    """
    samples = []
    for name in os.listdir(directory):
        if name[-4:] != '.fcs':
            continue
        samples.append(
            FCMeasurement(ID=name, datafile=directory + '/' + name))
    return samples
Beispiel #12
0
def compute_median_log(dirr):
    """Return a normalised grid of per-file medians for the FCS files under ``dirr``.

    ``dirr`` is used as a glob prefix (``dirr + "*.fcs"``). The files are
    ordered with ``sorted_nicely``, each is cleaned with ``clean_sample`` and
    reduced to its median. The medians are arranged as an 8 x 12 grid,
    flipped along both axes, offset by 1, transposed and divided by the
    resulting top-left element.

    Raises ValueError (from ``np.reshape``) unless exactly 96 files match.
    """
    files = sorted_nicely(glob(dirr + "*.fcs"))
    # Build a concrete list: on Python 3 ``map`` yields an iterator that
    # ``np.reshape`` cannot turn into an array.
    medians = [
        np.median(clean_sample(FCMeasurement(ID=f, datafile=f)))
        for f in files
    ]
    x = np.reshape(np.asarray(medians, dtype=float), [8, 12])
    x = np.flipud(np.fliplr(x))
    x = (x + 1.).T
    return x / x[0, 0]
Beispiel #13
0
    def FCScheck(self, datafile):
        filematch = re.compile(
            "PANEL[\s_][AB][\s_][A][A-Z][A-Z][A-Z][\s_][0-9]{1,5}",
            flags=re.X | re.I)

        rawfilename = datafile.split("/")[-1]

        filesampleID = "_".join(rawfilename.split("_")[2:4])
        #sampleID=str(sampleID.strip().lstrip("(u\'"))

        codeonly = re.compile("[A][A-Z][A-Z][A-Z]_[0-9]{1,4}")
        controlonly = re.compile("[B][6][N][C]_[0-9]{1,4}")

        omitcontrol = re.compile(
            "(^STAINED)|(Control)|(FMO)|(^UNSTAINED)|(^Specimen)|(,3a,)",
            flags=re.X | re.I)
        compFileNameA = re.compile("[A-z_]*PANELA$", flags=re.X | re.I)
        compFileNameB = re.compile("[A-z_]*PANELB$", flags=re.X | re.I)
        compFiles = re.compile("Compensation", flags=re.X | re.I)

        FluoroMatch = re.compile(
            """Compensation(\s|-|_){1,3}Controls(\s|-|_){0,3}(
			(CD25(\s|-|_){0,2}PE(\s|-|_){0,2}CY7)|
			(BV421)|
			(BV510)|
			(BV786)|
			(FITC)|
			(PE)|
			(APC)|
			(CD8a(\s|-|_){0,2}PE(\s|-|_){0,2}CF594)|
			(CD11b(\s|-|_){0,2}PE(\s|-|_){0,2}CF594)|
			(CD11C(\s|-|_){0,2}PE(\s|-|_){0,2}CY7)|
			(CD62L(\s|-|_){0,2}APC(\s|-|_){0,2}CY7)|
			(MHCII(\s|-|_){0,2}APC(\s|-|_){0,2}CY7)
			)""",
            flags=re.X | re.I)

        fullIMPC = re.compile("IMPC[12][\s_-]")

        sample = FCMeasurement(ID="Tops", datafile=datafile)
        mdata = sample.meta.keys()
        sampleID = sample.meta["TUBE NAME"]
        sampleID = str(sampleID.strip().lstrip("(u\'"))
        FullName = sample.meta["$FIL"]
        panel = sample.meta["$SRC"]
        if not panel.endswith("_"):
            panel = "_".join(panel.split()) + "_"
        else:
            panel = "_".join(panel.split())

        return sampleID, filesampleID
Beispiel #14
0
 def test_hlog_on_fc_measurement(self):
     """The hlog transform (b=10) must reproduce the stored reference values.

     Only the first three events and first four channels are compared, to
     five decimal places.
     """
     raw = FCMeasurement(ID='test', datafile=test_path)
     transformed = raw.transform(transform='hlog', b=10)
     observed = transformed.data.values[:3, :4]
     expected = np.array([
         [-8.22113965e+03, 1.20259949e+03, 1.01216449e-06, 5.21899170e+03],
         [-8.66184277e+03, 1.01013794e+03, 1.01216449e-06, 5.71275928e+03],
         [-8.79974414e+03, 1.52737976e+03, 1.01216449e-06, -4.95852930e+03]
     ])
     failure_message = ('the hlog transformation gives '
                        'an incorrect result')
     np.testing.assert_array_almost_equal(observed,
                                          expected,
                                          decimal=5,
                                          err_msg=failure_message)
 def train(self):
     """Load the training FCS file and build the models.

     Steps, in order: read ``TRAIN_FCS_FILE`` from ``SHARED_RAW_DIR`` into
     ``self.train_df``, pre-process it, compute the diagnosis, split into
     train and test sets, and store the result of ``self.models()`` in
     ``self.models_array``.
     """
     fcs_path = os.path.join(SHARED_RAW_DIR, TRAIN_FCS_FILE)
     measurement = FCMeasurement(ID='Test Sample', datafile=fcs_path)

     # Raw frame first, then its pre-processed replacement.
     self.train_df = measurement.data
     self.train_df = self.pre_process(self.train_df)

     self.compute_diagnosis()
     self.split_train_test()
     self.models_array = self.models()
Beispiel #16
0
    def load_fcs(self, filepath=None, parent=None):
        """Load an FCS file into ``self.sample``, asking the user for one if needed.

        ``parent`` defaults to the figure canvas and is passed to the file
        dialog, which is only opened when ``filepath`` is not given. If no
        path results (the dialog returned ``None``) nothing is loaded.
        After loading, ``self._sample_loaded_event()`` is called.
        """
        ax = self.ax  # read but not used further in this method

        if parent is None:
            parent = self.fig.canvas

        if filepath is None:
            # Imported lazily, only when a dialog is actually required.
            from FlowCytometryTools.gui import dialogs
            filepath = dialogs.open_file_dialog('Select an FCS file to load',
                                                'FCS files (*.fcs)|*.fcs',
                                                parent=parent)

        if filepath is None:
            return

        self.sample = FCMeasurement('temp', datafile=filepath)
        print('WARNING: Data is raw (not transformation).')
        self._sample_loaded_event()
Beispiel #17
0
Datei: facs.py Projekt: xies/FACS
def load_facs(file_name):
    """
    Load .fcs data and return its raw values together with the channel names.

    Parameters
    ----------
        file_name - path string to the .fcs file

    Returns
    -------
        raw_data - array of the measurement's data values
        parameters - numpy array of the channel names, ASCII-encoded with
                     non-ASCII characters dropped

    """
    sample = FCMeasurement(ID='Test Sample', datafile=file_name)
    parameters = [
        name.encode('ascii', 'ignore') for name in sample.channel_names
    ]
    # Read the values once, outside any loop, so the result is defined even
    # when the file reports no channels.
    raw_data = sample.data.values

    return raw_data, np.array(parameters)
Beispiel #18
0
    def exptdf(self, exptdate, **kwargs):
        """
        Build one dataframe with every flow observation listed in the
        master index for ``exptdate``.

        The master index is taken from the ``master_index_df`` keyword
        argument when given; otherwise it is looked up with
        ``self.master_idx_by_date(exptdate)``. Each row's ``filepath`` is
        loaded if it exists and every column of the row is copied onto the
        loaded events as annotation.

        Returns the concatenated dataframe, or ``None`` when no file was found.
        """
        master_idx = (kwargs['master_index_df']
                      if 'master_index_df' in kwargs
                      else self.master_idx_by_date(exptdate))

        frames = []
        print(f'Found master index with {len(master_idx)} samples at')
        for label in master_idx.index:
            row = master_idx.loc[label, :]
            print(f'Looking for data at {row.filepath}')

            if not os.path.exists(row.filepath):
                print('No data found')
                continue

            print('Found data')
            measurement = FCMeasurement(ID=f'{row.strain}-{row.clone}',
                                        datafile=row.filepath)
            sampledf = measurement.data
            print(f'Found {len(sampledf)} measurements in this file')
            # Tag every event with the identifying fields of its index row.
            for col in row.index:
                sampledf.loc[:, col] = row.loc[col]
            frames.append(sampledf)

        if not frames:
            print(f'No data found for exptdate {exptdate}')
            return None

        return pd.concat(frames, ignore_index=True)
Beispiel #19
0
    def __read_fcs_file_to_fcm(self, fcs_file_name):
        """Load an FCS file from ``SHARED_RAW_DIR`` into ``self.sample``.

        If the named file does not exist, 'fcs_file.fcs' in the same
        directory is used instead. The measurement is transformed with
        ``self.transformation`` (parameter ``b=self.bins``) when one is set.
        When neither ``channel_name1`` nor ``channel_name2`` is set they
        default to the file's first two channels; otherwise
        ``self.channel_names`` is narrowed to the two configured names.
        """
        fcs_file = os.path.join(SHARED_RAW_DIR, fcs_file_name)
        if not os.path.exists(fcs_file):
            print('FCS file does not exist ', fcs_file)
            # Fall back to the default file name (used when running from the CLI).
            fcs_file = os.path.join(SHARED_RAW_DIR, 'fcs_file.fcs')

        measurement = FCMeasurement(ID='Test Sample', datafile=fcs_file)
        if self.transformation:
            measurement = measurement.transform(self.transformation,
                                                b=self.bins)

        self.channel_names = measurement.channel_names
        if self.channel_name1 or self.channel_name2:
            self.channel_names = [self.channel_name1, self.channel_name2]
        else:
            print('Check if channel names False', self.channel_names)
            self.channel_name1 = self.channel_names[0]
            self.channel_name2 = self.channel_names[1]

        self.sample = measurement
Beispiel #20
0
def importF2(pathname, WellRow):
    """
    Import the FCS files of one well row found recursively under ``pathname``.

    A file belongs to the row when the second "_"-separated token of its
    name starts with ``WellRow``. Returns ``(sample, file)``: the loaded
    measurements and the sorted list of their paths (referenced elsewhere
    as the title/titles argument). Fails an assertion when no file matches.
    """
    matching_paths = []
    for fcs_path in Path(r"" + str(pathname)).glob("**/*.fcs"):
        well_id = fcs_path.name.split("_")[1]
        if well_id[0] == WellRow:
            matching_paths.append(str(fcs_path))
    titles = sorted(matching_paths)
    assert titles != []

    # One measurement per file, numbered in sorted-path order.
    measurements = [
        FCMeasurement(ID="Test Sample" + str(position), datafile=entry)
        for position, entry in enumerate(titles)
    ]
    return measurements, titles
def graph_cyto(gate, wells, output_file):
    """Plot one horizontal GFP-H histogram per well and save the row as a PDF.

    ``gate`` is indexed in step with ``wells``; ``wells`` supplies the well
    names used to build each FCS file name. The figure is written to
    ``output_file`` + ".pdf" in the hard-coded output folder.
    """
    directory = r'/Users/anazuniga/Documents/phyton/tc/'
    plt.subplots(figsize=(8, 2))
    plt.subplots_adjust(hspace=0.0, wspace=0.0)
    inp = gate
    for position, well in enumerate(wells, start=1):
        # NOTE(review): this colour is selected from the gate but never
        # passed to the plot; the histogram below always uses '#00a651'.
        c = '#00a651' if inp[position - 1] == '1' else '#6D6E71'

        datafile = directory + '/export_' + well + '_Single Cells.fcs'
        sample = FCMeasurement(ID='Test Sample', datafile=datafile)

        # Logarithmically spaced bins, drawn on a log-scaled axis.
        logbins = np.geomspace(10, 1000000, 100)
        ax = plt.subplot(1, 16, position)
        ax.hist(sample['GFP-H'],
                bins=logbins,
                orientation="horizontal",
                color='#00a651')
        plt.yscale('log')
        ax.set_ylim(10, 500000)

        # Hide the x axis; only the shared vertical scale carries information.
        ax.xaxis.set_ticks_position('none')
        ax.set_xticks([])

        # Dashed reference line at 200.
        ax.axhline(200, color="black", linestyle='--', dashes=(4, 4), lw=1)

    plt.savefig("/Users/anazuniga/Documents/phyton/tc/" + output_file + ".pdf",
                format="pdf")
Beispiel #22
0
def analyze(datafile):
    """Plot a 100-bin histogram for every channel of an FCS file except 'Time'.

    Channels whose name starts with 'FSC' or 'SSC' are histogrammed on their
    linear values; all other channels are log10-transformed after values
    <= 0 are replaced by 1. For 'FSC-W' only values below 1.5e5 are kept.
    The min and max of each channel are printed and the figure is shown
    with ``plt.show()``.
    """
    sample = FCMeasurement(ID=datafile, datafile=datafile)

    hists = []
    channels = []
    bin_edges = []
    for channel, data in zip(sample.channel_names, sample.data.values.T):
        if channel == 'Time':
            continue
        print("{}: Min: {}, Max: {}".format(channel, np.min(data),
                                            np.max(data)))
        # Convert scale: scatter channels stay linear, the rest go to log10.
        if not channel.startswith(('FSC', 'SSC')):
            # Clamp non-positive values to 1 (for convenience until
            # biexponential is implemented). np.where builds a new array, so
            # the sample's own data is not modified.
            data = np.log10(np.where(data <= 0, 1, data))
        # Modify range.
        if channel == 'FSC-W':
            data = data[data < 1.5e5]
        hist, bin_edge = np.histogram(data.flatten(), bins=100)
        hists.append(hist)
        channels.append(channel)
        bin_edges.append(bin_edge)

    # add_subplot needs an integer row count; round up so that a channel
    # count that is not a multiple of 4 still gets enough rows.
    n_cols = 4
    n_rows = max(1, -(-len(sample.channel_names) // n_cols))

    fig = plt.figure()
    plotted = zip(channels, hists, bin_edges)
    for position, (channel, hist, bin_edge) in enumerate(plotted, start=1):
        ax = fig.add_subplot(n_rows, n_cols, position)
        ax.bar(bin_edge[:-1], hist / 1000, width=bin_edge[1:] - bin_edge[:-1])
        ax.ticklabel_format(axis='x', style='sci', scilimits=(1, 4))
        ax.set_title(channel)
    plt.show()
def rename_fcs(fcs_filelist, sample, md, datadir, gatingdir, tmpdir, logfile):
    """Move a sample's FCS file and gate file out of ``tmpdir`` under standard names.

    The files are located with ``find_files``. The acquisition date is read
    from the FCS file's '$DATE' metadata (the event data itself is not read)
    and used, together with the sample's ids, plate and well, to build the
    new file names. Metadata is collected with ``get_metadata``.

    Returns the (possibly updated) ``md``. When no data file was found, a
    message is printed and ``md`` is returned unchanged; no file is renamed.
    A missing gate file is ignored.
    """
    well = sample.well
    plate_num = sample.plate

    result = find_files(fcs_filelist, plate_num, well, logfile)
    datafile = result['datafile']
    gatefile = result['gatefile']

    # TODO: check if datafile has > 1 arg.
    if len(datafile) == 0:
        # error should have already been recorded in the 'find_files' function.
        # Without a data file there is no date, hence no target name for the
        # gate file either, so nothing can be renamed.
        print("datafile is empty")
        return md

    # [Pull out the metadata] -----------------------------------------------
    # Read in FCS data to get the metadata
    fcs_data = FCMeasurement(ID=well,
                             datafile=tmpdir + datafile,
                             readdata=False)
    fcs_date = pd.to_datetime(fcs_data.meta['$DATE'])
    fcs_date = fcs_date.strftime("%Y-%m-%d")

    # Create filenames
    fcs_filename = f'{sample.sample_id}-plate{plate_num}-{well}_sysserology-{sample.expt_id}_{fcs_date}.fcs'
    gate_filename = f'{sample.sample_id}-plate{plate_num}-{well}_sysserology-{sample.expt_id}_{fcs_date}_gates.xml'

    md = get_metadata(sample, fcs_data, fcs_date, fcs_filename, md)

    # [Rename the data files] -----------------------------------------------
    os.rename(tmpdir + datafile, f'{datadir}/{fcs_filename}')

    # An empty gate file name would make the source path ``tmpdir`` itself,
    # so only attempt the rename when a name is actually present.
    if gatefile:
        try:
            os.rename(tmpdir + gatefile, f'{gatingdir}/{gate_filename}')
        except (OSError, TypeError):
            # error should have already been recorded in the 'find_files'
            # function; no need to log again
            pass
    return md
Beispiel #24
0
import os

from pylab import *

import FlowCytometryTools
from FlowCytometryTools import FCMeasurement

# Example script: load one FCS file, hlog-transform three channels and draw a
# scatter plot of two of them.

# Locate sample data included with this package
# (FlowCytometryTools/tests/data/Plate01 inside the installed package).
datadir = os.path.join(FlowCytometryTools.__path__[0], 'tests', 'data',
                       'Plate01')
datafile = os.path.join(datadir, 'RFP_Well_A3.fcs')

# To analyse your own data, point datafile at it instead:
# datafile = '[insert path to your own fcs file]'

# Load data, then replace the measurement by a copy whose listed channels are
# hlog-transformed with parameter b=500.0.
tsample = FCMeasurement(ID='Test Sample', datafile=datafile)
tsample = tsample.transform('hlog', channels=['Y2-A', 'B1-A', 'V2-A'], b=500.0)

# Plot the two transformed channels against each other; grid() comes from
# the pylab star import above.
tsample.plot(['Y2-A', 'B1-A'], kind='scatter', alpha=0.6, color='gray')
grid(True)

#show() # <-- Uncomment when running as a script.
Beispiel #25
0
def fcs_to_pandas(fcs_folder, fcs_file):
    """Load ``fcs_folder + fcs_file`` and return its full contents as a DataFrame.

    The two arguments are concatenated as-is, so ``fcs_folder`` must already
    end with a path separator.
    """
    measurement = FCMeasurement(ID=fcs_file, datafile=fcs_folder + fcs_file)
    return pd.DataFrame(measurement[:])
Beispiel #26
0
def load_fcs_data(filename):
    """Load ``filename`` from ``_DATA_DIR``.

    Names ending in '.pk' are unpickled and the stored object is returned;
    anything else is loaded as an ``FCMeasurement`` with ``filename`` as ID.
    """
    filepath = os.path.join(_DATA_DIR, filename)
    if filename.endswith('.pk'):
        # pickle.load needs an open binary file, not a path string.
        # SECURITY: unpickling executes arbitrary code; only use this on
        # files from a trusted source.
        with open(filepath, 'rb') as handle:
            return pickle.load(handle)
    return FCMeasurement(ID=filename, datafile=filepath)
Beispiel #27
0
def extract(datafile):
    """Load an FCS file, print its channel names and return its contents.

    Returns a tuple ``(data, channel_names)``.
    """
    measurement = FCMeasurement(ID='Test Sample', datafile=datafile)
    print(measurement.channel_names)
    events = measurement.data
    return events, measurement.channel_names
Beispiel #28
0
 def __init__(self, fcs):
     """Load the FCS file ``fcs`` and start with empty cluster and stats tables.

     The path is kept on ``self.prf``, the loaded measurement on
     ``self.dat``; a 'loading ...' message is emitted through ``msg``
     before the file is read.
     """
     self.prf = fcs
     msg('loading %s' % fcs)
     self.dat = FCMeasurement(ID=fcs, datafile=fcs)
     self.clusters = dict()
     self.stats = dict()
Beispiel #29
0
import seaborn as sb
import pandas as pd
import matplotlib.path as mplPath
import matplotlib.pyplot as plt
from FlowCytometryTools import FCMeasurement
from roipoly import roipoly
from scipy import stats

# Apply seaborn's default plot styling.
sb.set()

# NOTE(review): `np` and `get_bin_means` are used below but are not imported
# or defined in the visible part of this script — confirm where they come from.

#%%

# T98G
# First file: presumably the background (secondary-antibody-only) control,
# judging by the "2nd488" file name — TODO confirm.
filename = '/Users/xies/Box/Others/SZ_FACS_RB/SZ-091320 Async OPP_Group_T98G OPP647 2nd488.fcs'
tg_bg = FCMeasurement(ID='Test Sample', datafile=filename)

# 26 evenly spaced bin edges spanning the observed FSC-A range, then the
# BL1-A signal is summarised per FSC-A bin by get_bin_means.
size_bins = np.linspace(tg_bg['FSC-A'].min(),tg_bg['FSC-A'].max(),num = 26)
bg_rb = get_bin_means(tg_bg['FSC-A'],tg_bg['BL1-A'],size_bins)

# Second file: the "RB488" sample from the same folder.
filename = '/Users/xies/Box/Others/SZ_FACS_RB/SZ-091320 Async OPP_Group_T98G OPP647 RB488.fcs'
tg = FCMeasurement(ID='Test Sample', datafile=filename)

# Scatter VL1-A against FSC-A (very low alpha because points overlap), then
# let the user draw a polygon on the plot with roipoly.
dapi = tg['VL1-A']
fsc = tg['FSC-A']
plt.scatter(dapi,fsc,alpha=0.002)
g1_gate = roipoly(color='r')
# Turn the drawn polygon into a matplotlib Path and test every event's
# (VL1-A, FSC-A) point for membership; the boolean mask selects the gated
# events (named "g1" here, presumably the G1 population — TODO confirm).
g1_gate_p = mplPath.Path( np.vstack((g1_gate.x,g1_gate.y)).T )
g1_gateI = g1_gate_p.contains_points( np.vstack((tg['VL1-A'],tg['FSC-A'])).T )
tg_g1 = tg[g1_gateI]
Beispiel #30
0
 def read_data(self):
     """Load the FCS file at ``self.datafile`` and store it on ``self.sample``."""
     measurement = FCMeasurement(ID='Test Sample', datafile=self.datafile)
     self.sample = measurement