Beispiel #1
0
def generatedicts(studies, numsubs, files, pathtofcs, markerset, split,
                  loadbar):
    """Load every fcs file into an array and split the arrays into train/test.

    Each name in ``files`` is read with ``fcm.loadFCS(pathtofcs + filename)``,
    reduced to the columns in ``markerset`` (one fixed column order for all
    files), row-shuffled, and stored under the next key produced by
    ``it.product(range(numsubs), studies)``. Keys are therefore
    ``(subject_index, study)`` tuples, handed out in product order.
    NOTE(review): this presumes ``files`` is ordered the same way the product
    is enumerated -- confirm against the caller.

    For every key whose study (element 1 of the key) is contained in
    ``split[0]``, a random half of the rows goes to the training array and the
    remaining rows to the test array. All other keys go to test in full.

    Args:
        studies: iterable of study identifiers (second factor of the key).
        numsubs: number of subject indices (first factor of the key).
        files: fcs file names, relative to ``pathtofcs``.
        pathtofcs: prefix concatenated in front of every file name.
        markerset: set of column names to keep; each file must provide all
            of them after ``cleanmarkers`` has been applied.
        split: indexable whose element 0 holds the studies to be split.
        loadbar: forwarded unchanged to ``loading``.

    Returns:
        Tuple ``(alltrain, alltest)`` of dicts mapping key -> 2-D array.

    Raises:
        StopIteration: if there are more files than product keys.
        KeyError: if a file lacks one of the columns in ``markerset``.
    """
    alldata = {}
    prod = it.product(range(numsubs), studies)  # yields (subject_index, study)
    print('Generating dictionaries "alldata", "alltrain", and "alltest".')
    columns = list(markerset)  # one fixed column order shared by every file
    for filename in loading(files, loadbar):
        data = fcm.loadFCS(pathtofcs + filename)
        df = pd.DataFrame(np.array(data), columns=cleanmarkers(data.channels))
        # Selecting the wanted columns both discards the extra ones and puts
        # the remaining ones in the same order for every file.
        df = df[columns]
        df = df.sample(frac=1).reset_index(drop=True)  # shuffle the rows
        # .values replaces DataFrame.as_matrix(), which no longer exists in
        # current pandas releases.
        alldata[next(prod)] = df.values

    alltrain = {}
    alltest = {}
    for studsub in loading(alldata.keys(), loadbar):
        # Element 1 of the key is the study (second factor of the product).
        if studsub[1] in split[0]:
            length = len(alldata[studsub])
            # Floor division keeps ``size`` an integer under true division.
            splittrain = np.random.choice(np.arange(length),
                                          size=length // 2,
                                          replace=False)
            splittest = np.setdiff1d(np.arange(length), splittrain)
            alltrain[studsub] = alldata[studsub][splittrain, :]
            alltest[studsub] = alldata[studsub][splittest, :]
        else:
            alltest[studsub] = alldata[studsub]
    return alltrain, alltest
Beispiel #2
0
def read_healthy_data(basedir, keys, stimuli, marker_idx):
	"""Load one file per (key, stimulus) pair and collect the selected columns.

	For every ``key`` the file ``<basedir>/<key>/<key>_<stim>_PhenoGraph.csv``
	is read with ``fcm.loadFCS`` for each ``stim`` in ``stimuli``; the columns
	listed in ``marker_idx`` are passed through ``ftrans(..., 5)`` and
	appended to a per-key list.

	NOTE(review): this copy of the function stops after the loading loop --
	``lookup`` is never filled and nothing is returned in the visible code.
	"""
	lookup = dict()

	for key in keys:
		subdir = os.path.join(basedir, key)
		# stim_list is initialised but not used in the visible code
		data_list, stim_list = [], []
	
		for jj, stim in enumerate(stimuli):
			fname = os.path.join(subdir,
								'_'.join([key, stim, 'PhenoGraph.csv']))
			try:

				# load the raw data
				x = fcm.loadFCS(fname, transform=None)
				# show the names of the selected channels
				print [x.channels[ii] for ii in marker_idx]
				
				# select interesting markers and arcsinh-transform
				x = ftrans(np.asarray(x)[:,marker_idx], 5)
				
				# merge data from different stimulation conditions
				data_list.append(x)
				
			except Exception:
				# best effort: any failure for this file is reported and the file is skipped
				print 'Problem loading: ' + fname 
				pass
Beispiel #3
0
def get_file_data(dataPath,dataType='fcs',channelsOnly=False):

    '''
    load file data

    Returns the tuple (fcsData, fileChannels) for the file at dataPath.

    dataType     -- one of 'fcs', 'txt' or 'pickle'
    channelsOnly -- when true the event data is not returned (fcsData is
                    None); only the channel names are looked up

    'fcs'    : data via fcm.loadFCS, channels from the loaded object
    'pickle' : data via cPickle, channels are always None
    'txt'    : data via read_txt_into_array, channels read from the file
               name with '.out' replaced by '.txt'

    (None, None) is returned, after printing a message, for an unknown
    dataType or when dataPath is not an existing file.
    '''

    # Messages use single-argument print(...) calls, which behave the same as
    # the Python 2 print statement and the Python 3 print function.
    if dataType not in ['fcs','txt','pickle']:
        print("%s %s" % ("ERROR in tools.get_file_data -- bad data type ", dataType))
        return None, None

    if not os.path.isfile(dataPath):
        print("WARNING in tools.get_file_data -- cannot get fcs data bad file path")
        return None,None

    fcsData,fileChannels = None, None

    if dataType == 'fcs':
        # The channel names are only available on the loaded object, so the
        # file has to be read even when just the channels were requested
        # (previously this branch dereferenced None.channels).
        loadedData = fcm.loadFCS(dataPath)
        fileChannels = loadedData.channels
        if not channelsOnly:
            fcsData = loadedData
    elif dataType == 'pickle':
        if not channelsOnly:
            # NOTE: unpickling executes arbitrary code -- only use this on
            # trusted files.
            with open(dataPath,'rb') as fid:
                fcsData = cPickle.load(fid)
    else:
        if not channelsOnly:
            fcsData = read_txt_into_array(dataPath)
        fileChannels = read_txt_to_file_channels(re.sub(r"\.out",".txt",dataPath))

    return fcsData, fileChannels
Beispiel #4
0
def setmarkers(pathtofcs, files, ignorechan=()):
    '''
    Define the set of markers that will be used, should be run through ALL files
    that will be used to train the model, not just those that are going through
    an individual batch. This list will become a global variable.

    The channel names of every file are cleaned with cleanmarkers(), the
    names listed in ignorechan are removed, and the intersection over all
    files is returned as a set.

    pathtofcs  -- prefix concatenated in front of every file name
    files      -- iterable of fcs file names
    ignorechan -- collection of (cleaned) channel names to leave out

    Raises AssertionError when 20 or fewer common markers remain; this
    includes the case of an empty ``files``.
    '''
    markerlist = list()
    for filename in files:
        # Load the channel names:
        markers = list(fcm.loadFCS(pathtofcs + filename).channels)

        # Clean up the markers so they're consistent across files
        markers = cleanmarkers(markers)

        # Don't include anything that was set to be ignored
        markers = [i for i in markers if i not in ignorechan]

        markerlist.append(markers)

    # set.intersection() needs at least one argument; no files means no
    # common markers.
    if markerlist:
        markerset = set.intersection(*map(set, markerlist))
    else:
        markerset = set()

    # Raised explicitly (instead of using an assert statement) so the check
    # is still performed when Python runs with -O.
    if not len(markerset) > 20:
        raise AssertionError(
            'Not enough common markers among the fcs files. Please check that channel names are' +
            ' consistent between fcs files looking at the same marker or lower threshold. Note ' +
            'lowering threshold could have negative effects on test set accuracy.')

    return markerset
Beispiel #5
0
def read_healthy_data(basedir, keys, stimuli, marker_idx):
    """Build a dict mapping each key to the stacked data of all its stimuli.

    For every ``key`` the file ``<basedir>/<key>/<key>_<stim>_PhenoGraph.csv``
    is read with ``fcm.loadFCS`` for each ``stim`` in ``stimuli``. The columns
    listed in ``marker_idx`` are passed through ``ftrans(..., 5)`` and the
    per-stimulus arrays are stacked row-wise with ``np.vstack``.

    Files that fail to load or process are reported on stdout and skipped.

    Args:
        basedir: directory holding one sub-directory per key.
        keys: iterable of sub-directory names, also used as file prefix.
        stimuli: iterable of stimulus names used in the file names.
        marker_idx: column indices to keep.

    Returns:
        dict mapping key -> 2-D array.

    Raises:
        ValueError: if not a single file could be loaded for a key.
    """
    lookup = dict()

    for key in keys:
        subdir = os.path.join(basedir, key)
        data_list = []

        for stim in stimuli:
            fname = os.path.join(subdir,
                                 '_'.join([key, stim, 'PhenoGraph.csv']))
            try:
                # load the raw data
                x = fcm.loadFCS(fname, transform=None)
                # show the names of the selected channels
                print([x.channels[ii] for ii in marker_idx])

                # select interesting markers and arcsinh-transform
                x = ftrans(np.asarray(x)[:, marker_idx], 5)

                # merge data from different stimulation conditions
                data_list.append(x)

            except Exception:
                # best effort: report the file and carry on with the others
                print('Problem loading: ' + fname)

        if not data_list:
            # np.vstack([]) would fail here with a message that does not say
            # which key is affected.
            raise ValueError('No data could be loaded for key: ' + str(key))
        lookup[key] = np.vstack(data_list)
    return lookup
Beispiel #6
0
def get_data(indir, info, marker_names, do_arcsinh, cofactor):
    """Load the listed fcs files and return their selected marker columns.

    ``info`` is indexed as a 2-D array: column 0 holds the file names
    (relative to ``indir``) and column 1 the phenotypes. For every file the
    columns whose channel names appear in ``marker_names`` are extracted in
    that order; when ``do_arcsinh`` is true they are passed through
    ``ftrans(x, cofactor)``.

    Returns:
        Tuple ``(sample_list, phenotypes)``: one array per file, and the
        phenotype column as a list.

    Raises:
        ValueError: if a file lacks one of ``marker_names``.
    """
    fnames, phenotypes = info[:, 0], info[:, 1]
    samples = []
    for file_name in fnames:
        events = fcm.loadFCS(os.path.join(indir, file_name),
                             transform=None, auto_comp=False)
        # Column positions are looked up for every file separately.
        columns = [events.channels.index(marker) for marker in marker_names]
        selected = np.asarray(events)[:, columns]
        samples.append(ftrans(selected, cofactor) if do_arcsinh else selected)
    return samples, list(phenotypes)
Beispiel #7
0
def load_file(File_name):
    """Load an fcs file and return its first three columns plus the sample id.

    Returns:
        Tuple ``(data, sample_id)``: ``data`` holds columns 0, 1 and 2 of the
        file (named FSC, SSC and GFP by the original author) side by side;
        ``sample_id`` is the ``'sample id'`` entry of the file's text notes.
    """
    events = fcm.loadFCS(File_name)
    sample_id = events.notes['text']['sample id']
    # Stack the three columns as rows, then flip so each one is a column.
    stacked = NP.vstack(tuple(events[:, column] for column in range(3)))
    return NP.transpose(stacked), sample_id
Beispiel #8
0
def auto_generate_channel_dict(filePath):
    """Map the official name of every channel in an fcs file to its index.

    Each channel name is translated with ``get_official_name_match``. The
    returned dict maps translated name -> channel position; when two channels
    translate to the same name the later position wins.

    Returns:
        Tuple ``(isValidDict, channelDict)``. ``isValidDict`` is False when at
        least one channel translated to ``'Unmatched'``.
    """
    fcsData = fcm.loadFCS(filePath, auto_comp=False, transform=None)
    officialNames = [get_official_name_match(chan) for chan in fcsData.channels]

    channelDict = dict((name, pos) for pos, name in enumerate(officialNames))
    isValidDict = 'Unmatched' not in officialNames

    return isValidDict, channelDict
Beispiel #9
0
 def __get_newest_fcs_file(self,root,type_file_pairs):
     """
     Return the entry of type_file_pairs whose fcs file is the newest.

     Element 1 of every pair is a file name below root. The file's
     'export time' text note (format "%d-%b-%Y %H:%M:%S", dashes are replaced
     by spaces before parsing) decides which file is newest. On a tie the
     first pair with the latest time is returned.
     """
     def export_time(pair):
         stamp = loadFCS(root+'/'+pair[1]).notes['text']['export time']
         return time.strptime(stamp.replace('-',' '),"%d %b %Y %H:%M:%S")

     stamps = [export_time(pair) for pair in type_file_pairs]
     return type_file_pairs[stamps.index(max(stamps))]
Beispiel #10
0
def no_inhibitor_lookup_full(data_path, stimuli, ctypes, marker_idx):
    """Collect the high-dose data of every sub-directory of ``data_path``.

    For each immediate sub-directory ``key`` and each (cell type, stimulus)
    pair the file ``<key>_<ctype>_H<stim>.fcs`` is loaded, reduced to the
    columns in ``marker_idx`` and passed through ``ftrans(..., 5)``. Columns
    10 and onwards are shifted with a mean-only ``StandardScaler`` that is
    fitted on the ``'cd4+'`` / ``'05'`` file of that sub-directory.

    The scaler has to be fitted before it can transform anything, so files
    visited before the ``'cd4+'`` / ``'05'`` file fail and are skipped. Every
    skipped file is reported on stdout together with the reason (it used to
    be dropped silently).

    Args:
        data_path: directory whose immediate sub-directories are scanned.
        stimuli: stimulus names; their position becomes the ``'y'`` label.
        ctypes: cell type names.
        marker_idx: column indices to keep.

    Returns:
        dict mapping sub-directory name -> dict with keys ``'X'`` (stacked
        data), ``'y'`` (stimulus index per row), ``'ctype'`` (cell type per
        row, flattened with ``flat_list``), ``'labels'`` (channel names of
        the first file that loaded) and ``'scaler'``.

    Raises:
        ValueError: if no file of a sub-directory could be processed.
    """
    lookup = dict()
    dose = 'H'
    labels = None
    print(stimuli)

    for key in get_immediate_subdirectories(data_path):
        subdir = os.path.join(data_path, key)
        full_data_list = []
        stim_list, ctype_list = [], []

        scaler = StandardScaler(with_std=False)
        for ctype in ctypes:
            for jj, stim in enumerate(stimuli):

                tu = (key, ctype, dose, stim)
                fname = os.path.join(subdir, '{0}_{1}_{2}{3}.fcs'.format(*tu))
                try:

                    # read the .fcs file
                    x_full = fcm.loadFCS(fname, transform=None)
                    if labels is None:
                        labels = [x_full.channels[idx] for idx in marker_idx]

                    # keep only interesting markers and arcsinh-transform the data
                    x_full = ftrans(np.asarray(x_full)[:, marker_idx], 5)

                    # fit a mean-shift scaler on control CD4+ T-cells (only on intracellular markers)
                    if (ctype == 'cd4+') and (stim == '05'):
                        scaler.fit(x_full[:, 10:])

                    # and transform all cell types using this scaler
                    x_full[:, 10:] = scaler.transform(x_full[:, 10:])

                    # accumulate all the data seen so far along with their labels
                    full_data_list.append(x_full)
                    stim_list.append(jj * np.ones(x_full.shape[0], dtype=int))
                    ctype_list.append([ctype] * x_full.shape[0])

                except Exception as err:
                    # best effort: report why the file is skipped and go on
                    print('Skipping {0}: {1}'.format(fname, err))

        if not full_data_list:
            # np.vstack([]) would fail here without naming the directory.
            raise ValueError('No data could be loaded from: ' + subdir)

        lookup[key] = {'X': np.vstack(full_data_list),
                       'y': np.hstack(stim_list),
                       'ctype': flat_list(ctype_list),
                       'labels': labels,
                       'scaler': scaler}
    return lookup
Beispiel #11
0
def load_data():
    """Load the fcs samples listed in the clinical table.

    Reads the tab-separated file ``clinical_data_flow_repository.csv`` from
    ``BASEDIR``; every row is unpacked into exactly three values
    ``(ID, y1, y2)``. For rows with ``y1 > 0`` the file
    ``<PATH_COMP>/<prefix>_<ID>_<suffix>`` is loaded, reduced to the columns
    in ``marker_idx`` and passed through ``ftrans(..., 150)``. Only samples
    with more than 3000 rows are kept. Files that cannot be loaded or
    processed are reported on stdout and skipped.

    Returns:
        Tuple ``(data_list, name_list, y)``: the kept arrays, their file
        paths, and an array with one row ``[y1, y2, row index]`` per sample.
    """
    table_path = os.path.join(BASEDIR, 'clinical_data_flow_repository.csv')
    df = pd.read_csv(table_path, sep='\t')
    data_list, name_list = [], []
    ytime_l, ystatus_l, id_l = [], [], []

    for ii in df.index:
        ID, y1, y2 = df.iloc[ii]

        # analyze only samples with positive survival times
        if not (y1 > 0):
            continue

        file1 = os.path.join(PATH_COMP, '_'.join([prefix, str(ID), suffix]))

        try:
            # load the raw .fcs data, keep only interesting markers and
            # arcsinh-transform them
            raw = fcm.loadFCS(file1, transform=None)
            X = ftrans(np.asarray(raw)[:, marker_idx], 150)

            # keep only samples with more than 3000 cells
            if X.shape[0] > 3000:
                print(X.shape)
                data_list.append(X)
                name_list.append(file1)
                ytime_l.append(y1)
                ystatus_l.append(y2)
                id_l.append(ii)

        except Exception:
            print('Could not find or load sample: ' + file1)

    # one column each for time, status and row index
    columns = [np.hstack(values).reshape(-1, 1)
               for values in (ytime_l, ystatus_l, id_l)]
    y = np.hstack(columns)
    return data_list, name_list, y
Beispiel #12
0
	def extract_data(self,file_name):
		"""Load an fcs file from self.folder and keep selected channel columns.

		On success self.data holds the loaded file and self.FSC_H, self.SSC_H,
		self.FSC_A, self.SSC_A and self.GFP hold the columns of the channels
		'FSC-H', 'SSC-H', 'FSC-A', 'SSC-A' and 'FL1-A'.

		If loading raises IOError the call is repeated once after 2 seconds
		(to wait for the cytometer software to export the file). If the second
		attempt fails as well a warning is logged and the method returns
		without updating the attributes. self.extract_error tracks whether a
		retry is in progress and is reset to 0 before returning.
		"""
		try:
			self.data=loadFCS(os.path.join(self.folder,file_name))
		except IOError:
			if self.extract_error==1:
				# this already was the retry: give up
				logging.warning('fcs file named %s was not found in folder %s',file_name,self.folder)
				self.extract_error=0
				return
			self.extract_error=1
			time.sleep(2)
			self.extract_data(file_name)
			return
		self.extract_error=0
		wanted=(('FSC_H','FSC-H'),('SSC_H','SSC-H'),('FSC_A','FSC-A'),('SSC_A','SSC-A'),('GFP','FL1-A'))
		for attribute,channel in wanted:
			# first column whose channel name matches; IndexError if there is none
			matches=[self.data[:,index] for index in range(len(self.data.channels)) if self.data.channels[index]==channel]
			setattr(self,attribute,matches[0])
Beispiel #13
0
def loadFCS(filename, transform=None, auto_comp=False, spill=None, sidx=None, **kwargs):
    """
    Load an FCS file with fcm and return it as an FCQdata object.

    All arguments are forwarded to ``fcm.loadFCS``. The FCQdata object is
    built from the name, root-node data, channels, scatters and a copy of the
    notes of the fcm object, and receives a deep copy of its tree. When
    ``transform`` is ``'ilog'`` the object's ``ilog()`` method is called
    before it is returned.
    """
    from copy import deepcopy

    # Load a FCM object using the loadFCS function
    source = fcm.loadFCS(filename, transform, auto_comp, spill, sidx, **kwargs)

    # convert object to fcq format
    fcs = FCQdata(source.name,
                  source.tree.root.data,
                  source.channels[:],
                  source.scatters[:],
                  source.notes.copy())
    fcs.tree = deepcopy(source.tree)

    # Transform if necessary
    if transform == 'ilog':
        fcs.ilog()
    return fcs
Beispiel #14
0
 def extract_data(self, file_name):
     """Read an fcs file from self.folder and cache five channel columns.

     After a successful load self.data is the loaded file and self.FSC_H,
     self.SSC_H, self.FSC_A, self.SSC_A, self.GFP are the columns of the
     channels 'FSC-H', 'SSC-H', 'FSC-A', 'SSC-A' and 'FL1-A'.

     An IOError while loading triggers exactly one retry after a 2 second
     pause (the cytometer software may still be exporting the file). When
     the retry fails too, a warning is logged and nothing else is updated.
     self.extract_error marks a retry in progress and is 0 on return.
     """
     try:
         self.data = loadFCS(os.path.join(self.folder, file_name))
     except IOError:
         retry_allowed = self.extract_error != 1
         if not retry_allowed:
             logging.warning('fcs file named %s was not found in folder %s',
                             file_name, self.folder)
             self.extract_error = 0
             return
         self.extract_error = 1
         time.sleep(2)
         self.extract_data(file_name)
         return
     self.extract_error = 0

     def column_of(label):
         # first column whose channel name equals label (IndexError if none)
         hits = [
             self.data[:, pos]
             for pos, found in enumerate(self.data.channels)
             if found == label
         ]
         return hits[0]

     self.FSC_H = column_of('FSC-H')
     self.SSC_H = column_of('SSC-H')
     self.FSC_A = column_of('FSC-A')
     self.SSC_A = column_of('SSC-A')
     self.GFP = column_of('FL1-A')
Beispiel #15
0
                   edgecolor='none',
                   alpha=alpha,
                   **kwargs)

    ax.fill(gate.vert.T[0],
            gate.vert.T[1],
            edgecolor='black',
            facecolor='none')


# Manual demo: load a sample fcs file and draw two gates into a 2x2 grid.
if __name__ == '__main__':
    import fcm
    import numpy
    import matplotlib
    import matplotlib.pyplot as plt
    x = fcm.loadFCS('../../sample_data/3FITC_4PE_004.fcs')
    # square polygon gate on channels 0 and 1
    g = PolyGate(numpy.array([[0, 0], [500, 0], [500, 500], [0, 500]]), [0, 1])

    # g3 is created but not used in the lines below
    g3 = QuadGate([250, 300], (2, 3))
    fig = plt.figure()
    ax = fig.add_subplot(2, 2, 1)
    plot_gate(x, g, ax, name="firstgate", alpha=.5, bgalpha=.5)
    ax = fig.add_subplot(2, 2, 2)
    # threshold gate one unit below the maximum of column 2
    mx = x[:, 2].max()
    print mx
    g2 = ThresholdGate(mx - 1, 2)
    plot_gate(x, g2, ax, name="secondgate", chan=(2, 3),
              alpha=.5)  # , bgc='red', c='green')
    print x.shape
    print x[:]
    # switch back to the 'root' node
    x.visit('root')
Beispiel #16
0
from __future__ import division
import os
import sys
import glob
import fcm
import fcm.statistics as stats
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.colors import LinearSegmentedColormap

# Cluster three columns of one fcs file (or reuse cached labels) and derive
# one colour per event from the cluster labels.
if __name__ == '__main__':

    data = fcm.loadFCS('../data/basics/10072101.02')
    cols = [2,3,4]

    x, y, z = data[:,cols[0]], data[:,cols[1]], data[:, cols[2]]
    try:
        # reuse the labels of an earlier run when the cache file exists
        labels = np.load('labels_10072101_02.npy')
    except (IOError, OSError, ValueError):
        # Cache missing or unreadable: fit the mixture model and store the
        # labels for the next run. (A bare ``except:`` here also swallowed
        # KeyboardInterrupt and SystemExit.)
        m = stats.DPMixtureModel(nclusts=32, burnin=1000, niter=100)
        m.ident = True
        r = m.fit(data[:, cols], verbose=10)
        rav = r.average()
        c = rav.make_modal()
        labels = c.classify(data[:, cols])
        np.save('labels_10072101_02.npy', labels)

    # colour map with np.max(labels) levels, one colour per event
    colors = LinearSegmentedColormap('colormap', cm.jet._segmentdata.copy(), np.max(labels))
    cs = [colors(i) for i in labels]
Beispiel #17
0
import sys
# sys.path.append("/Users/cliburn/MyPython")

from fcm import loadFCS
from fcm.statistics import DPMixtureModel
from pylab import scatter, show, subplot, savefig
from fcm.graphics.plot import heatmap

import time

# Demo: draw heatmaps for four channel pairs of the sample file on a 2x2
# grid, save them to foo.tif and show the figure.
if __name__ == '__main__':
    # load data
    data = loadFCS('../sample_data/3FITC_4PE_004.fcs')
    channel_pairs = [(0, 1), (0, 2), (0, 3), (2, 3)]
    heatmap(data, channel_pairs, 2, 2,
            s=1, edgecolors='none', savefile='foo.tif')
    show()
Beispiel #18
0
    kx = (w - 1) / (x1 - x0)
    ky = (h - 1) / (y1 - y0)
    imgw = (w + 2 * border)
    imgh = (h + 2 * border)
    img = np.zeros((imgh, imgw))
    for x, y in data:
        ix = int((x - x0) * kx) + border
        iy = int((y - y0) * ky) + border
        if 0 <= ix < imgw and 0 <= iy < imgh:
            img[iy][ix] += 1
    return img


# Standardise two columns of one fcs file and compute the view limits in
# standardised units.
if __name__ == '__main__':

    data = fcm.loadFCS('../data/basics/10072101.02')
    cols = [3, 4]
    xmin = 0
    ymin = 0
    # upper limits come from the 'p<N>r' text notes (N is 1-based)
    xmax = int(data.notes.text['p%dr' % (1 + cols[0])])
    ymax = int(data.notes.text['p%dr' % (1 + cols[1])])

    x, y = data[:, cols[0]], data[:, cols[1]]
    xmu, xsd = x.mean(), x.std()
    ymu, ysd = y.mean(), y.std()
    x1 = (x - xmu) / xsd
    y1 = (y - ymu) / ysd

    view_xmin = (xmin - xmu) / xsd
    # was computed from xmin; harmless only while xmin == ymin
    view_ymin = (ymin - ymu) / ysd
    view_xmax = (xmax - xmu) / xsd
Beispiel #19
0
import matplotlib.pyplot as plt
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.collections import PolyCollection
from matplotlib.colors import LinearSegmentedColormap

# Draw the CD4-PE histograms of several fcs files as polygons stacked along
# the y axis of a 3D plot.
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

fs = glob.glob(os.path.join('..', 'data', 'basics', '*01'))
# one colour per file
colors = LinearSegmentedColormap(
    'colormap', cm.jet._segmentdata.copy(), len(fs))

verts = []
for f in fs:
    cd4 = fcm.loadFCS(f)[:, 'CD4-PE']
    z, edges = np.histogram(cd4, bins=50)
    # average of neighbouring bin counts
    y = 0.5 * (z[1:] + z[:-1])
    x = np.arange(len(y))
    # list(...) is required on Python 3, where zip() returns an iterator
    # that np.array() cannot turn into an (n, p, 2) array.
    verts.append(list(zip(x, y)))

verts = np.array(verts)
n, p, d = verts.shape
maxz = np.max(verts[:, 0])

poly = PolyCollection(verts, facecolors=[colors(i) for i in range(n)])
poly.set_alpha(0.7)
ax.add_collection3d(poly, zs=np.arange(n), zdir='y')

ax.set_xlabel('CD4-PE')
ax.set_xlim3d(0, p)
Beispiel #20
0
import sys
# sys.path.append("/Users/cliburn/MyPython")

from fcm import loadFCS
from fcm.statistics import DPMixtureModel
from pylab import scatter, show, subplot, savefig
from fcm.graphics.plot import heatmap

import time

# Demo: plot heatmaps of four channel pairs from the sample file in a 2x2
# layout, write them to foo.tif and display the result.
if __name__ == '__main__':
    # load data
    data = loadFCS('../sample_data/3FITC_4PE_004.fcs')
    plot_options = dict(s=1, edgecolors='none', savefile='foo.tif')
    heatmap(data, [(0, 1), (0, 2), (0, 3), (2, 3)], 2, 2, **plot_options)
    show()
Beispiel #21
0
"""Overlay and stacked histograms."""

import os
import pylab
import glob
import fcm

# number of histogram bins
bins = 50
# every file below ../data/basics whose name ends in '01'
fs = glob.glob(os.path.join('..', 'data', 'basics', '*01'))

# overlay histogram: all files are drawn into the same (single) subplot
for f in fs:
    path, filename = os.path.split(f)
    name, ext = os.path.splitext(filename)  # not used below
    data = fcm.loadFCS(f)
    pylab.subplot(1,1,1)
    # one step-style outline per file, labelled with the file name
    pylab.hist(data[:, 'CD4-PE'], bins, histtype='step', label=filename)
pylab.xlabel('CD4-PE')
pylab.ylabel('Counts')
pylab.legend()
pylab.tight_layout()

pylab.show()
Beispiel #22
0
#!/usr/bin/env python

import os,time
import fcm
import fcm.statistics
import pickle
import numpy as np

#print 'listing shared library depens.'
#os.system("ldd /home/clemmys/research/py-fcm-gpu/src/statistics/_cdp.so")

# Load one fcs file, draw a reproducible random subsample of its rows and
# fit a DP mixture model on it, timing the fit.
# The print(...) calls use the single-argument form, which behaves the same
# as the Python 2 print statement and the Python 3 print function.
print('loading data')
#fileNameFCS = os.path.join("..","cytostream","example_data","3FITC_4PE_004.fcs")
fileNameFCS = "/home/clemmys/research/eqapol/donors/../materials/EQAPOL_4c_ICS_Donor_Screening/Assay_Data/EQAPOL_4c_ICS_08Apr11/FCS files/H6904VB6_01 Costim 3 C3.031"
fileFCS = fcm.loadFCS(fileNameFCS)

print('get subsample')
subsample = 1e4
np.random.seed(99)
n,d = np.shape(fileFCS)
# randint replaces the deprecated random_integers; its upper bound is
# exclusive, so (0, n) covers the same indices as random_integers(0, n-1).
# The sample count has to be an integer, hence int(subsample).
subsampleIndices = np.random.randint(0,n,int(subsample))
data = fileFCS[subsampleIndices,:]

print('loading model')
mod = fcm.statistics.DPMixtureModel(data, 16)
print('cuda device: %s' % (mod.cdp.getdevice(),))

print('running model')
modelRunStart = time.time()
mod.fit(verbose=False)
modelRunStop = time.time()
Beispiel #23
0
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.collections import PolyCollection
from matplotlib.colors import LinearSegmentedColormap

# Stack the CD4-PE histograms of several fcs files as coloured polygons in a
# 3D plot, one polygon per file.
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

fs = glob.glob(os.path.join('..', 'data', 'basics', '*01'))
colors = LinearSegmentedColormap('colormap', cm.jet._segmentdata.copy(), len(fs))

verts = []
for f in fs:
    cd4 = fcm.loadFCS(f)[:, 'CD4-PE']
    z, edges = np.histogram(cd4, bins=50)
    # mean of each pair of neighbouring bin counts
    y = 0.5*(z[1:] + z[:-1])
    x = np.arange(len(y))
    # Materialise the pairs: on Python 3 zip() is a lazy iterator and
    # np.array(verts) would not produce the 3-D array unpacked below.
    verts.append(list(zip(x, y)))

verts = np.array(verts)
n, p, d = verts.shape
maxz = np.max(verts[:, 0])

poly = PolyCollection(verts, facecolors = [colors(i) for i in range(n)])
poly.set_alpha(0.7)
ax.add_collection3d(poly, zs=np.arange(n), zdir='y')

ax.set_xlabel('CD4-PE')
ax.set_xlim3d(0, p)
Beispiel #24
0
 def test_load_fcs_from_memory():
     """Check that loadFCS accepts an in-memory binary stream."""
     import io
     # The file is wrapped in a BytesIO, so it must be read in binary mode:
     # text mode yields str on Python 3 (BytesIO then raises TypeError) and
     # can alter the bytes on platforms that translate line endings.
     with open('sample_data/3FITC_4PE_004.fcs', 'rb') as f:
         mem_file = io.BytesIO(f.read())
     loadFCS(mem_file)
Beispiel #25
0
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mp
from matplotlib.ticker import NullFormatter
from matplotlib.ticker import MultipleLocator
from fcm import loadFCS

# Load FCS file using loadFCS from fcm
data = loadFCS("3FITC_4PE_004.fcs")
# the two scatter channels, selected by channel name
x = data[:, 'FSC-H']
y = data[:, 'SSC-H']

# definitions for the axes
left, width = 0.1, 0.65
bottom, height = 0.1, 0.65
# the histogram axes start 0.02 beyond the scatter axes
bottom_h = left_h = left + width + 0.02

# rectangles are [left, bottom, width, height]
rect_scatter = [left, bottom, width, height]
rect_histx = [left, bottom_h, width, 0.2]
rect_histy = [left_h, bottom, 0.2, height]

plt.figure(1, figsize=(10, 10))

# main scatter axes plus one histogram axes above and one to the right;
# the tick labels on the shared axis of each histogram are suppressed
axScatter = plt.axes(rect_scatter)
axHistx = plt.axes(rect_histx)
axHistx.xaxis.set_major_formatter(NullFormatter())
axHistx.xaxis.set_minor_formatter(NullFormatter())
axHisty = plt.axes(rect_histy)
axHisty.yaxis.set_major_formatter(NullFormatter())
axHisty.yaxis.set_minor_formatter(NullFormatter())
Beispiel #26
0
import pylab
import glob
import fcm
from matplotlib.colors import LinearSegmentedColormap

# Draw one CD4-PE histogram per fcs file, stacked vertically, and save the
# figure as stacked_hist.png.
bins = 50
fs = glob.glob(os.path.join('..', 'data', 'basics', '*01'))
colors = LinearSegmentedColormap('colormap', pylab.cm.jet._segmentdata.copy(),
                                 len(fs))

# The grid was hard-coded to 5 rows, which fails as soon as a sixth file
# matches. Keep 5 rows as the minimum so the layout for up to five files is
# unchanged, and grow the grid when there are more.
n_rows = max(5, len(fs))

# stacked histogram
pylab.figure(figsize=(5, 15))
for k, f in enumerate(fs):
    path, filename = os.path.split(f)
    name, ext = os.path.splitext(filename)
    data = fcm.loadFCS(f)
    pylab.subplot(n_rows, 1, k + 1)
    pylab.hist(data[:, 'CD4-PE'],
               bins,
               histtype='step',
               color=colors(k),
               label=name)
    pylab.ylabel('Counts')
    # only the last subplot keeps its x axis label and ticks
    if k == (len(fs) - 1):
        pylab.xlabel('CD4-PE')
    else:
        pylab.xticks([])
    pylab.legend()

pylab.tight_layout()
pylab.savefig('stacked_hist.png')
Beispiel #27
0
 def testMultiLoad(self):
     """Load the sample file 100 times in a row without a transform."""
     sample_path = '../sample_data/3FITC_4PE_004.fcs'
     for _ in range(100):
         loadFCS(sample_path, transform=None)
Beispiel #28
0
 def testIOLoad(self):
     """Check that loadFCS can read from an in-memory binary stream."""
     import io
     # Read the bytes in binary mode: the content goes into a BytesIO, which
     # rejects the str returned by a text-mode read on Python 3.
     with open('../sample_data/3FITC_4PE_004.fcs', 'rb') as f:
         mem_file = io.BytesIO(f.read())
     fcm_data = loadFCS(mem_file)
Beispiel #29
0
def pyfcm_load_fcs_file(filePath):
    """Return the object produced by ``fcm.loadFCS`` for ``filePath``."""
    return fcm.loadFCS(filePath)
Beispiel #30
0
#                              (numpy.max(y)-numpy.min(y))*(bins-1))
#     zfrac, zint = numpy.modf((z - numpy.min(z))/
#                              (numpy.max(z)-numpy.min(z))*(bins-1))

#     xint = xint.astype('i')
#     yint = yint.astype('i')
#     zint = zint.astype('i')

#     # not interpolated - kiv write trilinear_interpolate function
#     h, edges = numpy.histogramdd(fcm[:,[idx0, idx1, idx2]], bins=bins)
#     v = h[xint, yint, zint]

    mlab.figure()
    mlab.points3d(x, y, z, s, mode='point')
    mlab.xlabel(fcm.channels[idx0])
    mlab.ylabel(fcm.channels[idx1])
    mlab.zlabel(fcm.channels[idx2])

# Manual demo: load the sample file and run surface() and spin() on it.
if __name__ == '__main__':
    import sys
    # extend the import path with the parent directory before importing fcm
    sys.path.append('../')
    import fcm

    xs = fcm.loadFCS('../../sample_data/3FITC_4PE_004.fcs')

    # channel indices 2 and 3
    surface(xs, 2, 3)

    # channel indices 1, 2 and 3
    spin(xs, 1, 2, 3)

    mlab.show()
Beispiel #31
0
import fcm
import matplotlib.pyplot as plt

# load FCS data
data = fcm.loadFCS('3FITC_4PE_004.fcs')

# define a gate: a rectangular polygon on channels 0 and 1
gate1 = fcm.PolyGate([(400, 100), (400, 300), (600, 300), (600, 100)], (0, 1))

# apply the gate
gate1.gate(data)

# outputs:
# root
#    g1
print data.tree.pprint()

# g1 isn't an informative name, so let's rename it to events
current_node = data.current_node
data.tree.rename_node(current_node.name, 'events')
# outputs:
# root
#    events
print data.tree.pprint()

# return to the root node and plot channels 0 and 1 of all events in grey
data.tree.visit('root')
plt.figure(figsize=(4, 4))
plt.scatter(data[:, 0], data[:, 1], s=1, edgecolors='none', c='grey')

# and visit the subset of interest to plot
Beispiel #32
0
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mp
from matplotlib.ticker import NullFormatter
from matplotlib.ticker import MultipleLocator
from fcm import loadFCS

# Load FCS file using loadFCS from fcm
data = loadFCS("3FITC_4PE_004.fcs")
# forward and side scatter columns, selected by channel name
x = data[:, 'FSC-H']
y = data[:, 'SSC-H']

# definitions for the axes
left, width = 0.1, 0.65
bottom, height = 0.1, 0.65
# offset at which the two histogram axes begin
bottom_h = left_h = left + width + 0.02

# each rectangle is [left, bottom, width, height]
rect_scatter = [left, bottom, width, height]
rect_histx = [left, bottom_h, width, 0.2]
rect_histy = [left_h, bottom, 0.2, height]

plt.figure(1, figsize=(10, 10))


# scatter axes, a histogram axes on top and a histogram axes on the right;
# NullFormatter removes the tick labels on the histograms' shared axis
axScatter = plt.axes(rect_scatter)
axHistx = plt.axes(rect_histx)
axHistx.xaxis.set_major_formatter(NullFormatter())
axHistx.xaxis.set_minor_formatter(NullFormatter())
axHisty = plt.axes(rect_histy)
axHisty.yaxis.set_major_formatter(NullFormatter())
axHisty.yaxis.set_minor_formatter(NullFormatter())
## validate k: it may only consist of digits
## (single-argument print(...) calls behave the same as the Python 2 print
## statement and the Python 3 print function)
if re.search(r'\D',str(k)):
    print("INPUT ERROR: k must be numeric")
else:
    k = int(k)

## initialize a logger and a model to get specified files and channels
print('initializing logger')
log = Logger()
log.initialize(projectID,homeDir,load=True)

model = Model()
model.initialize(projectID,homeDir)

## load the data into py-fcm
if re.search(r"\.fcs",longFileName):
    data = fcm.loadFCS(longFileName)
elif re.search(r"\.pickle",longFileName):
    ## pickles are binary: open with 'rb' and close the handle when done
    ## NOTE: unpickling executes arbitrary code -- only load trusted files
    with open(longFileName,'rb') as pickleFile:
        data = cPickle.load(pickleFile)

## account for excluded channels
#excludedChannels = log.log['excludedChannels']
#
#if type(log.log['excludedChannels']) != type([]):
#    excludedChannels = []
#
#fileChannels = model.get_file_channel_list(fileName)
#allChannels = range(len(fileChannels))
#excludedIndices = []

#if len(excludedChannles) > 0:
#    for chan in excludedChannels:
Beispiel #34
0
def report(glb, out):
    """generate a nice summary report of a group of fcs files

    Every file matching the glob pattern ``glb`` is loaded and grouped into
    a "panel" by ``hsh(x)``. The report is written as markdown to
    ``panels.md`` in the current directory and, for every panel, two figures
    are saved next to it: ``nevents_panel_<n>.png`` (histogram of the number
    of events per file) and ``dist_panel_<n>.png`` (log10 of the per-channel
    means). The report ends with the markers common to all panels and, per
    short channel name, the channel names seen with their counts.

    Args:
        glb: glob pattern selecting the fcs files.
        out: accepted but not used; the output file names are fixed.
            NOTE(review): presumably meant as output location -- confirm.
    """
    panels = {}
    means = defaultdict(list)
    nevents = defaultdict(list)
    laserabs = defaultdict(list)
    for fname in glob(glb):
        print(fname)
        x = fcm.loadFCS(fname)
        h = hsh(x)
        if h not in panels:
            panels[h] = x.long_names
        # A separate index name is used here: the loop used to reuse the file
        # loop variable and overwrite the file name with a channel index.
        for idx, channel in enumerate(x.channels):
            laserabs[x.short_names[idx]].append(channel)

        means[h].append(x.mean(0))
        nevents[h].append(x.shape[0])

    # Markers present in every panel; empty when no file matched
    # (indexing panels.keys() fails on Python 3 and on an empty dict).
    if panels:
        common = set.intersection(*[set(names) for names in panels.values()])
    else:
        common = set()

    with open("panels.md", "w") as f:
        f.write("Panels\n")
        f.write("======\n")
        for i, j in enumerate(panels):
            f.write("Panel %d:\n" % i)
            f.write("-" * len("Panel %d:" % i))
            f.write("\n")
            f.write("number of samples: %d\n" % len(means[j]))
            f.write("\n")
            for k in panels[j]:
                f.write(" * %s\n" % k)
            f.write("\n")
            # draw number of events figure
            fig = plt.figure()
            z = np.array(nevents[j])
            ax = fig.add_subplot(1, 1, 1)
            ax.hist(z, bins=bin_size(z), histtype="step")
            ax.set_xlabel("Number of events")
            plt.tight_layout()
            fig.savefig("nevents_panel_%d.png" % i)
            # release the figure; pyplot keeps every open figure alive
            plt.close(fig)
            f.write("![distribution of number of events](nevents_panel_%d.png)\n" % i)
            # draw distribution of means
            fig = plt.figure()
            ax = fig.add_subplot(1, 1, 1)
            mean = np.array(means[j]).squeeze()
            if len(means[j]) > 1:
                bp = ax.boxplot(np.log10(mean))
                set_color(bp)
            else:
                # a single sample: plot its means as points
                ax.plot(np.arange(len(mean)), np.log10(mean), "b+")
                ax.set_xticks(np.arange(len(mean)))
                ax.set_xlim((-1, len(mean)))
            ax.set_xticklabels(panels[j], rotation=90)
            ax.set_ylabel(r"log_10()")
            ax.set_title("Distribution of Means")
            plt.subplots_adjust(bottom=0.8)
            plt.tight_layout()
            fig.savefig("dist_panel_%d.png" % i)
            plt.close(fig)
            f.write("![distribtuion of means by channel](dist_panel_%d.png)\n" % i)
            f.write("\n")
        f.write("Common Markers:\n")
        f.write("---------------\n")
        for j in common:
            f.write(" * %s\n" % j)

        f.write("\n")
        f.write("Detector overview:\n")
        f.write("---------------\n")
        for i in laserabs:
            markers = sorted(set(laserabs[i]))
            # every entry is prefixed with ", " (output format kept as is)
            tmp = "".join(", %s (%d)" % (j, laserabs[i].count(j))
                          for j in markers)
            f.write("%s\t: %s\n\n" % (i, tmp))
        f.write("\n")
    print("DONE")
Beispiel #35
0
 def test_load_fcs():
     """Load the sample fcs file 100 times without applying a transform."""
     repetitions = 100
     for _ in range(repetitions):
         loadFCS('sample_data/3FITC_4PE_004.fcs', transform=None)