Example 1
def test_hlog_on_fc_measurement(self):
    fc_measurement = FCMeasurement(ID='test', datafile=test_path)
    fc_measurement = fc_measurement.transform(transform='hlog', b=10)
    data = fc_measurement.data.values[:3, :4]
    correct_output = np.array([
        [-8.22113965e+03, 1.20259949e+03, 1.01216449e-06, 5.21899170e+03],
        [-8.66184277e+03, 1.01013794e+03, 1.01216449e-06, 5.71275928e+03],
        [-8.79974414e+03, 1.52737976e+03, 1.01216449e-06, -4.95852930e+03]])
    np.testing.assert_array_almost_equal(
        data, correct_output, 5,
        err_msg='the hlog transformation gives an incorrect result')
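For intuition about what this test checks, here is a minimal sketch applying the same transform to raw values directly; it assumes the hlog helper exposed in FlowCytometryTools.core.transforms (linked from Example 9 below), and the input values are made up.

import numpy as np
from FlowCytometryTools.core.transforms import hlog

raw = np.array([-8000.0, 0.0, 1000.0, 50000.0])
# With a small b, hlog is nearly linear only very close to zero and
# log-like elsewhere, which is why the test above uses b=10.
print(hlog(raw, b=10))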
Example 3
    def __read_fcs_file_to_fcm(self, fcs_file_name):
        fcs_file = os.path.join(SHARED_RAW_DIR, fcs_file_name)
        if not os.path.exists(fcs_file):
            print('FCS file does not exist:', fcs_file)
            # fall back to the bundled sample file (e.g. when running from the CLI)
            fcs_file = os.path.join(SHARED_RAW_DIR, 'fcs_file.fcs')

        # Load data
        tsample = FCMeasurement(ID='Test Sample', datafile=fcs_file)
        if self.transformation:
            tsample = tsample.transform(self.transformation, b=self.bins)

        self.channel_names = tsample.channel_names
        if not self.channel_name1 and not self.channel_name2:
            print('No channel names given; defaulting to the first two:',
                  self.channel_names)
            self.channel_name1 = self.channel_names[0]
            self.channel_name2 = self.channel_names[1]
        else:
            self.channel_names = [self.channel_name1, self.channel_name2]

        self.sample = tsample
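The method above reads several attributes off self; a minimal, hypothetical host class showing the attributes it expects (the names mirror the snippet, the defaults are assumptions):

class FcsLoader:  # hypothetical host for __read_fcs_file_to_fcm
    def __init__(self, transformation='hlog', bins=500,
                 channel_name1=None, channel_name2=None):
        self.transformation = transformation  # e.g. 'hlog', or None to skip
        self.bins = bins                      # passed as b= to transform()
        self.channel_name1 = channel_name1    # leave None to auto-pick
        self.channel_name2 = channel_name2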
Example 4
import os

from pylab import *

import FlowCytometryTools
from FlowCytometryTools import FCMeasurement

# Locate sample data included with this package
datadir = os.path.join(FlowCytometryTools.__path__[0], 'tests', 'data',
                       'Plate01')
datafile = os.path.join(datadir, 'RFP_Well_A3.fcs')

# datafile = '[insert path to your own fcs file]'

# Load data
tsample = FCMeasurement(ID='Test Sample', datafile=datafile)
tsample = tsample.transform('hlog', channels=['Y2-A', 'B1-A', 'V2-A'], b=500.0)

# Plot
tsample.plot(['Y2-A', 'B1-A'], kind='scatter', alpha=0.6, color='gray')
grid(True)

#show() # <-- Uncomment when running as a script.
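A common next step after this tutorial snippet is gating; a minimal sketch using FlowCytometryTools' ThresholdGate (the threshold value here is arbitrary):

from FlowCytometryTools import ThresholdGate

# Keep only events above an arbitrary hlog-scale threshold on Y2-A
y2_gate = ThresholdGate(1000.0, ['Y2-A'], region='above')
gated_sample = tsample.gate(y2_gate)
print(gated_sample.data.shape[0], 'events pass the gate')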
Example 6
def custom_compensate(original_sample):
    # Copy the original sample
    new_sample = original_sample.copy()
    new_data = new_sample.data
    original_data = original_sample.data

    # Our transformation goes here
    new_data['Y2-A'] = original_data['Y2-A'] - 0.15 * original_data['FSC-A']
    new_data['FSC-A'] = original_data['FSC-A'] - 0.32 * original_data['Y2-A']
    new_data = new_data.dropna() # Removes all NaN entries
    new_sample.data = new_data
    return new_sample

# Load data
sample = FCMeasurement(ID='Test Sample', datafile=datafile)
sample = sample.transform('hlog')

compensated_sample = sample.apply(custom_compensate)

###
# To do this with a collection (a plate):
# compensated_plate = plate.apply(compensate, output_format='collection')
#

# Plot
sample.plot(['Y2-A', 'FSC-A'], kind='scatter', color='gray', alpha=0.6, label='Original')
compensated_sample.plot(['Y2-A', 'FSC-A'], kind='scatter', color='green', alpha=0.6, label='Compensated')

legend(loc='best')
grid(True)
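To apply the same compensation to a whole plate, as the comment above suggests, a sketch along these lines should work; FCPlate.from_dir and the 'name' parser are part of FlowCytometryTools, while the directory layout (a folder of .fcs files named by well) is an assumption:

from FlowCytometryTools import FCPlate

plate = FCPlate.from_dir(ID='Demo Plate', path=datadir, parser='name')
plate = plate.transform('hlog')
compensated_plate = plate.apply(custom_compensate, output_format='collection')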
Example 8
samples = []
pattern = 'Tube'

for file in os.listdir(datadir):
    if file.endswith(".fcs") and pattern in file:
        print(file)
        samples.append(FCMeasurement(ID=file, datafile=os.path.join(datadir, file)))

#%%
from LoadFlowSamples_v1 import Samples

datadir = '/Users/Alonyan/GoogleDrive/Experiments/Th1Th2'
pattern = 'Tube'
FS = Samples(datadir, pattern)
#%%
tsample = samples[0].transform('hlog',
                               channels=['PE-Texas Red-A', 'Pacific Blue-A'],
                               b=500.0)
#%%
import pylab as pl
#%%
pl.figure()
tsample.plot('Pacific Blue-A', bins=150)

#%%

pl.figure(num=None, figsize=(5.1, 6.6), dpi=80, facecolor='w', edgecolor='k')

tsample.plot(['PE-Texas Red-A', 'Pacific Blue-A'],
             cmap=pl.cm.gist_rainbow,
             bins=150)
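To compare all the tubes loaded in the first cell rather than a single one, a short sketch (channel names as above; transform returns a new measurement, so the originals are untouched):

#%%
pl.figure()
for s in samples:
    ts = s.transform('hlog', channels=['PE-Texas Red-A', 'Pacific Blue-A'], b=500.0)
    ts.plot('Pacific Blue-A', bins=150, alpha=0.5)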
Example 9
import numpy as np
import pandas as pd
from FlowCytometryTools import FCMeasurement


class flowsom():
    """
    Class for flowSOM clustering.
    """

    def __init__(self,
                 file_address,
                 if_fcs=True,
                 if_drop=True,
                 drop_col=['Time']):
        """
        Read the fcs file as pd.Dataframe

        Parameters
        ----------
        file_address : string
                       e.g. r'#40 Ab.fcs' or 'flowmetry.csv'
        if_fcs : bool
                 whether the input file is an fcs file; if not, it should be a csv file
        if_drop : bool
                  whether some columns should be ignored
        drop_col : list of strings
                   list of column names to be dropped
        """
        if if_fcs:
            self.info = FCMeasurement(ID='Train', datafile=file_address)
            df = self.info.data
        else:
            df = pd.read_csv(file_address)

        self.df = df
        if if_drop:
            self.df = df.drop(drop_col, axis=1)

    def tf(self, tf_str=None, if_fcs=True):
        """
        Transform the data; available transform methods include 'hlog', 'hlog_inv', 'glog', 'tlog'...
        For details, see: https://github.com/eyurtsev/FlowCytometryTools/blob/master/FlowCytometryTools/core/transforms.py#L242

        Parameters
        ----------
        tf_str : string
                 e.g. 'hlog', the transform algorithm
        if_fcs : bool
                 whether the input file is an fcs file; if not, it should be a
                 csv file. Only an fcs file can be transformed; for a csv file
                 you have to supply your own transform function.
        """
        log_data = pd.DataFrame()
        if if_fcs and tf_str:
            for col in self.df.columns:
                hsample = self.info.transform(
                    tf_str, channels=[col])  # transform one column at a time
                h_data = hsample.data[col]  # get the transformed column
                log_data[col] = h_data.values  # store it in the new df
            self.tf_df = log_data
            self.tf_matrix = log_data.values
        else:
            # TODO: transform for ndarray

            self.tf_df = self.df
            self.tf_matrix = self.df.values

    def som_mapping(self,
                    x_n,
                    y_n,
                    d,
                    sigma,
                    lr,
                    batch_size,
                    neighborhood='gaussian',
                    tf_str=None,
                    if_fcs=True,
                    seed=10):
        """
        Perform SOM on transform data

        Parameters
        ----------
        x_n : int
              the x dimension of the expected map
        y_n : int
              the y dimension of the expected map
        d : int
            vector length of the input df
        sigma : float
                the standard deviation of the initialized weights
        lr : float
             learning rate
        batch_size : int
                     number of training iterations
        neighborhood : string
                       e.g. 'gaussian', the neighborhood function of the map
        tf_str : string
                 transform parameter, see self.tf()
                 e.g. None or 'hlog' - the transform algorithm
        if_fcs : bool
                 transform parameter, see self.tf()
                 whether the input file is an fcs file; if not, it should be a
                 csv file, and only an fcs file can be transformed
        seed : int
               random seed, for reproducibility
        """
        from minisom import MiniSom

        self.tf(tf_str, if_fcs)
        som = MiniSom(x_n,
                      y_n,
                      d,
                      sigma,
                      lr,
                      neighborhood_function=neighborhood,
                      random_seed=seed)  # initialize the map
        som.pca_weights_init(self.tf_matrix)  # initialize the weights
        print("Training...")
        som.train_batch(self.tf_matrix, batch_size,
                        verbose=True)  # random training
        print("\n...ready!")
        self.x_n = x_n
        self.y_n = y_n
        self.map_som = som
        self.weights = som.get_weights()
        self.flatten_weights = self.weights.reshape(x_n * y_n, d)

    def meta_clustering(self,
                        cluster_class,
                        min_n,
                        max_n,
                        iter_n,
                        resample_proportion=0.7,
                        verbose=False):
        """
        Perform meta clustering on SOM

        Parameters
        ----------
        cluster_class : class
                        e.g. KMeans, a cluster class, like "from sklearn.cluster import KMeans"
        min_n : int
                the minimum proposed number of clusters
        max_n : int
                the maximum proposed number of clusters
        iter_n : int
                 the number of iterations for each number of clusters
        resample_proportion : float
                              within (0, 1), the proportion of re-sampling when
                              computing clustering
        verbose : bool
                  whether to print out the clustering process
        """

        # initialize cluster
        # NOTE: ConsensusCluster (a consensus-clustering wrapper around
        # cluster_class) is assumed to be defined or imported elsewhere;
        # this snippet does not import it.
        cluster_ = ConsensusCluster(cluster_class,
                                    min_n,
                                    max_n,
                                    iter_n,
                                    resample_proportion=resample_proportion)
        cluster_.fit(self.flatten_weights,
                     verbose)  # fitting SOM weights into clustering algorithm

        self.cluster_map = cluster_
        self.bestk = cluster_.bestK  # the best number of clusters in range(min_n, max_n)

        # get the prediction of each weight vector on meta clusters (on bestK)
        self.flatten_class = cluster_.predict_data(self.flatten_weights)
        self.map_class = self.flatten_class.reshape(self.x_n, self.y_n)

    def vis(self, t, with_labels, node_size, edge_color):
        """
        Visualize the meta cluster result with minimal spanning tree

        Parameters
        ----------
        t : int
            multiplier for the number of nodes, n = t * bestK
        with_labels : bool
                      whether each node is drawn with its cluster label
        node_size : int
                    size of the drawn nodes
        edge_color : string
                     e.g. 'b', the color of the edges
        """
        from matplotlib.gridspec import GridSpec
        import networkx as nx
        import numpy as np
        from collections import Counter
        import matplotlib.pyplot as plt
        import matplotlib.cm as cm

        # generate n clusters (n = bestK * t)
        self.cluster_map.bestK = self.bestk * t
        self.over_class = self.cluster_map.predict_data(self.flatten_weights)
        centroid_list = []

        # Compute the centroid for each clusters
        for i in np.unique(self.over_class):
            centroid = np.mean(self.flatten_weights[self.over_class == i],
                               axis=0)
            centroid_list.append(centroid)
        self.centroid_list = centroid_list

        # Generate a fully connected graph of n cluster centroids for future computation
        # on minimal spanning tree
        # (node: centroid of cluster, weight of edge: distance between two nodes)
        G = nx.Graph()

        for i in range(len(centroid_list)):
            for j in range(i + 1, len(centroid_list)):
                # compute the distance between two nodes
                w = np.sqrt(
                    np.dot(centroid_list[i], centroid_list[i]) -
                    2 * np.dot(centroid_list[i], centroid_list[j]) +
                    np.dot(centroid_list[j], centroid_list[j]))
                w /= 1
                G.add_edge(i, j, weight=w)
        self.graph = G
        mst = nx.minimum_spanning_tree(G)  # compute the minimum spanning tree
        self.mst = mst

        # generate the plot
        edges, weights = zip(*nx.get_edge_attributes(mst, 'weight').items())
        print(self.bestk)
        color_list = cm.rainbow(np.linspace(0, 1, self.bestk))
        color_map = []
        for node in mst:
            # color each node by the meta cluster most common among its members
            class_id, _ = Counter(
                self.flatten_class[self.over_class == node]).most_common()[0]
            try:
                color_map.append(color_list[class_id])
            except IndexError:
                print('something wrong with plotting cluster %d!' % class_id)
        nx.draw(mst,
                with_labels=with_labels,
                node_size=node_size,
                node_color=color_map,
                edgelist=edges,
                edge_color=edge_color,
                width=np.array(weights) * 100,  # weights is a tuple; scale per-edge widths
                edge_cmap=plt.cm.Blues)
        # plt.show()

    def labeling(self, verbose=True):
        """
        Predict a cluster for every event in the dataset and store the result
        in a new 'category' column on both self.df and self.tf_df.
        """
        label_list = []
        for i in range(len(self.tf_matrix)):
            # print the milestone
            if verbose:
                if i % 10000 == 0:
                    print('%d samples done...' % i)

            xx = self.tf_matrix[i, :]  # fetch the sample data
            winner = self.map_som.winner(xx)  # the closest node location in the SOM
            c = self.map_class[winner]  # map the node location to its meta cluster
            label_list.append(c)
        self.df['category'] = label_list
        self.tf_df['category'] = label_list
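A hypothetical end-to-end run of the class above (the file name and all parameters are placeholders, and meta_clustering assumes a ConsensusCluster implementation is available as noted in the code):

from sklearn.cluster import KMeans

fs = flowsom('my_sample.fcs', if_fcs=True, if_drop=True, drop_col=['Time'])
fs.som_mapping(x_n=10, y_n=10, d=fs.df.shape[1], sigma=1.0, lr=0.5,
               batch_size=5000, tf_str='hlog')
fs.meta_clustering(KMeans, min_n=3, max_n=10, iter_n=5)
fs.labeling()
print(fs.df['category'].value_counts())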
Example 10
import matplotlib.pyplot as plt

from FlowCytometryTools import FCMeasurement

# Initialize a figure with three stacked panels and no space between them
fig = plt.figure(figsize=(2, 2))
plt.subplots_adjust(hspace=0.0, wspace=0.0)

# Load data
datafile = r'/Users/anazuniga/Documents/phyton/FlowCytometryTools/FCplate 2 input/2MP1/export_P7R_Single Cells.fcs'
tsample = FCMeasurement(ID='Test Sample', datafile=datafile)
tsample = tsample.transform('hlog', channels=['GFP-H', 'SSC-H', 'mCherry-H', 'FSC-H', 'V1-H'], b=200.0)

# Plot the first panel
ax = plt.subplot(3, 1, 1)
tsample.plot(['V1-H', 'SSC-H'], kind='scatter', alpha=0.05, color='black', s=1)
ax.tick_params(labelbottom=False, labelleft=False)
ax.set_ylim(1000, 10000)
ax.set_ylabel('')
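The snippet sets up only the first of its three panels; a sketch of the remaining two, assuming the other channel pairs follow the same pattern against SSC-H:

ax2 = plt.subplot(3, 1, 2)
tsample.plot(['GFP-H', 'SSC-H'], kind='scatter', alpha=0.05, color='black', s=1)
ax2.tick_params(labelbottom=False, labelleft=False)

ax3 = plt.subplot(3, 1, 3)
tsample.plot(['mCherry-H', 'SSC-H'], kind='scatter', alpha=0.05, color='black', s=1)
plt.show()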