def test_hlog_on_fc_measurement(self):
    fc_measurement = FCMeasurement(ID='test', datafile=test_path)
    fc_measurement = fc_measurement.transform(transform='hlog', b=10)
    data = fc_measurement.data.values[:3, :4]
    correct_output = np.array([
        [-8.22113965e+03, 1.20259949e+03, 1.01216449e-06, 5.21899170e+03],
        [-8.66184277e+03, 1.01013794e+03, 1.01216449e-06, 5.71275928e+03],
        [-8.79974414e+03, 1.52737976e+03, 1.01216449e-06, -4.95852930e+03]
    ])
    np.testing.assert_array_almost_equal(
        data, correct_output, 5,
        err_msg='the hlog transformation gives an incorrect result')
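# A minimal sketch for regenerating the expected values above if the transform
# is intentionally changed (assumes `test_path` points at the same test .fcs
# file used by the test):
from FlowCytometryTools import FCMeasurement
import numpy as np

sample = FCMeasurement(ID='test', datafile=test_path)
sample = sample.transform(transform='hlog', b=10)
print(repr(sample.data.values[:3, :4]))  # paste the output into correct_output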
def __read_fcs_file_to_fcm(self, fcs_file_name):
    fcs_file = os.path.join(SHARED_RAW_DIR, fcs_file_name)
    if not os.path.exists(fcs_file):
        print('FCS file does not exist:', fcs_file)
        # return False
        fcs_file = os.path.join(SHARED_RAW_DIR, 'fcs_file.fcs')  # fallback when running from the CLI

    # Load data
    tsample = FCMeasurement(ID='Test Sample', datafile=fcs_file)
    if self.transformation:
        tsample = tsample.transform(self.transformation, b=self.bins)

    self.channel_names = tsample.channel_names
    if not self.channel_name1 and not self.channel_name2:
        # No channels were requested: default to the first two in the file
        print('Channel names not set; available channels:', self.channel_names)
        self.channel_name1 = self.channel_names[0]
        self.channel_name2 = self.channel_names[1]
    else:
        self.channel_names = [self.channel_name1, self.channel_name2]
    self.sample = tsample
    # tsample.transform('hlog', channels=['Y2-A', 'B1-A', 'V2-A'], b=500.0)
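# The method above assumes several attributes on its enclosing class. A
# minimal sketch of that context (the class name and defaults are
# hypothetical; only the attribute names come from the method itself):
class FcsLoader:
    def __init__(self, transformation='hlog', bins=500,
                 channel_name1=None, channel_name2=None):
        self.transformation = transformation  # e.g. 'hlog', or None to skip
        self.bins = bins                      # passed as `b` to transform()
        self.channel_name1 = channel_name1    # None -> first channel in file
        self.channel_name2 = channel_name2    # None -> second channel in file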
import os

from pylab import *

import FlowCytometryTools
from FlowCytometryTools import FCMeasurement

# Locate sample data included with this package
datadir = os.path.join(FlowCytometryTools.__path__[0], 'tests', 'data', 'Plate01')
datafile = os.path.join(datadir, 'RFP_Well_A3.fcs')
# datafile = '[insert path to your own fcs file]'

# Load data
tsample = FCMeasurement(ID='Test Sample', datafile=datafile)
tsample = tsample.transform('hlog', channels=['Y2-A', 'B1-A', 'V2-A'], b=500.0)

# Plot
tsample.plot(['Y2-A', 'B1-A'], kind='scatter', alpha=0.6, color='gray')
grid(True)

# show()  # <-- Uncomment when running as a script.
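# A natural next step after transforming is gating. A minimal sketch using
# ThresholdGate (the gate position of 1000 on 'Y2-A' is an arbitrary
# illustration, not a recommended value):
from FlowCytometryTools import ThresholdGate

y2_gate = ThresholdGate(1000.0, ['Y2-A'], region='above')
gated_sample = tsample.gate(y2_gate)
gated_sample.plot(['Y2-A', 'B1-A'], kind='scatter', alpha=0.6, color='green')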
def custom_compensate(original_sample):
    # Copy the original sample
    new_sample = original_sample.copy()
    new_data = new_sample.data
    original_data = original_sample.data

    # Our transformation goes here
    new_data['Y2-A'] = original_data['Y2-A'] - 0.15 * original_data['FSC-A']
    new_data['FSC-A'] = original_data['FSC-A'] - 0.32 * original_data['Y2-A']
    new_data = new_data.dropna()  # Removes all NaN entries
    new_sample.data = new_data
    return new_sample

# Load data
sample = FCMeasurement(ID='Test Sample', datafile=datafile)
sample = sample.transform('hlog')
compensated_sample = sample.apply(custom_compensate)

###
# To do this with a collection (a plate):
# compensated_plate = plate.apply(custom_compensate, output_format='collection')
###

# Plot
sample.plot(['Y2-A', 'FSC-A'], kind='scatter',
            color='gray', alpha=0.6, label='Original')
compensated_sample.plot(['Y2-A', 'FSC-A'], kind='scatter',
                        color='green', alpha=0.6, label='Compensated')
legend(loc='best')
grid(True)
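# Expanding on the plate comment above, a minimal sketch of applying the same
# compensation across a whole plate (assumes `datadir` holds well files named
# according to the 'name' parser convention used by the package's sample data):
from FlowCytometryTools import FCPlate

plate = FCPlate.from_dir(ID='Demo Plate', path=datadir, parser='name')
plate = plate.transform('hlog')
compensated_plate = plate.apply(custom_compensate, output_format='collection')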
samples = []
pattern = 'Tube'
for file in os.listdir(datadir):
    if file.endswith(".fcs") and pattern in file:
        print(file)
        samples.append(FCMeasurement(ID=file, datafile=datadir + "/" + file))

#%%
from LoadFlowSamples_v1 import Samples

datadir = '/Users/Alonyan/GoogleDrive/Experiments/Th1Th2'
pattern = 'Tube'
FS = Samples(datadir, pattern)

#%%
sample = samples[0]  # pick one measurement (assumed; `sample` was undefined here)
tsample = sample.transform('hlog', channels=['PE-Texas Red-A', 'Pacific Blue-A'], b=500.0)

#%%
import pylab as pl

#%%
pl.figure()
tsample.plot('Pacific Blue-A', bins=150)

#%%
pl.figure(num=None, figsize=(5.1, 6.6), dpi=80, facecolor='w', edgecolor='k')
tsample.plot(['PE-Texas Red-A', 'Pacific Blue-A'], cmap=pl.cm.gist_rainbow, bins=150)
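#%%
# A minimal sketch for comparing all loaded tubes at once (an assumed
# extension of the cells above, reusing the `samples` list built earlier):
pl.figure()
for s in samples:
    ts = s.transform('hlog', channels=['PE-Texas Red-A', 'Pacific Blue-A'], b=500.0)
    ts.plot('Pacific Blue-A', bins=150, alpha=0.5)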
import pandas as pd
import numpy as np

from FlowCytometryTools import FCMeasurement


class flowsom():
    """Class of flowSOM clustering"""

    def __init__(self, file_address, if_fcs=True, if_drop=True, drop_col=['Time']):
        """
        Read the fcs file as a pd.DataFrame

        Parameters
        ----------
        file_address : string
            e.g. r'#40 Ab.fcs' or 'flowmetry.csv'
        if_fcs : bool
            whether the input file is an fcs file. If not, it should be a csv file
        if_drop : bool
            whether some columns should be ignored
        drop_col : list of strings
            list of column names to be dropped
        """
        if if_fcs:
            self.info = FCMeasurement(ID='Train', datafile=file_address)
            df = self.info.data
        else:
            df = pd.read_csv(file_address)
        self.df = df
        if if_drop:
            self.df = df.drop(drop_col, axis=1)

    def tf(self, tf_str=None, if_fcs=True):
        """
        Transform the data. Available transform methods include 'hlog',
        'hlog_inv', 'glog', 'tlog', ... For details, check:
        https://github.com/eyurtsev/FlowCytometryTools/blob/master/FlowCytometryTools/core/transforms.py#L242

        Parameters
        ----------
        tf_str : string
            e.g. 'hlog', the transform algorithm
        if_fcs : bool
            whether the input file is an fcs file. Only an fcs file can be
            transformed here; for a csv file you have to supply your own
            transform function
        """
        log_data = pd.DataFrame()
        if if_fcs and tf_str:
            for col in self.df.columns:
                # perform the transform for each column
                hsample = self.info.transform(tf_str, channels=[col])
                h_data = hsample.data[col]  # get the transformed column
                log_data[col] = h_data.values  # store it in the new df
            self.tf_df = log_data
            self.tf_matrix = log_data.values
        else:
            # to-do: transform for ndarray
            self.tf_df = self.df
            self.tf_matrix = self.df.values

    def som_mapping(self, x_n, y_n, d, sigma, lr, batch_size,
                    neighborhood='gaussian', tf_str=None, if_fcs=True, seed=10):
        """
        Perform SOM on the transformed data

        Parameters
        ----------
        x_n : int
            x dimension of the expected map
        y_n : int
            y dimension of the expected map
        d : int
            vector length of the input df
        sigma : float
            the standard deviation of the initialized weights
        lr : float
            learning rate
        batch_size : int
            iteration count for training
        neighborhood : string
            e.g. 'gaussian', the neighborhood function of the map
        tf_str : string
            transform parameter, see self.tf(); e.g. None or 'hlog'
        if_fcs : bool
            transform parameter, see self.tf()
        seed : int
            random seed, for reproducibility
        """
        from minisom import MiniSom
        self.tf(tf_str, if_fcs)
        som = MiniSom(x_n, y_n, d, sigma, lr,
                      neighborhood_function=neighborhood,
                      random_seed=seed)  # initialize the map
        som.pca_weights_init(self.tf_matrix)  # initialize the weights
        print("Training...")
        som.train_batch(self.tf_matrix, batch_size, verbose=True)
        print("\n...ready!")
        self.x_n = x_n
        self.y_n = y_n
        self.map_som = som
        self.weights = som.get_weights()
        self.flatten_weights = self.weights.reshape(x_n * y_n, d)

    def meta_clustering(self, cluster_class, min_n, max_n, iter_n,
                        resample_proportion=0.7, verbose=False):
        """
        Perform meta clustering on the SOM weights

        Parameters
        ----------
        cluster_class : class
            e.g. KMeans, a cluster class, like "from sklearn.cluster import KMeans"
        min_n : int
            the min proposed number of clusters
        max_n : int
            the max proposed number of clusters
        iter_n : int
            the number of iterations for each number of clusters
        resample_proportion : float
            within (0, 1), the proportion of re-sampling when computing clustering
        verbose : bool
            whether to print out the clustering process
        """
        # initialize the consensus clustering over the proposed cluster counts
        cluster_ = ConsensusCluster(cluster_class, min_n, max_n, iter_n,
                                    resample_proportion=resample_proportion)
        cluster_.fit(self.flatten_weights, verbose)  # fit the SOM weights
        self.cluster_map = cluster_
        self.bestk = cluster_.bestK  # the best number of clusters in range(min_n, max_n)
        # get the prediction of each weight vector on the meta clusters (at bestK)
        self.flatten_class = cluster_.predict_data(self.flatten_weights)
        self.map_class = self.flatten_class.reshape(self.x_n, self.y_n)

    def vis(self, t, with_labels, node_size, edge_color):
        """
        Visualize the meta clustering result with a minimal spanning tree

        Parameters
        ----------
        t : int
            multiplier for the number of nodes, n = t * bestK
        with_labels : bool
            whether each node is drawn with its cluster label
        node_size : int
            size of the drawn nodes
        edge_color : string
            e.g. 'b', the color of the edges
        """
        from collections import Counter

        import matplotlib.cm as cm
        import matplotlib.pyplot as plt
        import networkx as nx

        # generate n clusters (n = bestK * t)
        self.cluster_map.bestK = self.bestk * t
        self.over_class = self.cluster_map.predict_data(self.flatten_weights)

        # compute the centroid of each cluster
        centroid_list = []
        for i in np.unique(self.over_class):
            centroid = np.mean(self.flatten_weights[self.over_class == i], axis=0)
            centroid_list.append(centroid)
        self.centroid_list = centroid_list

        # Generate a fully connected graph of the n cluster centroids for the
        # minimal spanning tree computation
        # (node: centroid of a cluster, edge weight: distance between two nodes)
        G = nx.Graph()
        for i in range(len(centroid_list)):
            for j in range(i + 1, len(centroid_list)):
                # Euclidean distance between the two centroids
                w = np.sqrt(np.dot(centroid_list[i], centroid_list[i])
                            - 2 * np.dot(centroid_list[i], centroid_list[j])
                            + np.dot(centroid_list[j], centroid_list[j]))
                G.add_edge(i, j, weight=w)
        self.graph = G
        mst = nx.minimum_spanning_tree(G)  # compute the minimal spanning tree
        self.mst = mst

        # generate the plot
        edges, weights = zip(*nx.get_edge_attributes(mst, 'weight').items())
        print(self.bestk)
        color_list = cm.rainbow(np.linspace(0, 1, self.bestk))
        color_map = []
        for node in mst:
            # color each node by the majority meta cluster among its members
            class_id, _ = Counter(
                self.flatten_class[self.over_class == node]).most_common()[0]
            try:
                color_map.append(color_list[class_id])
            except IndexError:
                print('something wrong with plotting cluster %d!' % class_id)
        nx.draw(mst, with_labels=with_labels, node_size=node_size,
                node_color=color_map, edgelist=edges, edge_color=edge_color,
                width=np.array(weights) * 100, edge_cmap=plt.cm.Blues)
        # plt.show()

    def labeling(self, verbose=True):
        """
        Make a prediction for the whole dataset and add a 'category' column
        with the predicted cluster of each event.
        """
        label_list = []
        for i in range(len(self.tf_matrix)):
            # print a milestone every 10000 samples
            if verbose and i % 10000 == 0:
                print('%d samples done...' % i)
            xx = self.tf_matrix[i, :]  # fetch the sample data
            # prediction = the location of the closest SOM entry
            winner = self.map_som.winner(xx)
            c = self.map_class[winner]  # map the location to its meta cluster
            label_list.append(c)
        self.df['category'] = label_list
        self.tf_df['category'] = label_list
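# A minimal usage sketch for the class above (assumes a ConsensusCluster
# implementation is importable, since meta_clustering references it without
# defining it; the file name and map size are placeholders):
from sklearn.cluster import KMeans

fsom = flowsom(r'#40 Ab.fcs', if_fcs=True, if_drop=True, drop_col=['Time'])
fsom.som_mapping(10, 10, fsom.df.shape[1], sigma=1.0, lr=0.5,
                 batch_size=1000, tf_str='hlog')
fsom.meta_clustering(KMeans, min_n=3, max_n=10, iter_n=5)
fsom.labeling()
print(fsom.df['category'].value_counts())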
# Initialize a Figure
fig = plt.figure()
# fig.add_axes([0, 0, 1, 1])
plt.subplots(figsize=(2, 2))
subplots_adjust(hspace=0.0, wspace=0.0)

# Load data
datafile = r'/Users/anazuniga/Documents/phyton/FlowCytometryTools/FCplate 2 input/2MP1/export_P7R_Single Cells.fcs'
tsample = FCMeasurement(ID='Test Sample', datafile=datafile)
tsample = tsample.transform('hlog', channels=['GFP-H', 'SSC-H', 'mCherry-H', 'FSC-H', 'V1-H'], b=200.0)

# Plot the first of three stacked panels
ax = subplot(3, 1, 1)
tsample.plot(['V1-H', 'SSC-H'], kind='scatter', alpha=0.05, color='black', s=1)
ax.tick_params(labelbottom=False, labelleft=False)
ax.set_ylim(1000, 10000)
ax.set_ylabel('')
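# A possible continuation (assumed; the script stops after the first of the
# three panels implied by subplot(3, 1, 1), so the channel pairings below are
# placeholders drawn from the transformed channel list):
ax2 = subplot(3, 1, 2)
tsample.plot(['GFP-H', 'SSC-H'], kind='scatter', alpha=0.05, color='black', s=1)
ax2.tick_params(labelbottom=False, labelleft=False)
ax2.set_ylim(1000, 10000)

ax3 = subplot(3, 1, 3)
tsample.plot(['mCherry-H', 'SSC-H'], kind='scatter', alpha=0.05, color='black', s=1)
ax3.set_ylim(1000, 10000)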