def test_plot_sample_scatter(self):
    """Scatter plot of two transformed channels should return a Figure."""
    test_sample = fk.Sample(fcs_path)
    logicle = fk.transforms.LogicleTransform(
        'logicle', param_t=10000, param_w=0.5, param_m=4.5, param_a=0
    )
    test_sample.apply_transform(logicle)
    fig = test_sample.plot_scatter('FL1-H', 'FL2-H', source='xform')
    self.assertIsInstance(fig, Figure)
#reading, transforming and manipulating FCS files #Date Modified: Noveber 24 #Make sure you have Flow Kit installed import flowkit as fk import numpy as np import pandas as pd #here is the fcs file we will read. fcs_path = 'sampleFCS.fcs' #read in the FCS file sample = fk.Sample(fcs_path) #print to see how manny cells and channels there are #doing so tells us that there are 268796 cells (events) and 49 channels print(sample) #convert to numpy array dm = sample.get_raw_events() #now we will look at the channel names. This is helpful because we will get comprehsnible marker names. #we will then be only selecting channel corresponding to our marker of interest CNames = np.array(sample.pns_labels) #for downstream analysis we will keep the following channels corresponding to phenotypic and functional markers #you will need to customize this for your own analysis toKeep = [ 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45 ]
def test_sample_repr(self):
    """repr(Sample) should summarize version, well, channel and event counts."""
    path = "examples/data/gate_ref/data1.fcs"
    loaded = fk.Sample(fcs_path_or_data=path)
    expected = "Sample(v2.0, B07, 8 channels, 13367 events)"
    self.assertEqual(repr(loaded), expected)
""" Unit tests for string representations """ import unittest import flowkit as fk from . import gating_strategy_prog_gate_tests as prog_test_data # import comp_matrix_01, logicle_xform1, hyperlog_xform1, poly1_gate data1_fcs_path = 'examples/data/gate_ref/data1.fcs' data1_sample = fk.Sample(data1_fcs_path) class StringReprTestCase(unittest.TestCase): """Tests related to string representations of FlowKit classes""" def test_vert_repr(self): vert = fk.Vertex([500, 5]) vert_string = "Vertex([500, 5])" self.assertEqual(repr(vert), vert_string) def test_dim_repr(self): poly1_dim1 = fk.Dimension('FL2-H', compensation_ref='FCS') dim_string = "Dimension(id: FL2-H)" self.assertEqual(repr(poly1_dim1), dim_string) def test_ratio_dim_repr(self): dim_rat1 = fk.RatioDimension( 'FL2Rat1', compensation_ref='uncompensated', range_min=3,
""" Unit tests for plotting functions """ import copy import unittest from bokeh.plotting.figure import Figure as bk_Figure from bokeh.layouts import Column as bk_Column from matplotlib.pyplot import Figure as mpl_Figure import flowkit as fk fcs_path = 'examples/data/gate_ref/data1.fcs' gml_path = 'examples/data/gate_ref/gml/gml_all_gates.xml' test_sample = fk.Sample(fcs_path, subsample=2000) test_gating_strategy = fk.parse_gating_xml(gml_path) class PlotTestCase(unittest.TestCase): """ Tests for plot functions/methods NOTE: Due to the difficulty of introspecting figures and images at a pixel-level, this TestCase only tests that plots are returned from plotting functions. """ def test_plot_sample_histogram(self): sample = copy.deepcopy(test_sample) xform_logicle = fk.transforms.LogicleTransform('logicle', param_t=10000, param_w=0.5, param_m=4.5, param_a=0) sample.apply_transform(xform_logicle) p = sample.plot_histogram(
# choose for development # fcs_group = [fcs_group[0]] # loop over tissue-specific fcs files and store sample data in dict data_dict = {} for fname in fcs_group: print(f'Storing data for {fname}') # extract sample metadata sample_metadata = fname.split('.fcs')[0] status = sample_metadata.split('_')[3] timepoint = sample_metadata.split('_')[1] replicate = sample_metadata.split('_')[4] # store cmpvs and tp-specific unstained fcs files as variables sample = fk.Sample(os.path.join(cmpvs_path, fname)) fiducial = fk.Sample( os.path.join(raw_path, f'control_{timepoint}_unstained.fcs') ) if channel not in ['fsc', 'ssc']: # apply logicle transformation to fcs files sample.apply_transform(xform) fiducial.apply_transform(xform) # get indices for current channel sample_channel_label = channel_metadata[channel][0] sample_channel_idx = sample.get_channel_index(sample_channel_label) if channel not in ['fsc', 'ssc']:
# choose for development # fcs_files = fcs_files[0:3] # append zeroed fcs file to dataframe for fname in fcs_files: print(f'Processing data for {fname}') sample_metadata = fname.split('.fcs')[0] timepoint = int(sample_metadata.split('_')[1]) tissue = sample_metadata.split('_')[2] status = sample_metadata.split('_')[3] replicate = int(sample_metadata.split('_')[4]) # assign fcs file as a variable sample = fk.Sample(os.path.join(cmpvs_path, fname)) # initialize dataframe to store zeroed sample data df_temp = pd.DataFrame() for channel in sorted(channel_metadata.keys()): if channel not in ['fsc', 'ssc']: # define logicle transformation model param_w = channel_metadata[channel][1] xform = fk.transforms.LogicleTransform('logicle', param_t=262144.0, param_w=param_w, param_m=4.5, param_a=0)
def main(): """ Main function """ data = [] time = [] for path in PATHS: sample = fk.Sample(path) data.append(load_data(sample)) time.append(load_time(sample)) sources = [] for i, e in enumerate(data): sources.extend([i] * len(e)) data = np.concatenate(data, axis=0) time = np.concatenate(time, axis=0) d = len(data[0]) # Initialize a new Annoy object and index it using 10 trees annoy = AnnoyIndex(d, metric="angular") for i, v in enumerate(data): annoy.add_item(i, v) annoy.build(10) # Create the k-nearest neighbor graph (k = 10) edge_list = [] for i in range(len(data)): for j in annoy.get_nns_by_item(i, 10): edge_list.append((i, j, cosine_distance(data[i], data[j]))) # Compute the layout from the edge list x, y, s, t, _ = tm.layout_from_edge_list(len(data), edge_list) legend_labels = [(0, "No Target Probe Negative Control"), (1, "Stained Sample")] # Create the plot faerun = Faerun( view="front", coords=False, legend_title= "RNA Flow Cytometry: evaluation of detection sensitivity in low abundant intracellular RNA ", ) faerun.add_scatter( "CYTO", { "x": x, "y": y, "c": sources, "labels": sources }, point_scale=1.0, max_point_size=10, shader="smoothCircle", colormap="Set1", has_legend=True, categorical=True, legend_labels=legend_labels, legend_title="Cell Types", ) faerun.add_tree("CYTO_tree", { "from": s, "to": t }, point_helper="CYTO", color="#222222") faerun.plot("cyto")
"""Load a compensated FCS sample, logicle-transform it, and show a scatter plot."""
import flowkit as fk
from bokeh.plotting import show

FCS_PATH = "test_comp_example.fcs"
COMP_PATH = "comp_complete_example.csv"

# Load the sample with its spillover matrix; sub-sample to 50k events,
# dropping negative-scatter events but keeping anomalous ones.
smp = fk.Sample(
    fcs_path_or_data=FCS_PATH,
    compensation=COMP_PATH,
    subsample_count=50000,
    filter_negative_scatter=True,
    filter_anomalous_events=False,
)

# Logicle transform over the full 18-bit range.
logicle = fk.transforms.LogicleTransform(
    'logicle', param_t=262144, param_w=0.5, param_m=4.5, param_a=0
)
smp.apply_transform(logicle)

# Scatter plot of channels 3 vs 6 on the transformed, sub-sampled events.
scatter_fig = smp.plot_scatter(3, 6, source='xform', subsample=True)
show(scatter_fig)
# NOTE(review): this chunk is the body of an enclosing loop over
# (panel, sample_id) pairs that is not visible in this view; the bare
# `continue` statements below refer to that loop.
try:
    files = sorted(list(fcs_dir.glob(f"{_id}*{panel}*.fcs")))
    fcs_file = files[0]
    # ^^ this will get the most recent in case there are copies (*(1) files)
except IndexError:
    # Fall back to the alternate 'x'-separated naming pattern.
    try:
        fff = list(fcs_dir.glob(f"{_id}x{panel}*.fcs"))
        # assert len(fff) in [0, 1]
        fcs_file = fff[0]
    except IndexError:
        print(f"Sample {sample_id} is missing!")
        failures.append((panel, sample_id))
        continue
try:
    s = fk.Sample(fcs_file)
    # this shouldn't happen anymore as corrupt files aren't selected anymore
except KeyboardInterrupt:
    raise
except struct.error:
    # FCS header/data could not be parsed; record the failure and move on.
    print(f"Sample {sample_id} failed parsing FCS file!")
    failures.append((panel, sample_id))
    continue
# Record the acquisition date from the FCS metadata.
dates[panel][sample_id] = s.metadata["date"]

print("Concatenating and writing to disk.")
dates_df = pd.DataFrame(dates).apply(pd.to_datetime)
dates_df.index.name = "sample_id"
dates_df.to_csv(metadata_dir / "facs_dates.csv")
import flowkit as fk
from bokeh.plotting import show

# Paths to an FCS file and compensation matrix (saved as a simple CSV file).
fcs_file_path = "data/test_comp_example.fcs"
comp_file_path = "data/comp_complete_example.csv"

# Create a Sample instance and give the optional comp matrix.
# This file is slightly non-standard with a common off-by-one data offset,
# so we force reading it by setting ignore_offset_error to True.
sample = fk.Sample(
    fcs_path_or_data=fcs_file_path,
    compensation=comp_file_path,
    ignore_offset_error=True  # only needed b/c FCS has off-by-one data offset issue
)

# Sub-sample events to 50k for better performance when plotting.
# The events are not deleted, and any analysis will be performed on all events.
sample.subsample_events(50000)

# Create a LogicleTransform instance (one of many transform types in FlowKit).
xform = fk.transforms.LogicleTransform(
    'logicle', param_t=262144, param_w=0.5, param_m=4.5, param_a=0
)

# Apply our transform to the sample.
# This will apply post-compensation if a comp matrix has already been loaded.
sample.apply_transform(xform)
# Convert fcs to csv: base_dir = os.getcwd() fcs_dir = os.path.join(base_dir, 'fcs') xform_dir = os.path.join(base_dir, 'csv_xform') fcs_paths = glob(os.path.join(fcs_dir, '*.fcs')) xform = fk.transforms.AsinhTransform('my_xform', param_t=12000, param_m=4.0, param_a=0.7) for f in fcs_paths: sample = fk.Sample(f) sample.apply_transform(xform) new_name = os.path.basename(f).replace('fcs', 'csv') sample.export_csv(source='xform', filename=new_name, directory=xform_dir) # Extract Batch Control: from matplotlib.patches import Rectangle from matplotlib.path import Path import pandas as pd import numpy as np fs = glob('./csv_xform/*') for f in fs: