Example #1
    def test_plot_sample_scatter(self):
        sample = fk.Sample(fcs_path)
        xform_logicle = fk.transforms.LogicleTransform('logicle',
                                                       param_t=10000,
                                                       param_w=0.5,
                                                       param_m=4.5,
                                                       param_a=0)
        sample.apply_transform(xform_logicle)

        p = sample.plot_scatter('FL1-H', 'FL2-H', source='xform')

        self.assertIsInstance(p, Figure)
Example #2
# reading, transforming and manipulating FCS files
# Date Modified: November 24
# Make sure you have FlowKit installed

import flowkit as fk
import numpy as np
import pandas as pd

#here is the fcs file we will read.
fcs_path = 'sampleFCS.fcs'

#read in the FCS file
sample = fk.Sample(fcs_path)

# print to see how many cells and channels there are
#doing so tells us that there are 268796 cells (events) and 49 channels
print(sample)

#convert to numpy array
dm = sample.get_raw_events()

# now we will look at the channel names. This is helpful because we will get comprehensible marker names.
# we will then select only the channels corresponding to our markers of interest
CNames = np.array(sample.pns_labels)

#for downstream analysis we will keep the following channels corresponding to phenotypic and functional markers
#you will need to customize this for your own analysis
toKeep = [
    9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,
    28, 29, 30, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45
]
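The script above defines toKeep but stops before the channel selection its comments describe; a minimal sketch of that step (not part of the original script, reusing dm, CNames and toKeep from above) could look like this:

# keep only the columns of the raw event matrix corresponding to the
# channels of interest, together with their marker labels
dm_selected = dm[:, toKeep]
CNames_selected = CNames[toKeep]

# sanity check: shape is (n_events, len(toKeep)) plus the matching labels
print(dm_selected.shape)
print(CNames_selected)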
Example #3
    def test_sample_repr(self):
        fcs_file_path = "examples/data/gate_ref/data1.fcs"
        sample = fk.Sample(fcs_path_or_data=fcs_file_path)
        sample_string = "Sample(v2.0, B07, 8 channels, 13367 events)"

        self.assertEqual(repr(sample), sample_string)
Example #4
"""
Unit tests for string representations
"""
import unittest
import flowkit as fk
from . import gating_strategy_prog_gate_tests as prog_test_data
# import comp_matrix_01, logicle_xform1, hyperlog_xform1, poly1_gate

data1_fcs_path = 'examples/data/gate_ref/data1.fcs'
data1_sample = fk.Sample(data1_fcs_path)


class StringReprTestCase(unittest.TestCase):
    """Tests related to string representations of FlowKit classes"""
    def test_vert_repr(self):
        vert = fk.Vertex([500, 5])
        vert_string = "Vertex([500, 5])"

        self.assertEqual(repr(vert), vert_string)

    def test_dim_repr(self):
        poly1_dim1 = fk.Dimension('FL2-H', compensation_ref='FCS')
        dim_string = "Dimension(id: FL2-H)"

        self.assertEqual(repr(poly1_dim1), dim_string)

    def test_ratio_dim_repr(self):
        dim_rat1 = fk.RatioDimension(
            'FL2Rat1',
            compensation_ref='uncompensated',
            range_min=3,
Example #5
"""
Unit tests for plotting functions
"""
import copy
import unittest
from bokeh.plotting.figure import Figure as bk_Figure
from bokeh.layouts import Column as bk_Column
from matplotlib.pyplot import Figure as mpl_Figure
import flowkit as fk

fcs_path = 'examples/data/gate_ref/data1.fcs'
gml_path = 'examples/data/gate_ref/gml/gml_all_gates.xml'
test_sample = fk.Sample(fcs_path, subsample=2000)
test_gating_strategy = fk.parse_gating_xml(gml_path)


class PlotTestCase(unittest.TestCase):
    """
    Tests for plot functions/methods

    NOTE: Due to the difficulty of introspecting figures and images at a
          pixel-level, this TestCase only tests that plots are returned
          from plotting functions.
    """

    def test_plot_sample_histogram(self):
        sample = copy.deepcopy(test_sample)
        xform_logicle = fk.transforms.LogicleTransform('logicle', param_t=10000, param_w=0.5, param_m=4.5, param_a=0)
        sample.apply_transform(xform_logicle)

        p = sample.plot_histogram(
Example #6
        # choose for development
        # fcs_group = [fcs_group[0]]

        # loop over tissue-specific fcs files and store sample data in dict
        data_dict = {}
        for fname in fcs_group:
            print(f'Storing data for {fname}')

            # extract sample metadata
            sample_metadata = fname.split('.fcs')[0]
            status = sample_metadata.split('_')[3]
            timepoint = sample_metadata.split('_')[1]
            replicate = sample_metadata.split('_')[4]

            # store cmpvs and tp-specific unstained fcs files as variables
            sample = fk.Sample(os.path.join(cmpvs_path, fname))
            fiducial = fk.Sample(
                os.path.join(raw_path, f'control_{timepoint}_unstained.fcs')
                )

            if channel not in ['fsc', 'ssc']:

                # apply logicle transformation to fcs files
                sample.apply_transform(xform)
                fiducial.apply_transform(xform)

            # get indices for current channel
            sample_channel_label = channel_metadata[channel][0]
            sample_channel_idx = sample.get_channel_index(sample_channel_label)

            if channel not in ['fsc', 'ssc']:
Example #7
# choose for development
# fcs_files = fcs_files[0:3]

# append zeroed fcs file to dataframe
for fname in fcs_files:

    print(f'Processing data for {fname}')

    sample_metadata = fname.split('.fcs')[0]
    timepoint = int(sample_metadata.split('_')[1])
    tissue = sample_metadata.split('_')[2]
    status = sample_metadata.split('_')[3]
    replicate = int(sample_metadata.split('_')[4])

    # assign fcs file as a variable
    sample = fk.Sample(os.path.join(cmpvs_path, fname))

    # initialize dataframe to store zeroed sample data
    df_temp = pd.DataFrame()

    for channel in sorted(channel_metadata.keys()):
        if channel not in ['fsc', 'ssc']:

            # define logicle transformation model
            param_w = channel_metadata[channel][1]
            xform = fk.transforms.LogicleTransform('logicle',
                                                   param_t=262144.0,
                                                   param_w=param_w,
                                                   param_m=4.5,
                                                   param_a=0)
Example #8
def main():
    """ Main function """
    data = []
    time = []
    for path in PATHS:
        sample = fk.Sample(path)
        data.append(load_data(sample))
        time.append(load_time(sample))

    sources = []
    for i, e in enumerate(data):
        sources.extend([i] * len(e))

    data = np.concatenate(data, axis=0)
    time = np.concatenate(time, axis=0)

    d = len(data[0])

    # Initialize a new Annoy object and index it using 10 trees
    annoy = AnnoyIndex(d, metric="angular")
    for i, v in enumerate(data):
        annoy.add_item(i, v)
    annoy.build(10)

    # Create the k-nearest neighbor graph (k = 10)
    edge_list = []
    for i in range(len(data)):
        for j in annoy.get_nns_by_item(i, 10):
            edge_list.append((i, j, cosine_distance(data[i], data[j])))

    # Compute the layout from the edge list
    x, y, s, t, _ = tm.layout_from_edge_list(len(data), edge_list)

    legend_labels = [(0, "No Target Probe Negative Control"),
                     (1, "Stained Sample")]

    # Create the plot
    faerun = Faerun(
        view="front",
        coords=False,
        legend_title=(
            "RNA Flow Cytometry: evaluation of detection sensitivity "
            "in low-abundance intracellular RNA"
        ),
    )
    faerun.add_scatter(
        "CYTO",
        {
            "x": x,
            "y": y,
            "c": sources,
            "labels": sources
        },
        point_scale=1.0,
        max_point_size=10,
        shader="smoothCircle",
        colormap="Set1",
        has_legend=True,
        categorical=True,
        legend_labels=legend_labels,
        legend_title="Cell Types",
    )
    faerun.add_tree("CYTO_tree", {
        "from": s,
        "to": t
    },
                    point_helper="CYTO",
                    color="#222222")

    faerun.plot("cyto")
Example #9
import flowkit as fk
from bokeh.plotting import show

fcs_file_path = "test_comp_example.fcs"
comp_file_path = "comp_complete_example.csv"

sample = fk.Sample(fcs_path_or_data=fcs_file_path,
                   compensation=comp_file_path,
                   subsample_count=50000,
                   filter_negative_scatter=True,
                   filter_anomalous_events=False)

xform = fk.transforms.LogicleTransform('logicle',
                                       param_t=262144,
                                       param_w=0.5,
                                       param_m=4.5,
                                       param_a=0)
sample.apply_transform(xform)

fig = sample.plot_scatter(3, 6, source='xform', subsample=True)

show(fig)
Example #10
        try:
            files = sorted(list(fcs_dir.glob(f"{_id}*{panel}*.fcs")))
            fcs_file = files[0]
            # ^^ this will get the most recent in case there are copies (*(1) files)
        except IndexError:
            try:
                fff = list(fcs_dir.glob(f"{_id}x{panel}*.fcs"))
                # assert len(fff) in [0, 1]
                fcs_file = fff[0]
            except IndexError:
                print(f"Sample {sample_id} is missing!")
                failures.append((panel, sample_id))
                continue

        try:
            s = fk.Sample(fcs_file)
            # this shouldn't happen anymore, as corrupt files are no longer selected
        except KeyboardInterrupt:
            raise
        except struct.error:
            print(f"Sample {sample_id} failed parsing FCS file!")
            failures.append((panel, sample_id))
            continue

        dates[panel][sample_id] = s.metadata["date"]

print("Concatenating and writing to disk.")
dates_df = pd.DataFrame(dates).apply(pd.to_datetime)
dates_df.index.name = "sample_id"
dates_df.to_csv(metadata_dir / "facs_dates.csv")
Example #11
import flowkit as fk
from bokeh.plotting import show

# paths to an FCS file and compensation matrix (saved as a simple CSV file)
fcs_file_path = "data/test_comp_example.fcs"
comp_file_path = "data/comp_complete_example.csv"

# create a Sample instance and give the optional comp matrix
# this file is slightly non-standard with a common off-by-one data offset,
# so we force reading it by setting ignore_offset_error to True.
sample = fk.Sample(
    fcs_path_or_data=fcs_file_path,
    compensation=comp_file_path,
    ignore_offset_error=True  # only needed b/c FCS has off-by-one data offset issue
)

# sub-sample events to 50k for better performance when plotting
# the events are not deleted, and any analysis will be performed on all events.
sample.subsample_events(50000)

# create a LogicleTransform instance (one of many transform types in FlowKit)
xform = fk.transforms.LogicleTransform('logicle',
                                       param_t=262144,
                                       param_w=0.5,
                                       param_m=4.5,
                                       param_a=0)

# apply our transform to the sample
# This will apply post-compensation if a comp matrix has already been loaded.
sample.apply_transform(xform)
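The tutorial snippet ends after applying the transform; a likely next step, mirroring Example #9 above, is to plot two of the transformed channels (the channel indices below are placeholders):

from bokeh.plotting import show

# scatter plot of two transformed channels on the sub-sampled events
fig = sample.plot_scatter(3, 6, source='xform', subsample=True)
show(fig)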
Example #12
# Convert fcs to csv:

import os
from glob import glob

import flowkit as fk

base_dir = os.getcwd()

fcs_dir = os.path.join(base_dir, 'fcs')
xform_dir = os.path.join(base_dir, 'csv_xform')

fcs_paths = glob(os.path.join(fcs_dir, '*.fcs'))

xform = fk.transforms.AsinhTransform('my_xform',
                                     param_t=12000,
                                     param_m=4.0,
                                     param_a=0.7)

for f in fcs_paths:
    sample = fk.Sample(f)
    sample.apply_transform(xform)

    new_name = os.path.basename(f).replace('.fcs', '.csv')

    sample.export_csv(source='xform', filename=new_name, directory=xform_dir)

# Extract Batch Control:
from matplotlib.patches import Rectangle
from matplotlib.path import Path
import pandas as pd
import numpy as np

fs = glob('./csv_xform/*')

for f in fs: