Esempio n. 1
0
def test_partial_corr():
    dataf = msd.random_traj_dataset()
    msds = msd.all_msds2(dataf, frames=100)
    feat = ft.calculate_features(msds)
    pcorr = pca.partial_corr(feat)
    npt.assert_equal(24.0, np.round(np.sum(pcorr), 1))

    dataf = msd.random_traj_dataset(nparts=10)
    msds = msd.all_msds2(dataf, frames=100)
    feat = ft.calculate_features(msds)
    pcorr = pca.partial_corr(feat)
    npt.assert_equal(47.9, np.round(np.sum(pcorr), 1))

    dataf = msd.random_traj_dataset(nparts=10, seed=9)
    msds = msd.all_msds2(dataf, frames=100)
    feat = ft.calculate_features(msds)
    pcorr = pca.partial_corr(feat)
    npt.assert_equal(33.4, np.round(np.sum(pcorr), 1))

    dataf = msd.random_traj_dataset(nparts=10, nframes=40, seed=9)
    msds = msd.all_msds2(dataf, frames=40)
    feat = ft.calculate_features(msds)
    pcorr = pca.partial_corr(feat)
    npt.assert_equal(17.4, np.round(np.sum(pcorr), 1))

    dataf = msd.random_traj_dataset(nparts=10,
                                    nframes=40,
                                    ndist=(3, 5),
                                    seed=9)
    msds = msd.all_msds2(dataf, frames=40)
    feat = ft.calculate_features(msds)
    pcorr = pca.partial_corr(feat)
    npt.assert_equal(35.7, np.round(np.sum(pcorr), 1))
Esempio n. 2
0
def test_calculate_features():
    d = {
        'Frame': [0, 1, 2, 3, 4, 0, 1, 2, 3, 4],
        'Track_ID': [1, 1, 1, 1, 1, 2, 2, 2, 2, 2],
        'X': [0, 0, 1, 1, 2, 1, 1, 2, 2, 3],
        'Y': [0, 1, 1, 2, 2, 0, 1, 1, 2, 2]
    }
    df = pd.DataFrame(data=d)
    dfi = msd.all_msds2(df, frames=5)
    feat = ft.calculate_features(dfi)

    d = {
        'AR': np.ones(2) * 3.9999999999999996,
        'D_fit': np.ones(2) * 0.1705189932550273,
        'MSD_ratio': np.ones(2) * -0.2666666666666666,
        'X': [0.75, 1.75],
        'Y': [1.25, 1.25],
        'Track_ID': [1.0, 2.0],
        'alpha': np.ones(2) * 1.7793370720777268,
        'asymmetry1': np.ones(2) * 0.9440237239896903,
        'asymmetry2': np.ones(2) * 0.12,
        'asymmetry3': np.ones(2) * 0.3691430189107616,
        'boundedness': np.ones(2) * 0.25,
        'efficiency': np.ones(2) * 2.0,
        'elongation': np.ones(2) * 0.75,
        'fractal_dim': np.ones(2) * 1.333333333333333,
        'frames': [5.0, 5.0],
        'kurtosis': np.ones(2) * 1.166666666666667,
        'straightness': np.ones(2) * 0.7071067811865476,
        'trappedness': np.ones(2) * -0.15258529289428524
    }
    dfi = pd.DataFrame(data=d)

    pdt.assert_frame_equal(dfi, feat)
Esempio n. 3
0
def test_kmo():
    dataf = msd.random_traj_dataset(nparts=10, ndist=(1, 1), seed=3)
    msds = msd.all_msds2(dataf, frames=100)
    feat = ft.calculate_features(msds)
    dataset = feat.drop(['frames', 'Track_ID'], axis=1)
    corrmatrix = np.corrcoef(dataset.transpose())
    npt.assert_equal(np.round(np.sum(corrmatrix), 1), 7.3)
Esempio n. 4
0
def test_pca_analysis():
    dataf = msd.random_traj_dataset(nparts=10, ndist=(2, 6))
    msds = msd.all_msds2(dataf, frames=100)
    feat = ft.calculate_features(msds)
    pcadataset = pca.pca_analysis(feat,
                                  dropcols=['frames', 'Track_ID'],
                                  n_components=5)

    npt.assert_equal(np.round(np.sum(pcadataset.components.values), 3), 0.400)
Esempio n. 5
0
def test_plot_trajectories():
    prefix = 'test'
    msd_file = 'msd_{}.csv'.format(prefix)
    ft_file = 'features_{}.csv'.format(prefix)

    dataf = msd.random_traj_dataset(nparts=30, ndist=(1, 1), seed=3)
    msds = msd.all_msds2(dataf, frames=100)
    msds.to_csv(msd_file)
    feat = ft.calculate_features(msds)
    feat.to_csv(ft_file)

    hm.plot_trajectories(prefix, resolution=520, rows=1, cols=1, upload=False)
    assert os.path.isfile('traj_{}.png'.format(prefix))
Esempio n. 6
0
def test_plot_histogram():
    prefix = 'test'
    msd_file = 'msd_{}.csv'.format(prefix)
    ft_file = 'features_{}.csv'.format(prefix)

    dataf = msd.random_traj_dataset(nparts=30, ndist=(1, 1), seed=3)
    msds = msd.all_msds2(dataf, frames=100)
    msds.to_csv(msd_file)
    feat = ft.calculate_features(msds)
    feat.to_csv(ft_file)
    
    hm.plot_histogram(prefix, fps=1, umppx=1, frames=100, frame_interval=5, frame_range=5, y_range=10, upload=False)
    assert os.path.isfile('hist_{}.png'.format(prefix))
Esempio n. 7
0
def test_plot_particles_in_frame():
    prefix = 'test'
    msd_file = 'msd_{}.csv'.format(prefix)
    ft_file = 'features_{}.csv'.format(prefix)

    dataf = msd.random_traj_dataset(nparts=10, ndist=(1, 1), seed=3)
    msds = msd.all_msds2(dataf, frames=100)
    msds.to_csv(msd_file)
    feat = ft.calculate_features(msds)
    feat.to_csv(ft_file)

    hm.plot_particles_in_frame(prefix, x_range=100, y_range=20, upload=False)
    assert os.path.isfile('in_frame_{}.png'.format(prefix))
Esempio n. 8
0
def test_plot_individual_msds():
    prefix = 'test'
    msd_file = 'msd_{}.csv'.format(prefix)
    ft_file = 'features_{}.csv'.format(prefix)

    dataf = msd.random_traj_dataset(nparts=30, ndist=(1, 1), seed=3)
    msds = msd.all_msds2(dataf, frames=100)
    msds.to_csv(msd_file)
    feat = ft.calculate_features(msds)
    feat.to_csv(ft_file)

    geomean, gSEM = hm.plot_individual_msds(prefix, umppx=1, fps=1, y_range=400, alpha=0.3, upload=False)
    npt.assert_equal(332.8, np.round(np.sum(geomean), 1))
    npt.assert_equal(33.1, np.round(np.sum(gSEM), 1))
Esempio n. 9
0
def test_voronoi_finite_polygons_2d():
    prefix = 'test'
    msd_file = 'msd_{}.csv'.format(prefix)
    ft_file = 'features_{}.csv'.format(prefix)

    dataf = msd.random_traj_dataset(nparts=30, ndist=(1, 1), seed=3)
    msds = msd.all_msds2(dataf, frames=100)
    msds.to_csv(msd_file)
    feat = ft.calculate_features(msds)
    feat.to_csv(ft_file)

    xs = feat['X'].astype(int)
    ys = feat['Y'].astype(int)
    points = np.zeros((xs.shape[0], 2))
    points[:, 0] = xs
    points[:, 1] = ys

    vor = Voronoi(points)
    regions, vertices = hm.voronoi_finite_polygons_2d(vor)

    npt.assert_equal(243.8, np.round(np.mean(vertices), 1))
Esempio n. 10
0
def download_and_calc_MSDs(prefix):

    import diff_classifier.aws as aws
    import diff_classifier.utils as ut
    import diff_classifier.msd as msd
    import diff_classifier.features as ft
    import os
    import os.path as op
    import numpy as np
    import numpy.ma as ma
    import pandas as pd

    remote_folder = "01_18_Experiment/{}".format(prefix.split('_')[0])
    local_folder = os.getcwd()
    ires = 512

    for row in range(0, 4):
        for col in range(0, 4):
            filename = "Traj_{}_{}_{}.csv".format(prefix, row, col)
            to_download = remote_folder+'/'+filename
            local_name = local_folder+'/'+filename
            aws.download_s3(to_download, local_name)
            if row==0 and col==0:
                merged = msd.all_msds(ut.csv_to_pd(local_name))
            else:
                to_add = ut.csv_to_pd(local_name)
                to_add['X'] = to_add['X'] + ires*row
                to_add['Y'] = to_add['Y'] + ires*col
                to_add['Track_ID'] = to_add['Track_ID'] + max(merged['Track_ID'])
                merged.append(msd.all_msds(to_add))
            print('Successfully downloaded and calculated MSDs for {}_{}_{}'.format(prefix, row, col))

    merged.to_csv('MSD_{}.csv'.format(prefix))
    print('Saved MSDs as MSD_{}.csv'.format(prefix))
    merged_ft = ft.calculate_features(merged)
    merged_ft.to_csv('features_{}.csv'.format(prefix))
    print('Saved features as features_{}.csv'.format(prefix))
Esempio n. 11
0
def assemble_msds(prefix, remote_folder, bucket,
                  ires=(512, 512), frames=651):
    '''Calculates MSDs and features from input trajectory files

    A function based on msd.all_msds2 and features.calculate_features, creates
    msd and feature csv files from input trajectory files and uploads to S3.
    Designed to work with Cloudknot for parallelizable workflows. Typically,
    this function is used in conjunction with kn.split and kn.tracking for an
    entire workflow.

    prefix : string
        Prefix (everything except file extension and folder name) of image file
        to be tracked. Must be available on S3.
    remote_folder : string
        Folder name where file is contained on S3 in the bucket specified by
        'bucket'.
    bucket : string
        S3 bucket where file is contained.
    ires : tuple of int
        Resolution of split images. Really just a sanity check to make sure you
        correctly splitting.
    frames : int
        Number of frames in input videos.

    '''

    import os
    import boto3
    import diff_classifier.aws as aws
    import diff_classifier.msd as msd
    import diff_classifier.features as ft
    import diff_classifier.utils as ut

    filename = '{}.tif'.format(prefix)
    remote_name = remote_folder+'/'+filename
    msd_file = 'msd_{}.csv'.format(prefix)
    ft_file = 'features_{}.csv'.format(prefix)

    s3 = boto3.client('s3')

    # names = []
    # for i in range(0, 4):
    #     for j in range(0, 4):
    #         names.append('{}_{}_{}.tif'.format(prefix, i, j))
    all_objects = s3.list_objects(Bucket=bucket,
                                  Prefix='{}/{}_'.format(remote_folder,
                                                         prefix))
    names = []
    rows = 0
    cols = 0
    for entry in all_objects['Contents']:
        name = entry['Key'].split('/')[-1]
        names.append(name)
        row = int(name.split(prefix)[1].split('.')[0].split('_')[-2])
        col = int(name.split(prefix)[1].split('.')[0].split('_')[-1])
        if row > rows:
            rows = row
        if col > cols:
            cols = col
    rows = rows + 1
    cols = cols + 1

    counter = 0
    for name in names:
        row = int(name.split(prefix)[1].split('.')[0].split('_')[-2])
        col = int(name.split(prefix)[1].split('.')[0].split('_')[-1])

        filename = "Traj_{}_{}_{}.csv".format(prefix, row, col)
        aws.download_s3(remote_folder+'/'+filename, filename,
                        bucket_name=bucket)
        local_name = filename

        if counter == 0:
            to_add = ut.csv_to_pd(local_name)
            to_add['X'] = to_add['X'] + ires[0]*col
            to_add['Y'] = ires[1] - to_add['Y'] + ires[1]*(rows-1-row)
            merged = msd.all_msds2(to_add, frames=frames)
        else:

            if merged.shape[0] > 0:
                to_add = ut.csv_to_pd(local_name)
                to_add['X'] = to_add['X'] + ires[0]*col
                to_add['Y'] = ires[1] - to_add['Y'] + ires[1]*(rows-1-row)
                to_add['Track_ID'] = to_add['Track_ID'
                                            ] + max(merged['Track_ID']) + 1
            else:
                to_add = ut.csv_to_pd(local_name)
                to_add['X'] = to_add['X'] + ires[0]*col
                to_add['Y'] = ires[1] - to_add['Y'] + ires[1]*(rows-1-row)
                to_add['Track_ID'] = to_add['Track_ID']

            merged = merged.append(msd.all_msds2(to_add, frames=frames))
            print('Done calculating MSDs for row {} and col {}'.format(row,
                                                                       col))
        counter = counter + 1

    merged.to_csv(msd_file)
    aws.upload_s3(msd_file, remote_folder+'/'+msd_file, bucket_name=bucket)
    merged_ft = ft.calculate_features(merged)
    merged_ft.to_csv(ft_file)
    aws.upload_s3(ft_file, remote_folder+'/'+ft_file, bucket_name=bucket)

    os.remove(ft_file)
    os.remove(msd_file)
    for name in names:
        outfile = 'Traj_' + name.split('.')[0] + '.csv'
        os.remove(outfile)
Esempio n. 12
0
def download_split_track_msds(prefix):
    """
    1. Checks to see if features file exists.
    2. If not, checks to see if image partitioning has occured.
    3. If yes, checks to see if tracking has occured.
    4. Regardless, tracks, calculates MSDs and features.
    """

    import matplotlib as mpl
    mpl.use('Agg')
    import diff_classifier.aws as aws
    import diff_classifier.utils as ut
    import diff_classifier.msd as msd
    import diff_classifier.features as ft
    import diff_classifier.imagej as ij
    import diff_classifier.heatmaps as hm

    from scipy.spatial import Voronoi
    import scipy.stats as stats
    from shapely.geometry import Point
    from shapely.geometry.polygon import Polygon
    import matplotlib.cm as cm
    import os
    import os.path as op
    import numpy as np
    import numpy.ma as ma
    import pandas as pd
    import boto3

    #Splitting section
    ###############################################################################################
    remote_folder = "01_18_Experiment/{}".format(prefix.split('_')[0])
    local_folder = os.getcwd()
    ires = 512
    frames = 651
    filename = '{}.tif'.format(prefix)
    remote_name = remote_folder+'/'+filename
    local_name = local_folder+'/'+filename

    msd_file = 'msd_{}.csv'.format(prefix)
    ft_file = 'features_{}.csv'.format(prefix)

    s3 = boto3.client('s3')

    names = []
    for i in range(0, 4):
        for j in range(0, 4):
            names.append('{}_{}_{}.tif'.format(prefix, i, j))

    try:
        obj = s3.head_object(Bucket='ccurtis7.pup', Key=remote_folder+'/'+ft_file)
    except:

        try:
            for name in names:
                aws.download_s3(remote_folder+'/'+name, name)
        except:
            aws.download_s3(remote_name, local_name)
            names = ij.partition_im(local_name)
            for name in names:
                aws.upload_s3(name, remote_folder+'/'+name)
                print("Done with splitting.  Should output file of name {}".format(remote_folder+'/'+name))

        #Tracking section
        ################################################################################################
        for name in names:
            outfile = 'Traj_' + name.split('.')[0] + '.csv'
            local_im = op.join(local_folder, name)

            row = int(name.split('.')[0].split('_')[4])
            col = int(name.split('.')[0].split('_')[5])

            try:
                aws.download_s3(remote_folder+'/'+outfile, outfile)
            except:
                test_intensity = ij.mean_intensity(local_im)
                if test_intensity > 500:
                    quality = 245
                else:
                    quality = 4.5

                if row==3:
                    y = 485
                else:
                    y = 511

                ij.track(local_im, outfile, template=None, fiji_bin=None, radius=4.5, threshold=0.,
                         do_median_filtering=True, quality=quality, x=511, y=y, ylo=1, median_intensity=300.0, snr=0.0,
                         linking_max_distance=8.0, gap_closing_max_distance=10.0, max_frame_gap=2,
                         track_displacement=10.0)

                aws.upload_s3(outfile, remote_folder+'/'+outfile)
            print("Done with tracking.  Should output file of name {}".format(remote_folder+'/'+outfile))


        #MSD and features section
        #################################################################################################
        files_to_big = False
        size_limit = 10

        for name in names:
            outfile = 'Traj_' + name.split('.')[0] + '.csv'
            local_im = name
            file_size_MB = op.getsize(local_im)/1000000
            if file_size_MB > size_limit:
                file_to_big = True

        if files_to_big:
            print('One or more of the {} trajectory files exceeds {}MB in size.  Will not continue with MSD calculations.'.format(
                  prefix, size_limit))
        else:
            counter = 0
            for name in names:
                row = int(name.split('.')[0].split('_')[4])
                col = int(name.split('.')[0].split('_')[5])

                filename = "Traj_{}_{}_{}.csv".format(prefix, row, col)
                local_name = local_folder+'/'+filename

                if counter == 0:
                    to_add = ut.csv_to_pd(local_name)
                    to_add['X'] = to_add['X'] + ires*col
                    to_add['Y'] = ires - to_add['Y'] + ires*(3-row)
                    merged = msd.all_msds2(to_add, frames=frames)
                else:

                    if merged.shape[0] > 0:
                        to_add = ut.csv_to_pd(local_name)
                        to_add['X'] = to_add['X'] + ires*col
                        to_add['Y'] = ires - to_add['Y'] + ires*(3-row)
                        to_add['Track_ID'] = to_add['Track_ID'] + max(merged['Track_ID']) + 1
                    else:
                        to_add = ut.csv_to_pd(local_name)
                        to_add['X'] = to_add['X'] + ires*col
                        to_add['Y'] = ires - to_add['Y'] + ires*(3-row)
                        to_add['Track_ID'] = to_add['Track_ID']

                    merged = merged.append(msd.all_msds2(to_add, frames=frames))
                    print('Done calculating MSDs for row {} and col {}'.format(row, col))
                counter = counter + 1

            merged.to_csv(msd_file)
            aws.upload_s3(msd_file, remote_folder+'/'+msd_file)
            merged_ft = ft.calculate_features(merged)
            merged_ft.to_csv(ft_file)

            aws.upload_s3(ft_file, remote_folder+'/'+ft_file)

            #Plots
            features = ('AR', 'D_fit', 'alpha', 'MSD_ratio', 'Track_ID', 'X', 'Y', 'asymmetry1', 'asymmetry2', 'asymmetry3',
                        'boundedness', 'efficiency', 'elongation', 'fractal_dim', 'frames', 'kurtosis', 'straightness', 'trappedness')
            vmin = (1.36, 0.015, 0.72, -0.09, 0, 0, 0, 0.5, 0.049, 0.089, 0.0069, 0.65, 0.26, 1.28, 0, 1.66, 0.087, -0.225)
            vmax = (3.98, 2.6, 2.3, 0.015, max(merged_ft['Track_ID']), 2048, 2048, 0.99, 0.415, 0.53,
                    0.062, 3.44, 0.75, 1.79, 650, 3.33, 0.52, -0.208)
            die = {'features': features,
                   'vmin': vmin,
                   'vmax': vmax}
            di = pd.DataFrame(data=die)
            for i in range(0, di.shape[0]):
                hm.plot_heatmap(prefix, feature=di['features'][i], vmin=di['vmin'][i], vmax=di['vmax'][i])
                hm.plot_scatterplot(prefix, feature=di['features'][i], vmin=di['vmin'][i], vmax=di['vmax'][i])

            hm.plot_trajectories(prefix)
            try:
                hm.plot_histogram(prefix)
            except ValueError:
                print("Couldn't plot histogram.")
            hm.plot_particles_in_frame(prefix)
            gmean1, gSEM1 = hm.plot_individual_msds(prefix, alpha=0.05)
Esempio n. 13
0
def assemble_msds(prefix,
                  remote_folder,
                  bucket='nancelab.publicfiles',
                  ires=(512, 512),
                  frames=651,
                  rows=4,
                  cols=4):
    '''Calculates MSDs and features from input trajectory files

    A function based on msd.all_msds2 and features.calculate_features, creates
    msd and feature csv files from input trajectory files and uploads to S3.

    prefix : string
        Prefix (everything except file extension and folder name) of image file
        to be tracked. Must be available on S3.
    remote_folder : string
        Folder name where file is contained on S3 in the bucket specified by
        'bucket'.
    bucket : string
        S3 bucket where file is contained.
    ires : tuple of int
        Resolution of split images. Really just a sanity check to make sure you
        correctly splitting.
    frames : int
        Number of frames in input videos.
    rows : int
        Number of rows to split image into.
    cols : int
        Number of columns to split image into.

    '''

    import os
    import boto3
    import diff_classifier.aws as aws
    import diff_classifier.msd as msd
    import diff_classifier.features as ft
    import diff_classifier.utils as ut

    filename = '{}.tif'.format(prefix)
    remote_name = remote_folder + '/' + filename
    msd_file = 'msd_{}.csv'.format(prefix)
    ft_file = 'features_{}.csv'.format(prefix)

    s3 = boto3.client('s3')

    names = []
    for i in range(0, 4):
        for j in range(0, 4):
            names.append('{}_{}_{}.tif'.format(prefix, i, j))

    counter = 0
    for name in names:
        row = int(name.split(prefix)[1].split('.')[0].split('_')[1])
        col = int(name.split(prefix)[1].split('.')[0].split('_')[2])

        filename = "Traj_{}_{}_{}.csv".format(prefix, row, col)
        aws.download_s3(remote_folder + '/' + filename,
                        filename,
                        bucket_name=bucket)
        local_name = filename

        if counter == 0:
            to_add = ut.csv_to_pd(local_name)
            to_add['X'] = to_add['X'] + ires[0] * col
            to_add['Y'] = ires[1] - to_add['Y'] + ires[1] * (rows - 1 - row)
            merged = msd.all_msds2(to_add, frames=frames)
        else:

            if merged.shape[0] > 0:
                to_add = ut.csv_to_pd(local_name)
                to_add['X'] = to_add['X'] + ires[0] * col
                to_add['Y'] = ires[1] - to_add['Y'] + ires[1] * (rows - 1 -
                                                                 row)
                to_add['Track_ID'] = to_add['Track_ID'] + max(
                    merged['Track_ID']) + 1
            else:
                to_add = ut.csv_to_pd(local_name)
                to_add['X'] = to_add['X'] + ires[0] * col
                to_add['Y'] = ires[1] - to_add['Y'] + ires[1] * (rows - 1 -
                                                                 row)
                to_add['Track_ID'] = to_add['Track_ID']

            merged = merged.append(msd.all_msds2(to_add, frames=frames))
            print('Done calculating MSDs for row {} and col {}'.format(
                row, col))
        counter = counter + 1

    merged.to_csv(msd_file)
    aws.upload_s3(msd_file, remote_folder + '/' + msd_file, bucket_name=bucket)
    merged_ft = ft.calculate_features(merged)
    merged_ft.to_csv(ft_file)
    aws.upload_s3(ft_file, remote_folder + '/' + ft_file, bucket_name=bucket)

    os.remove(ft_file)
    os.remove(msd_file)
    for name in names:
        outfile = 'Traj_' + name.split('.')[0] + '.csv'
        os.remove(outfile)