Example #1
def geomean_msd(prefix, umppx=0.16, fps=100.02, upload=True,
                   remote_folder="01_18_Experiment", bucket='ccurtis.data',
                   backup_frames=651):

    import pandas as pd
    import numpy as np
    import numpy.ma as ma
    import diff_classifier.aws as aws
    import scipy.stats as stats
    
    aws.download_s3('{}/msd_{}.csv'.format(remote_folder, prefix),
                    'msd_{}.csv'.format(prefix), bucket_name=bucket)
    merged = pd.read_csv('msd_{}.csv'.format(prefix))
    try:
        particles = int(max(merged['Track_ID']))
        frames = int(max(merged['Frame']))
        ypos = np.zeros((particles+1, frames+1))

        for i in range(0, particles+1):
            ypos[i, :] = merged.loc[merged.Track_ID == i, 'MSDs']*umppx*umppx
            xpos = merged.loc[merged.Track_ID == i, 'Frame']/fps

        geo_mean = np.nanmean(ma.log(ypos), axis=0)
        geo_stder = ma.masked_equal(stats.sem(ma.log(ypos), axis=0,
                                              nan_policy='omit'), 0.0)

    except ValueError:
        geo_mean = np.nan*np.ones(backup_frames)
        geo_stder = np.nan*np.ones(backup_frames)

    np.savetxt('geomean_{}.csv'.format(prefix), geo_mean, delimiter=",")
    np.savetxt('geoSEM_{}.csv'.format(prefix), geo_stder, delimiter=",")

    if upload:
        aws.upload_s3('geomean_{}.csv'.format(prefix),
                      remote_folder+'/'+'geomean_{}.csv'.format(prefix),
                      bucket_name=bucket)
        aws.upload_s3('geoSEM_{}.csv'.format(prefix),
                      remote_folder+'/'+'geoSEM_{}.csv'.format(prefix),
                      bucket_name=bucket)

    return geo_mean, geo_stder
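
A minimal usage sketch for geomean_msd, assuming an msd_<prefix>.csv file already exists in the S3 folder (the prefix here is a placeholder):

# Usage sketch (hypothetical prefix; requires msd_sample.csv on S3)
geo_mean, geo_stder = geomean_msd('sample', umppx=0.16, fps=100.02,
                                  remote_folder='01_18_Experiment',
                                  bucket='ccurtis.data', upload=False)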
Example #2
def download_and_track(filename):

    import diff_classifier.imagej as ij
    import diff_classifier.utils as ut
    import diff_classifier.aws as aws
    import os.path as op
    import pandas as pd

    aws.download_s3(filename, op.split(filename)[1])

    outfile = 'Traj_' + op.split(filename)[1].split('.')[0] + '.csv'
    local_im = op.split(filename)[1]
    if not op.isfile(outfile):
        ij.track(local_im, outfile, template=None, fiji_bin=None, radius=4.5, threshold=0.,
              do_median_filtering=True, quality=4.5, median_intensity=300.0, snr=0.0,
              linking_max_distance=8.0, gap_closing_max_distance=10.0, max_frame_gap=2,
              track_displacement=10.0)

        aws.upload_s3(outfile, op.split(filename)[0]+'/'+outfile)
    print("Done with tracking.  Should output file of name {}".format(op.split(filename)[0]+'/'+outfile))
Example #3
def download_and_split(filename):

    import boto3
    from botocore.exceptions import ClientError
    import diff_classifier.imagej as ij
    import diff_classifier.aws as aws
    import os.path as op

    local_name = op.split(filename)[1]
    DIR = op.split(filename)[0]
    try1 = filename.split('.')[0] + '_0_0.tif'
    try2 = filename.split('.')[0] + '_3_3.tif'

    s3 = boto3.client('s3')
    try:
        obj = s3.head_object(Bucket='ccurtis7.pup', Key=try1)
    except ClientError:
        try:
            obj = s3.head_object(Bucket='ccurtis7.pup', Key=try2)
        except ClientError:
            aws.download_s3(filename, local_name)
            names = ij.partition_im(local_name)
            for name in names:
                aws.upload_s3(name, op.split(filename)[0]+'/'+name)
                print("Done with splitting.  Should output file of name {}".format(op.split(filename)[0]+'/'+name))
Example #4
def download_and_calc_MSDs(prefix):

    import diff_classifier.aws as aws
    import diff_classifier.utils as ut
    import diff_classifier.msd as msd
    import diff_classifier.features as ft
    import os
    import os.path as op
    import numpy as np
    import numpy.ma as ma
    import pandas as pd

    remote_folder = "01_18_Experiment/{}".format(prefix.split('_')[0])
    local_folder = os.getcwd()
    ires = 512

    for row in range(0, 4):
        for col in range(0, 4):
            filename = "Traj_{}_{}_{}.csv".format(prefix, row, col)
            to_download = remote_folder+'/'+filename
            local_name = local_folder+'/'+filename
            aws.download_s3(to_download, local_name)
            if row==0 and col==0:
                merged = msd.all_msds(ut.csv_to_pd(local_name))
            else:
                to_add = ut.csv_to_pd(local_name)
                to_add['X'] = to_add['X'] + ires*row
                to_add['Y'] = to_add['Y'] + ires*col
                to_add['Track_ID'] = to_add['Track_ID'] + max(merged['Track_ID'])
                merged = merged.append(msd.all_msds(to_add))
            print('Successfully downloaded and calculated MSDs for {}_{}_{}'.format(prefix, row, col))

    merged.to_csv('MSD_{}.csv'.format(prefix))
    print('Saved MSDs as MSD_{}.csv'.format(prefix))
    merged_ft = ft.calculate_features(merged)
    merged_ft.to_csv('features_{}.csv'.format(prefix))
    print('Saved features as features_{}.csv'.format(prefix))
Example #5
def split(prefix, remote_folder, bucket,
          rows=4, cols=4, ores=(2048, 2048), ires=(512, 512)):
    '''Splits input image file into smaller images.

    A function based on imagej.partition_im that downloads an image from an S3
    bucket, splits it into smaller images, and uploads these to S3. Designed to
    work with Cloudknot for parallelizable workflows. Typically, this function
    is used in conjunction with kn.tracking and kn.assemble_msds for a complete
    analysis.

    Parameters
    ----------
    prefix : string
        Prefix (everything except file extension and folder name) of image file
        to be tracked. Must be available on S3.
    remote_folder : string
        Folder name where file is contained on S3 in the bucket specified by
        'bucket'.
    bucket : string
        S3 bucket where file is contained.
    rows : int
        Number of rows to split image into.
    cols : int
        Number of columns to split image into.
    ores : tuple of int
        Original resolution of input image.
    ires : tuple of int
        Resolution of split images. Really just a sanity check to make sure you
        are splitting correctly.

    '''

    import os
    import boto3
    import diff_classifier.aws as aws
    import diff_classifier.imagej as ij

    local_folder = os.getcwd()
    filename = '{}.tif'.format(prefix)
    remote_name = remote_folder+'/'+filename
    local_name = local_folder+'/'+filename
    msd_file = 'msd_{}.csv'.format(prefix)
    ft_file = 'features_{}.csv'.format(prefix)
    aws.download_s3(remote_name, local_name, bucket_name=bucket)

    s3 = boto3.client('s3')

    # Splitting section
    names = ij.partition_im(local_name, irows=rows, icols=cols,
                            ores=ores, ires=ires)

    # Names of subfiles
    # names = []
    # for i in range(0, 4):
    #     for j in range(0, 4):
    #         names.append('{}_{}_{}.tif'.format(prefix, i, j))

    for name in names:
        aws.upload_s3(name, remote_folder+'/'+name, bucket_name=bucket)
        os.remove(name)
        print("Done with splitting. Should output file of name {}".format(
              remote_folder+'/'+name))

    os.remove(filename)
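
The docstring above describes pairing split with kn.tracking and kn.assemble_msds. A serial sketch of the first step, with placeholder names (in practice each call would be mapped over many videos with Cloudknot):

# Workflow sketch (hypothetical prefix/folder/bucket)
prefix = 'P1_S1_R_0001'
split(prefix, remote_folder='01_18_Experiment', bucket='ccurtis.data',
      rows=4, cols=4, ores=(2048, 2048), ires=(512, 512))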
Example #6
def assemble_msds(prefix, remote_folder, bucket,
                  ires=(512, 512), frames=651):
    '''Calculates MSDs and features from input trajectory files.

    A function based on msd.all_msds2 and features.calculate_features that
    creates msd and feature csv files from input trajectory files and uploads
    them to S3. Designed to work with Cloudknot for parallelizable workflows.
    Typically, this function is used in conjunction with kn.split and
    kn.tracking for an entire workflow.

    Parameters
    ----------
    prefix : string
        Prefix (everything except file extension and folder name) of image file
        to be tracked. Must be available on S3.
    remote_folder : string
        Folder name where file is contained on S3 in the bucket specified by
        'bucket'.
    bucket : string
        S3 bucket where file is contained.
    ires : tuple of int
        Resolution of split images. Really just a sanity check to make sure you
        are splitting correctly.
    frames : int
        Number of frames in input videos.

    '''

    import os
    import boto3
    import diff_classifier.aws as aws
    import diff_classifier.msd as msd
    import diff_classifier.features as ft
    import diff_classifier.utils as ut

    filename = '{}.tif'.format(prefix)
    remote_name = remote_folder+'/'+filename
    msd_file = 'msd_{}.csv'.format(prefix)
    ft_file = 'features_{}.csv'.format(prefix)

    s3 = boto3.client('s3')

    # names = []
    # for i in range(0, 4):
    #     for j in range(0, 4):
    #         names.append('{}_{}_{}.tif'.format(prefix, i, j))
    all_objects = s3.list_objects(Bucket=bucket,
                                  Prefix='{}/{}_'.format(remote_folder,
                                                         prefix))
    names = []
    rows = 0
    cols = 0
    for entry in all_objects['Contents']:
        name = entry['Key'].split('/')[-1]
        names.append(name)
        row = int(name.split(prefix)[1].split('.')[0].split('_')[-2])
        col = int(name.split(prefix)[1].split('.')[0].split('_')[-1])
        if row > rows:
            rows = row
        if col > cols:
            cols = col
    rows = rows + 1
    cols = cols + 1

    counter = 0
    for name in names:
        row = int(name.split(prefix)[1].split('.')[0].split('_')[-2])
        col = int(name.split(prefix)[1].split('.')[0].split('_')[-1])

        filename = "Traj_{}_{}_{}.csv".format(prefix, row, col)
        aws.download_s3(remote_folder+'/'+filename, filename,
                        bucket_name=bucket)
        local_name = filename

        if counter == 0:
            to_add = ut.csv_to_pd(local_name)
            to_add['X'] = to_add['X'] + ires[0]*col
            to_add['Y'] = ires[1] - to_add['Y'] + ires[1]*(rows-1-row)
            merged = msd.all_msds2(to_add, frames=frames)
        else:

            if merged.shape[0] > 0:
                to_add = ut.csv_to_pd(local_name)
                to_add['X'] = to_add['X'] + ires[0]*col
                to_add['Y'] = ires[1] - to_add['Y'] + ires[1]*(rows-1-row)
                to_add['Track_ID'] = to_add['Track_ID'
                                            ] + max(merged['Track_ID']) + 1
            else:
                to_add = ut.csv_to_pd(local_name)
                to_add['X'] = to_add['X'] + ires[0]*col
                to_add['Y'] = ires[1] - to_add['Y'] + ires[1]*(rows-1-row)
                to_add['Track_ID'] = to_add['Track_ID']

            merged = merged.append(msd.all_msds2(to_add, frames=frames))
            print('Done calculating MSDs for row {} and col {}'.format(row,
                                                                       col))
        counter = counter + 1

    merged.to_csv(msd_file)
    aws.upload_s3(msd_file, remote_folder+'/'+msd_file, bucket_name=bucket)
    merged_ft = ft.calculate_features(merged)
    merged_ft.to_csv(ft_file)
    aws.upload_s3(ft_file, remote_folder+'/'+ft_file, bucket_name=bucket)

    os.remove(ft_file)
    os.remove(msd_file)
    for name in names:
        outfile = 'Traj_' + name.split('.')[0] + '.csv'
        os.remove(outfile)
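
Continuing that workflow, once tracking has produced Traj_<prefix>_<row>_<col>.csv files on S3, a single call assembles the msd and feature csvs (names are placeholders):

# Assembly sketch (assumes the Traj_* files from tracking exist on S3)
assemble_msds('P1_S1_R_0001', remote_folder='01_18_Experiment',
              bucket='ccurtis.data', ires=(512, 512), frames=651)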
Example #7
def load_data(folder, filenames=[], **kwargs):
    """
    Load data either through the system or through aws S3.

    Parameters
    ----------
    folder : string :
        desired folder to import files from
    filenames : list of strings :
        desired files to import

    Optional Parameters
    -------------------
    download_list_file : string :
        if using a textfile containing multiple filenames, use this to designate location
        of this file within the folder.
        ex: folder/download_file_names.txt
    tag : list of strings :
        if tagging a dataframe file with a variable, use this to tag each file. Will cycle
        through the list if it reaches the end and there are still files in the filenames list
    bucket_name : string :
        if using aws S3, declare this variable as an S3 bucket to look through. This will
        trigger the function so that folder is the folder in the bucket and filenames are
        the filenames to download in the bucket

    """
    import os
    import pandas as pd
    import diff_classifier.aws as aws
    from itertools import cycle

    data = pd.DataFrame()
    tag = None
    if 'download_list_file' in kwargs:
        list_path = os.path.join(folder, kwargs['download_list_file'])
        assert os.path.isfile(list_path) and os.access(list_path, os.R_OK), \
            f'{list_path} does not exist or cannot be read'
        try:
            with open(list_path, 'r') as f:
                filenames = f.read().splitlines()
        except IOError as err:
            print(f"Could not read {list_path}: {err}")
    if 'tag' in kwargs:
        tag = cycle(kwargs['tag'])
    if 'bucket_name' in kwargs:
        bucket = kwargs['bucket_name']
        for filename in filenames:
            if tag:
                file_tag = next(tag)
            else:
                file_tag = None
            try:
                file_path = os.path.join(folder, filename)
                print(file_path)
                aws.download_s3(file_path, filename, bucket_name=bucket)
                file_data = pd.read_csv(filename, encoding="ISO-8859-1", index_col='Unnamed: 0')
                if file_tag:
                    size = file_data.shape[0]
                    file_data['Tag'] = pd.Series(size*[file_tag], index=file_data.index)
                data = pd.concat([data, file_data])
                del file_data
            except IOError as err:
                print(f'Skipped!: {filename}: {err}')
        return data
    for filename in filenames:
        if tag:
            file_tag = next(tag)
        else:
            file_tag = None
        try:
            file_path = os.path.join(folder, filename)
            print(file_path)
            file_data = pd.read_csv(file_path, encoding="ISO-8859-1",
                                    index_col='Unnamed: 0')
            if file_tag:
                size = file_data.shape[0]
                file_data['Tag'] = pd.Series(size*[file_tag],
                                             index=file_data.index)
            data = pd.concat([data, file_data])
            del file_data
        except IOError as err:
            print(f'Skipped!: {filename}: {err}')
    return data
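
Two usage sketches for load_data, one reading local csv files and one downloading from S3 first (folder, filenames, tags, and bucket are placeholders):

# Local files, tagged alternately (hypothetical names)
data = load_data('results', filenames=['a.csv', 'b.csv'],
                 tag=['control', 'treated'])

# Same, but pulling the files from an S3 bucket first
data = load_data('results', filenames=['a.csv', 'b.csv'],
                 bucket_name='ccurtis.data')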
Example #8
def plot_all_experiments(experiments, bucket='ccurtis.data', folder='test',
                         yrange=(10**-1, 10**1), fps=100.02,
                         xrange=(10**-2, 10**0), upload=True,
                         outfile='test.png', exponential=True):
    """Plots precision-weighted averages of MSD datasets.

    Plots pre-calculated precision-weighted averages of MSD datasets calculated
    from precision_averaging and stored in an AWS S3 bucket.

    Parameters
    ----------
    experiments : list of str
        List of experiment names to plot. Each experiment must have an MSD and
        SEM file associated with it in s3.
    bucket : str
        S3 bucket from which to download data.
    folder : str
        Folder in s3 bucket from which to download data.
    yrange : tuple of float
        Y range of plot
    fps : float
        Frames per second of the tracking videos, used to convert frame
        indices to time
    xrange : tuple of float
        X range of plot
    upload : bool
        True to upload to S3
    outfile : str
        Filename of output image
    exponential : bool
        True if the stored averages are log values to be exponentiated
        before plotting

    """

    import numpy as np
    import numpy.ma as ma
    import matplotlib.pyplot as plt
    import matplotlib.cm as cm
    import diff_classifier.aws as aws

    n = len(experiments)

    color = iter(cm.viridis(np.linspace(0, 0.9, n)))

    fig = plt.figure(figsize=(8.5, 8.5))
    plt.xlim(xrange[0], xrange[1])
    plt.ylim(yrange[0], yrange[1])
    plt.xlabel('Tau (s)', fontsize=25)
    plt.ylabel(r'Mean Squared Displacement ($\mu$m$^2$)', fontsize=25)

    geo = {}
    gstder = {}
    counter = 0
    for experiment in experiments:
        aws.download_s3('{}/geomean_{}.csv'.format(folder, experiment),
                        'geomean_{}.csv'.format(experiment), bucket_name=bucket)
        aws.download_s3('{}/geoSEM_{}.csv'.format(folder, experiment),
                        'geoSEM_{}.csv'.format(experiment), bucket_name=bucket)

        geo[counter] = np.genfromtxt('geomean_{}.csv'.format(experiment))
        gstder[counter] = np.genfromtxt('geoSEM_{}.csv'.format(experiment))
        geo[counter] = ma.masked_equal(geo[counter], 0.0)
        gstder[counter] = ma.masked_equal(gstder[counter], 0.0)

        frames = np.shape(gstder[counter])[0]
        xpos = np.linspace(0, frames-1, frames)/fps
        c = next(color)

        if exponential:
            plt.loglog(xpos, np.exp(geo[counter]), c=c, linewidth=6,
                       label=experiment)
            plt.loglog(xpos, np.exp(geo[counter] - 1.96*gstder[counter]),
                       c=c, dashes=[6, 2], linewidth=4)
            plt.loglog(xpos, np.exp(geo[counter] + 1.96*gstder[counter]),
                       c=c, dashes=[6, 2], linewidth=4)
        else:
            plt.loglog(xpos, geo[counter], c=c, linewidth=6,
                       label=experiment)
            plt.loglog(xpos, geo[counter] - 1.96*gstder[counter], c=c,
                       dashes=[6, 2], linewidth=4)
            plt.loglog(xpos, geo[counter] + 1.96*gstder[counter], c=c,
                       dashes=[6, 2], linewidth=4)

        counter = counter + 1

    plt.legend(frameon=False, prop={'size': 16})

    if upload:
        fig.savefig(outfile, bbox_inches='tight')
        aws.upload_s3(outfile, folder+'/'+outfile, bucket_name=bucket)
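
A usage sketch for plot_all_experiments, assuming geomean_<experiment>.csv and geoSEM_<experiment>.csv files (e.g. from geomean_msd above) exist in the S3 folder (experiment names are placeholders):

# Plotting sketch (hypothetical experiment names)
plot_all_experiments(['10K_tissue_S4_XY1', '1K_tissue_S4_XY1'],
                     bucket='ccurtis.data', folder='01_18_Experiment',
                     upload=False, outfile='msd_comparison.png')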
Example #9
def regress_sys(folder,
                all_videos,
                yfit,
                training_size,
                randselect=True,
                trainingdata=[],
                frame=0,
                have_output=True,
                download=True,
                bucket_name='ccurtis.data'):
    """Uses regression based on image intensities to select tracking parameters.

    This function uses regression methods from the scikit-learn module to
    predict the lower quality cutoff values for particle filtering in TrackMate
    based on the intensity distributions of input images. Currently only uses
    the first frame of videos for analysis, and is limited to predicting
    quality values.

    In practice, users will run regress_sys twice in different modes to build
    a regression system. First, set have_output to False. Function will return
    list of randomly selected videos to include in the training dataset. The
    user should then manually track particles using the Trackmate GUI, and enter
    these values in during the next round as the input yfit variable.

    Parameters
    ----------
    folder : str
        S3 directory containing video files specified in all_videos.
    all_videos: list of str
        Contains prefixes of video filenames of entire video set to be
        tracked.  Training dataset will be some subset of these videos.
    yfit: numpy.ndarray
        Contains manually acquired quality levels using Trackmate for the
        files contained in the training dataset.
    training_size : int
        Number of files in training dataset.
    randselect : bool
        If True, will randomly select training videos from all_videos.
        If False, will use trainingdata as input training dataset.
    trainingdata : list of str
        Optional manually selected prefixes of video filenames to be
        used as training dataset.
    frame : int
        Frame of each video used to compute intensity descriptors.
    have_output : bool
        If you have already acquired the quality values (yfit) for the
        training dataset, set to True.  If False, it will output the files
        the user will need to acquire quality values for.
    download : bool
        If True, download the training videos from S3 before computing
        descriptors.
    bucket_name : str
        S3 bucket containing videos to be downloaded for regression
        calculations.

    Returns
    -------
    regress_object : list of sklearn.svm.classes.
        Contains list of regression objects assembled from the training
        datasets.  Uses the mean, 10th percentile, 90th percentile, and
        standard deviation intensities to predict the quality parameter
        in Trackmate.
    tprefix : list of str
        Contains randomly selected images from all_videos to be included in
        training dataset.

    """

    import random
    import numpy as np
    import skimage.io as sio
    from sklearn import svm, linear_model
    import diff_classifier.aws as aws

    if randselect:
        tprefix = []
        for i in range(0, training_size):
            random.seed(i + 1)
            tprefix.append(all_videos[random.randint(0, len(all_videos) - 1)])
            if have_output is False:
                print("Get parameters for: {}".format(tprefix[i]))
    else:
        tprefix = trainingdata

    if have_output is True:
        # Define descriptors
        descriptors = np.zeros((training_size, 4))
        counter = 0
        for name in tprefix:
            local_im = name + '.tif'
            remote_im = "{}/{}".format(folder, local_im)
            if download:
                aws.download_s3(remote_im, local_im, bucket_name=bucket_name)
            test_image = sio.imread(local_im)
            descriptors[counter, 0] = np.mean(test_image[frame, :, :])
            descriptors[counter, 1] = np.std(test_image[frame, :, :])
            descriptors[counter, 2] = np.percentile(test_image[frame, :, :],
                                                    10)
            descriptors[counter, 3] = np.percentile(test_image[frame, :, :],
                                                    90)
            counter = counter + 1

        # Define regression techniques
        xfit = descriptors
        classifiers = [
            svm.SVR(),
            linear_model.SGDRegressor(),
            linear_model.BayesianRidge(),
            linear_model.LassoLars(),
            linear_model.ARDRegression(),
            linear_model.PassiveAggressiveRegressor(),
            linear_model.TheilSenRegressor(),
            linear_model.LinearRegression()
        ]

        regress_object = []
        for item in classifiers:
            clf = item
            regress_object.append(clf.fit(xfit, yfit))

        return regress_object

    else:
        return tprefix
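
The two-phase workflow from the docstring, sketched with placeholder prefixes and quality values:

import numpy as np

# Phase 1: select training videos to annotate manually in the TrackMate GUI
videos = ['vid_{}'.format(i) for i in range(20)]   # hypothetical prefixes
tprefix = regress_sys('01_18_Experiment', videos, yfit=None, training_size=5,
                      have_output=False, download=False)

# Phase 2: feed the manually measured quality values back in as yfit
yfit = np.array([4.5, 6.0, 5.2, 4.8, 5.5])         # placeholder values
regressors = regress_sys('01_18_Experiment', videos, yfit, training_size=5,
                         have_output=True)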
Example #10
def BF_cell_features(prefix, folder, bucket='ccurtis.data'):

    # The helpers binary_BF and EuclideanTransform are assumed to be defined
    # or imported at module scope in the original source.
    import numpy as np
    import pandas as pd
    import matplotlib.pyplot as plt
    from operator import itemgetter
    from skimage.morphology import disk
    import diff_classifier.aws as aws

    ffilename = 'features_{}.csv'.format(prefix)
    mfilename = 'msd_{}.csv'.format(prefix)
    bffilename = 'BF_cells_{}.tif'.format(prefix)
    biim = 'bi_BF_cells_{}.tif'.format(prefix)
    bimages = 'biproc_BF_cells_{}.png'.format(prefix)

    aws.download_s3('{}/{}'.format(folder, ffilename),
                    ffilename,
                    bucket_name=bucket)
    aws.download_s3('{}/{}'.format(folder, mfilename),
                    mfilename,
                    bucket_name=bucket)
    aws.download_s3('{}/{}'.format(folder, bffilename),
                    bffilename,
                    bucket_name=bucket)
    print('Successfully downloaded files')

    fstats = pd.read_csv(ffilename, encoding="ISO-8859-1")
    msds = pd.read_csv(mfilename, encoding="ISO-8859-1")
    bfimage = plt.imread(bffilename)
    tophimage = binary_BF(bfimage,
                          opense=disk(12),
                          bi_thresh=1.2,
                          tophatse=disk(20))
    plt.savefig(bimages)
    euimage = EuclideanTransform(tophimage) + EuclideanTransform(~tophimage)
    print('Successfully performed image processing')

    xa = -np.reshape(np.clip(
        (fstats.Y.values - 1).astype(int), a_min=0, a_max=2043),
                     newshape=(fstats.Y.shape[0], 1))
    ya = np.reshape(np.clip((fstats.X.values - 1).astype(int),
                            a_min=0,
                            a_max=2043),
                    newshape=(fstats.X.shape[0], 1))
    xya = [tuple(l) for l in np.concatenate((xa, ya), axis=1).tolist()]
    fstats['Cell Status'] = itemgetter(*xya)(tophimage)
    fstats['Cell Distance'] = itemgetter(*xya)(euimage)

    print('Successfully calculated Cell Status Params')

    frames = 651
    xb = -np.reshape(np.clip(
        (msds.Y.values - 1).astype(int), a_min=0, a_max=2043),
                     newshape=(int(msds.Y.shape[0]), 1))
    yb = np.reshape(np.clip((msds.X.values - 1).astype(int),
                            a_min=0,
                            a_max=2043),
                    newshape=(int(msds.X.shape[0]), 1))
    xyb = [tuple(l) for l in np.concatenate((xb, yb), axis=1).tolist()]
    msds['Cell Status'] = itemgetter(*xyb)(tophimage)
    msds['Cell Distance'] = itemgetter(*xyb)(euimage)

    msds_cell_status = np.reshape(msds['Cell Status'].values,
                                  newshape=(int(msds.X.shape[0] / frames),
                                            frames))
    msds_cell_distance = np.reshape(msds['Cell Distance'].values,
                                    newshape=(int(msds.X.shape[0] / frames),
                                              frames))
    fstats['Membrane Xing'] = np.sum(np.diff(msds_cell_status, axis=1) == True,
                                     axis=1)
    fstats['Distance Towards Cell'] = np.sum(np.diff(msds_cell_distance,
                                                     axis=1),
                                             axis=1)
    fstats['Percent Towards Cell'] = np.mean(
        np.diff(msds_cell_distance, axis=1) > 0, axis=1)
    print('Successfully calculated Membrane Xing Params')

    fstats.to_csv(ffilename, sep=',', encoding="ISO-8859-1")
    msds.to_csv(mfilename, sep=',', encoding="ISO-8859-1")
    plt.imsave(biim, tophimage, cmap='gray')

    aws.upload_s3(ffilename,
                  '{}/{}'.format(folder, ffilename),
                  bucket_name=bucket)
    aws.upload_s3(mfilename,
                  '{}/{}'.format(folder, mfilename),
                  bucket_name=bucket)
    aws.upload_s3(biim, '{}/{}'.format(folder, biim), bucket_name=bucket)
    aws.upload_s3(bimages, '{}/{}'.format(folder, bimages), bucket_name=bucket)
    print('Successfully uploaded files')

    return fstats
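
A usage sketch, assuming the features_, msd_, and BF_cells_ files for the prefix already exist in the S3 folder (names are placeholders):

# Cell-feature sketch (hypothetical prefix/folder)
fstats = BF_cell_features('P1_S1_R_0001', '01_18_Experiment',
                          bucket='ccurtis.data')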
Example #11
import sys
import numpy as np
import diff_classifier.aws as aws
import diff_classifier.msd as msd

# folder and bucket are assumed placeholders; they are not defined in the
# original snippet:
folder = '01_18_Experiment'
bucket = 'ccurtis.data'

to_track = []
frames = 651
fps = 100.02
umppx = 0.07

vids = 5
covers = ['10K', '1K', '5K', 'COOH']
slices = [4, 5, 6]
for cover in covers:
    for slic in slices:
        for num in range(1, vids + 1):
            #to_track.append('100x_0_4_1_2_gel_{}_bulk_vid_{}'.format(vis, num))
            to_track.append('{}_tissue_S{}_XY{}'.format(cover, slic, num))

geomean = {}
gSEM = {}
for sample_name in to_track[int(sys.argv[1]):int(sys.argv[2])]:
    # Users can toggle between using pre-calculated geomean files and calculating new values by commenting out the relevant
    # lines of code within the for loop.
    #aws.download_s3('{}/geomean_{}.csv'.format(folder, sample_name), 'geomean_{}.csv'.format(sample_name), bucket_name=bucket)
    #aws.download_s3('{}/geoSEM_{}.csv'.format(folder, sample_name), 'geoSEM_{}.csv'.format(sample_name), bucket_name=bucket)
    #geomean[sample_name] = np.genfromtxt('geomean_{}.csv'.format(sample_name))
    #gSEM[sample_name] = np.genfromtxt('geoSEM_{}.csv'.format(sample_name))

    aws.download_s3('{}/msd_{}.csv'.format(folder, sample_name),
                    'msd_{}.csv'.format(sample_name),
                    bucket_name=bucket)
    geomean[sample_name], gSEM[sample_name] = msd.geomean_msdisp(
        sample_name, umppx=umppx, fps=fps, remote_folder=folder, bucket=bucket)
    print('Done with {}'.format(sample_name))
Example #12
def tracking(
    subprefix,
    remote_folder,
    bucket='nancelab.publicfiles',
    regress_f='regress.obj',
    rows=4,
    cols=4,
    ires=(512, 512),
    tparams={
        'frames': 651,
        'radius': 3.0,
        'threshold': 0.0,
        'do_median_filtering': False,
        'quality': 15.0,
        'xdims': (0, 511),
        'ydims': (1, 511),
        'median_intensity': 300.0,
        'snr': 0.0,
        'linking_max_distance': 6.0,
        'gap_closing_max_distance': 10.0,
        'max_frame_gap': 3,
        'track_duration': 20.0
    }):
    '''Tracks particles in input image using Trackmate.

    A function based on imagej.track that downloads the image from S3, tracks
    particles using Trackmate, and uploads the resulting trajectory file to S3.

    Parameters
    ----------
    subprefix : string
        Prefix (everything except file extension and folder name) of image file
        to be tracked. Must be available on S3.
    remote_folder : string
        Folder name where file is contained on S3 in the bucket specified by
        'bucket'.
    bucket : string
        S3 bucket where file is contained.
    regress_f : string
        Name of regress object used to predict quality parameter.
    rows : int
        Number of rows to split image into.
    cols : int
        Number of columns to split image into.
    ires : tuple of int
        Resolution of split images. Really just a sanity check to make sure you
        are splitting correctly.
    tparams : dict
        Dictionary containing tracking parameters for the Trackmate analysis.

    '''

    import os
    import os.path as op
    import boto3
    from sklearn.externals import joblib
    import diff_classifier.aws as aws
    import diff_classifier.utils as ut
    import diff_classifier.msd as msd
    import diff_classifier.features as ft
    import diff_classifier.imagej as ij

    local_folder = os.getcwd()
    filename = '{}.tif'.format(subprefix)
    remote_name = remote_folder + '/' + filename
    local_name = local_folder + '/' + filename
    outfile = 'Traj_' + subprefix + '.csv'
    local_im = op.join(local_folder, '{}.tif'.format(subprefix))
    row = int(subprefix.split('_')[-2])
    col = int(subprefix.split('_')[-1])

    aws.download_s3(remote_folder + '/' + regress_f,
                    regress_f,
                    bucket_name=bucket)
    with open(regress_f, 'rb') as fp:
        regress = joblib.load(fp)

    s3 = boto3.client('s3')

    aws.download_s3('{}/{}'.format(remote_folder, '{}.tif'.format(subprefix)),
                    local_im,
                    bucket_name=bucket)
    tparams['quality'] = ij.regress_tracking_params(
        regress, subprefix, regmethod='PassiveAggressiveRegressor')

    if row == rows - 1:
        tparams['ydims'] = (tparams['ydims'][0], ires[1] - 27)

    ij.track(local_im, outfile, template=None, fiji_bin=None, tparams=tparams)
    aws.upload_s3(outfile, remote_folder + '/' + outfile, bucket_name=bucket)
    print("Done with tracking.  Should output file of name {}".format(
        remote_folder + '/' + outfile))
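
A usage sketch for one split sub-image, assuming regress.obj and the sub-image tif are on S3; the trailing _2_2 encodes the row and column that the function parses:

# Tracking sketch (hypothetical subprefix; default tparams used)
tracking('P1_S1_R_0001_2_2', '01_18_Experiment',
         bucket='nancelab.publicfiles', regress_f='regress.obj')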
Example #13
def assemble_msds(prefix,
                  remote_folder,
                  bucket='nancelab.publicfiles',
                  ires=(512, 512),
                  frames=651,
                  rows=4,
                  cols=4):
    '''Calculates MSDs and features from input trajectory files.

    A function based on msd.all_msds2 and features.calculate_features that
    creates msd and feature csv files from input trajectory files and uploads
    them to S3.

    Parameters
    ----------
    prefix : string
        Prefix (everything except file extension and folder name) of image file
        to be tracked. Must be available on S3.
    remote_folder : string
        Folder name where file is contained on S3 in the bucket specified by
        'bucket'.
    bucket : string
        S3 bucket where file is contained.
    ires : tuple of int
        Resolution of split images. Really just a sanity check to make sure you
        are splitting correctly.
    frames : int
        Number of frames in input videos.
    rows : int
        Number of rows to split image into.
    cols : int
        Number of columns to split image into.

    '''

    import os
    import boto3
    import diff_classifier.aws as aws
    import diff_classifier.msd as msd
    import diff_classifier.features as ft
    import diff_classifier.utils as ut

    filename = '{}.tif'.format(prefix)
    remote_name = remote_folder + '/' + filename
    msd_file = 'msd_{}.csv'.format(prefix)
    ft_file = 'features_{}.csv'.format(prefix)

    s3 = boto3.client('s3')

    names = []
    for i in range(0, rows):
        for j in range(0, cols):
            names.append('{}_{}_{}.tif'.format(prefix, i, j))

    counter = 0
    for name in names:
        row = int(name.split(prefix)[1].split('.')[0].split('_')[1])
        col = int(name.split(prefix)[1].split('.')[0].split('_')[2])

        filename = "Traj_{}_{}_{}.csv".format(prefix, row, col)
        aws.download_s3(remote_folder + '/' + filename,
                        filename,
                        bucket_name=bucket)
        local_name = filename

        if counter == 0:
            to_add = ut.csv_to_pd(local_name)
            to_add['X'] = to_add['X'] + ires[0] * col
            to_add['Y'] = ires[1] - to_add['Y'] + ires[1] * (rows - 1 - row)
            merged = msd.all_msds2(to_add, frames=frames)
        else:

            if merged.shape[0] > 0:
                to_add = ut.csv_to_pd(local_name)
                to_add['X'] = to_add['X'] + ires[0] * col
                to_add['Y'] = ires[1] - to_add['Y'] + ires[1] * (rows - 1 -
                                                                 row)
                to_add['Track_ID'] = to_add['Track_ID'] + max(
                    merged['Track_ID']) + 1
            else:
                to_add = ut.csv_to_pd(local_name)
                to_add['X'] = to_add['X'] + ires[0] * col
                to_add['Y'] = ires[1] - to_add['Y'] + ires[1] * (rows - 1 -
                                                                 row)
                to_add['Track_ID'] = to_add['Track_ID']

            merged = merged.append(msd.all_msds2(to_add, frames=frames))
            print('Done calculating MSDs for row {} and col {}'.format(
                row, col))
        counter = counter + 1

    merged.to_csv(msd_file)
    aws.upload_s3(msd_file, remote_folder + '/' + msd_file, bucket_name=bucket)
    merged_ft = ft.calculate_features(merged)
    merged_ft.to_csv(ft_file)
    aws.upload_s3(ft_file, remote_folder + '/' + ft_file, bucket_name=bucket)

    os.remove(ft_file)
    os.remove(msd_file)
    for name in names:
        outfile = 'Traj_' + name.split('.')[0] + '.csv'
        os.remove(outfile)
Example #14
def tracking(subprefix, remote_folder, bucket, tparams,
             regress_f='regress.obj', rows=4, cols=4, ires=(512, 512)):
    '''Tracks particles in input image using Trackmate.

    A function based on imagej.track that downloads the image from S3, tracks
    particles using Trackmate, and uploads the resulting trajectory file to S3.
    Designed to work with Cloudknot for parallelizable workflows. Typically,
    this function is used in conjunction with kn.split and kn.assemble_msds for
    a complete analysis.

    Parameters
    ----------
    subprefix : string
        Prefix (everything except file extension and folder name) of image file
        to be tracked. Must be available on S3.
    remote_folder : string
        Folder name where file is contained on S3 in the bucket specified by
        'bucket'.
    bucket : string
        S3 bucket where file is contained.
    regress_f : string
        Name of regress object used to predict quality parameter.
    rows : int
        Number of rows to split image into.
    cols : int
        Number of columns to split image into.
    ires : tuple of int
        Resolution of split images. Really just a sanity check to make sure you
        are splitting correctly.
    tparams : dict
        Dictionary containing tracking parameters for the Trackmate analysis.

    '''

    import os
    import os.path as op
    import boto3
    from sklearn.externals import joblib
    import diff_classifier.aws as aws
    import diff_classifier.utils as ut
    import diff_classifier.msd as msd
    import diff_classifier.features as ft
    import diff_classifier.imagej as ij

    local_folder = os.getcwd()
    filename = '{}.tif'.format(subprefix)
    remote_name = remote_folder+'/'+filename
    local_name = local_folder+'/'+filename
    outfile = 'Traj_' + subprefix + '.csv'
    local_im = op.join(local_folder, '{}.tif'.format(subprefix))
    row = int(subprefix.split('_')[-2])
    col = int(subprefix.split('_')[-1])

    aws.download_s3(remote_folder+'/'+regress_f, regress_f, bucket_name=bucket)
    with open(regress_f, 'rb') as fp:
        regress = joblib.load(fp)

    s3 = boto3.client('s3')

    aws.download_s3('{}/{}'.format(remote_folder,
                    '{}.tif'.format(subprefix)),
                    local_im, bucket_name=bucket)
    tparams['quality'] = ij.regress_tracking_params(regress, subprefix,
                                                    regmethod='PassiveAggressiveRegressor')

    if row == rows-1:
        tparams['ydims'] = (tparams['ydims'][0], ires[1] - 27)

    ij.track(local_im, outfile, template=None, fiji_bin=None,
             tparams=tparams)
    aws.upload_s3(outfile, remote_folder+'/'+outfile, bucket_name=bucket)
    print("Done with tracking.  Should output file of name {}".format(
          remote_folder+'/'+outfile))
Example #15
def download_split_track_msds(prefix):
    """
    1. Checks to see if features file exists.
    2. If not, checks to see if image partitioning has occurred.
    3. If yes, checks to see if tracking has occurred.
    4. Regardless, tracks, calculates MSDs and features.
    """

    import matplotlib as mpl
    mpl.use('Agg')
    import diff_classifier.aws as aws
    import diff_classifier.utils as ut
    import diff_classifier.msd as msd
    import diff_classifier.features as ft
    import diff_classifier.imagej as ij
    import diff_classifier.heatmaps as hm

    from scipy.spatial import Voronoi
    import scipy.stats as stats
    from shapely.geometry import Point
    from shapely.geometry.polygon import Polygon
    import matplotlib.cm as cm
    import os
    import os.path as op
    import numpy as np
    import numpy.ma as ma
    import pandas as pd
    import boto3
    from botocore.exceptions import ClientError

    #Splitting section
    ###############################################################################################
    remote_folder = "01_18_Experiment/{}".format(prefix.split('_')[0])
    local_folder = os.getcwd()
    ires = 512
    frames = 651
    filename = '{}.tif'.format(prefix)
    remote_name = remote_folder+'/'+filename
    local_name = local_folder+'/'+filename

    msd_file = 'msd_{}.csv'.format(prefix)
    ft_file = 'features_{}.csv'.format(prefix)

    s3 = boto3.client('s3')

    names = []
    for i in range(0, 4):
        for j in range(0, 4):
            names.append('{}_{}_{}.tif'.format(prefix, i, j))

    try:
        obj = s3.head_object(Bucket='ccurtis7.pup', Key=remote_folder+'/'+ft_file)
    except ClientError:

        try:
            for name in names:
                aws.download_s3(remote_folder+'/'+name, name)
        except ClientError:
            aws.download_s3(remote_name, local_name)
            names = ij.partition_im(local_name)
            for name in names:
                aws.upload_s3(name, remote_folder+'/'+name)
                print("Done with splitting.  Should output file of name {}".format(remote_folder+'/'+name))

        #Tracking section
        ################################################################################################
        for name in names:
            outfile = 'Traj_' + name.split('.')[0] + '.csv'
            local_im = op.join(local_folder, name)

            row = int(name.split('.')[0].split('_')[4])
            col = int(name.split('.')[0].split('_')[5])

            try:
                aws.download_s3(remote_folder+'/'+outfile, outfile)
            except ClientError:
                test_intensity = ij.mean_intensity(local_im)
                if test_intensity > 500:
                    quality = 245
                else:
                    quality = 4.5

                if row==3:
                    y = 485
                else:
                    y = 511

                ij.track(local_im, outfile, template=None, fiji_bin=None, radius=4.5, threshold=0.,
                         do_median_filtering=True, quality=quality, x=511, y=y, ylo=1, median_intensity=300.0, snr=0.0,
                         linking_max_distance=8.0, gap_closing_max_distance=10.0, max_frame_gap=2,
                         track_displacement=10.0)

                aws.upload_s3(outfile, remote_folder+'/'+outfile)
            print("Done with tracking.  Should output file of name {}".format(remote_folder+'/'+outfile))


        #MSD and features section
        #################################################################################################
        files_too_big = False
        size_limit = 10

        # Flag any trajectory csv produced above that exceeds the size limit
        for name in names:
            outfile = 'Traj_' + name.split('.')[0] + '.csv'
            file_size_MB = op.getsize(outfile)/1000000
            if file_size_MB > size_limit:
                files_too_big = True

        if files_too_big:
            print('One or more of the {} trajectory files exceeds {}MB in size.  Will not continue with MSD calculations.'.format(
                  prefix, size_limit))
        else:
            counter = 0
            for name in names:
                row = int(name.split('.')[0].split('_')[4])
                col = int(name.split('.')[0].split('_')[5])

                filename = "Traj_{}_{}_{}.csv".format(prefix, row, col)
                local_name = local_folder+'/'+filename

                if counter == 0:
                    to_add = ut.csv_to_pd(local_name)
                    to_add['X'] = to_add['X'] + ires*col
                    to_add['Y'] = ires - to_add['Y'] + ires*(3-row)
                    merged = msd.all_msds2(to_add, frames=frames)
                else:

                    if merged.shape[0] > 0:
                        to_add = ut.csv_to_pd(local_name)
                        to_add['X'] = to_add['X'] + ires*col
                        to_add['Y'] = ires - to_add['Y'] + ires*(3-row)
                        to_add['Track_ID'] = to_add['Track_ID'] + max(merged['Track_ID']) + 1
                    else:
                        to_add = ut.csv_to_pd(local_name)
                        to_add['X'] = to_add['X'] + ires*col
                        to_add['Y'] = ires - to_add['Y'] + ires*(3-row)
                        to_add['Track_ID'] = to_add['Track_ID']

                    merged = merged.append(msd.all_msds2(to_add, frames=frames))
                    print('Done calculating MSDs for row {} and col {}'.format(row, col))
                counter = counter + 1

            merged.to_csv(msd_file)
            aws.upload_s3(msd_file, remote_folder+'/'+msd_file)
            merged_ft = ft.calculate_features(merged)
            merged_ft.to_csv(ft_file)

            aws.upload_s3(ft_file, remote_folder+'/'+ft_file)

            #Plots
            features = ('AR', 'D_fit', 'alpha', 'MSD_ratio', 'Track_ID', 'X', 'Y', 'asymmetry1', 'asymmetry2', 'asymmetry3',
                        'boundedness', 'efficiency', 'elongation', 'fractal_dim', 'frames', 'kurtosis', 'straightness', 'trappedness')
            vmin = (1.36, 0.015, 0.72, -0.09, 0, 0, 0, 0.5, 0.049, 0.089, 0.0069, 0.65, 0.26, 1.28, 0, 1.66, 0.087, -0.225)
            vmax = (3.98, 2.6, 2.3, 0.015, max(merged_ft['Track_ID']), 2048, 2048, 0.99, 0.415, 0.53,
                    0.062, 3.44, 0.75, 1.79, 650, 3.33, 0.52, -0.208)
            die = {'features': features,
                   'vmin': vmin,
                   'vmax': vmax}
            di = pd.DataFrame(data=die)
            for i in range(0, di.shape[0]):
                hm.plot_heatmap(prefix, feature=di['features'][i], vmin=di['vmin'][i], vmax=di['vmax'][i])
                hm.plot_scatterplot(prefix, feature=di['features'][i], vmin=di['vmin'][i], vmax=di['vmax'][i])

            hm.plot_trajectories(prefix)
            try:
                hm.plot_histogram(prefix)
            except ValueError:
                print("Couldn't plot histogram.")
            hm.plot_particles_in_frame(prefix)
            gmean1, gSEM1 = hm.plot_individual_msds(prefix, alpha=0.05)
Example #16
def sensitivity_it(counter):

    import matplotlib as mpl
    mpl.use('Agg')
    import matplotlib.pyplot as plt
    import diff_classifier.aws as aws
    import diff_classifier.utils as ut
    import diff_classifier.msd as msd
    import diff_classifier.features as ft
    import diff_classifier.imagej as ij
    import diff_classifier.heatmaps as hm

    from scipy.spatial import Voronoi
    import scipy.stats as stats
    from shapely.geometry import Point
    from shapely.geometry.polygon import Polygon
    import matplotlib.cm as cm
    import os
    import os.path as op
    import numpy as np
    import numpy.ma as ma
    import pandas as pd
    import boto3
    import itertools

    #Sweep parameters
    #----------------------------------
    radius = [4.5, 6.0, 7.0]
    do_median_filtering = [True, False]
    quality = [1.5, 4.5, 8.5]
    linking_max_distance = [6.0, 10.0, 15.0]
    gap_closing_max_distance = [6.0, 10.0, 15.0]
    max_frame_gap = [1, 2, 5]
    track_displacement = [0.0, 10.0, 20.0]

    sweep = [
        radius, do_median_filtering, quality, linking_max_distance,
        gap_closing_max_distance, max_frame_gap, track_displacement
    ]
    all_params = list(itertools.product(*sweep))

    #Variable prep
    #----------------------------------
    s3 = boto3.client('s3')

    folder = '01_18_Experiment'
    s_folder = '{}/sensitivity'.format(folder)
    local_folder = '.'
    prefix = "P1_S1_R_0001_2_2"
    name = "{}.tif".format(prefix)
    local_im = op.join(local_folder, name)
    aws.download_s3(
        '{}/{}/{}.tif'.format(folder,
                              prefix.split('_')[0], prefix),
        '{}.tif'.format(prefix))

    outputs = np.zeros((len(all_params), len(all_params[0]) + 2))

    #Tracking and calculations
    #------------------------------------
    params = all_params[counter]
    outfile = 'Traj_{}_{}.csv'.format(name.split('.')[0], counter)
    msd_file = 'msd_{}_{}.csv'.format(name.split('.')[0], counter)
    geo_file = 'geomean_{}_{}.csv'.format(name.split('.')[0], counter)
    geoS_file = 'geoSEM_{}_{}.csv'.format(name.split('.')[0], counter)
    msd_image = 'msds_{}_{}.png'.format(name.split('.')[0], counter)
    iter_name = "{}_{}".format(prefix, counter)

    ij.track(local_im,
             outfile,
             template=None,
             fiji_bin=None,
             radius=params[0],
             threshold=0.,
             do_median_filtering=params[1],
             quality=params[2],
             x=511,
             y=511,
             ylo=1,
             median_intensity=300.0,
             snr=0.0,
             linking_max_distance=params[3],
             gap_closing_max_distance=params[4],
             max_frame_gap=params[5],
             track_displacement=params[6])

    traj = ut.csv_to_pd(outfile)
    msds = msd.all_msds2(traj, frames=651)
    msds.to_csv(msd_file)
    gmean1, gSEM1 = hm.plot_individual_msds(iter_name, alpha=0.05)
    np.savetxt(geo_file, gmean1, delimiter=",")
    np.savetxt(geoS_file, gSEM1, delimiter=",")

    aws.upload_s3(outfile, '{}/{}'.format(s_folder, outfile))
    aws.upload_s3(msd_file, '{}/{}'.format(s_folder, msd_file))
    aws.upload_s3(geo_file, '{}/{}'.format(s_folder, geo_file))
    aws.upload_s3(geoS_file, '{}/{}'.format(s_folder, geoS_file))
    aws.upload_s3(msd_image, '{}/{}'.format(s_folder, msd_image))

    print('Successful parameter calculations for {}'.format(iter_name))
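
A sketch of how the sweep above might be driven, one parameter combination per job index (the grid holds 3*2*3*3*3*3*3 = 1458 combinations):

# Sweep-driver sketch: pass a single index, e.g. from a batch job argument
import sys
sensitivity_it(int(sys.argv[1]))   # indices 0..1457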
Example #17
def regress_sys(folder, all_videos, y, training_size, have_output=True):
    """
    Uses regression techniques to select the best tracking parameters.
    Regression is performed against the intensities of the input images.

    Parameters
    ----------
    folder : str
        S3 directory containing the video files specified in all_videos.
    all_videos: list
        Contains prefixes of video filenames of entire video set to be
        tracked.  Training dataset will be some subset of these videos.
    y: numpy array
        Contains manually acquired quality levels using Trackmate for the
        files contained in the training dataset.
    training_size: int
        Number of files in training dataset.
    have_output: boolean
        If you have already acquired the quality values (y) for the
        training dataset, set to True.  If False, it will output the files
        the user will need to acquire quality values for.

    Returns
    -------
    regress_object: list of sklearn regression objects.
        Contains list of regression objects assembled from the training
        datasets.  Uses the mean, 10th percentile, 90th percentile, and
        standard deviation intensities to predict the quality parameter
        in Trackmate.
    """

    import random
    import numpy as np
    import skimage.io as sio
    from sklearn import svm, linear_model
    import diff_classifier.aws as aws

    tprefix = []
    for i in range(0, training_size):
        random.seed(i + 1)
        tprefix.append(all_videos[random.randint(0, len(all_videos) - 1)])
        if have_output is False:
            print("Get parameters for: {}".format(tprefix[i]))

    if have_output is True:
        # Define descriptors
        descriptors = np.zeros((training_size, 4))
        counter = 0
        for name in tprefix:
            pup = name.split('_')[0]
            local_im = name + '.tif'
            remote_im = "{}/{}/{}".format(folder, pup, local_im)
            aws.download_s3(remote_im, local_im)
            test_image = sio.imread(local_im)
            descriptors[counter, 0] = np.mean(test_image[0, :, :])
            descriptors[counter, 1] = np.std(test_image[0, :, :])
            descriptors[counter, 2] = np.percentile(test_image[0, :, :], 10)
            descriptors[counter, 3] = np.percentile(test_image[0, :, :], 90)
            counter = counter + 1

        # Define regression techniques
        X = descriptors
        classifiers = [
            svm.SVR(),
            linear_model.SGDRegressor(),
            linear_model.BayesianRidge(),
            linear_model.LassoLars(),
            linear_model.ARDRegression(),
            linear_model.PassiveAggressiveRegressor(),
            linear_model.TheilSenRegressor(),
            linear_model.LinearRegression()
        ]

        regress_object = []
        for item in classifiers:
            clf = item
            regress_object.append(clf.fit(X, y))

        return regress_object