def geomean_msd(prefix, umppx=0.16, fps=100.02, upload=True,
                remote_folder="01_18_Experiment", bucket='ccurtis.data',
                backup_frames=651):
    import pandas as pd
    import numpy as np
    import numpy.ma as ma
    import diff_classifier.aws as aws
    import scipy.stats as stats

    aws.download_s3('{}/msd_{}.csv'.format(remote_folder, prefix),
                    'msd_{}.csv'.format(prefix), bucket_name=bucket)
    merged = pd.read_csv('msd_{}.csv'.format(prefix))

    try:
        particles = int(max(merged['Track_ID']))
        frames = int(max(merged['Frame']))
        ypos = np.zeros((particles+1, frames+1))

        for i in range(0, particles+1):
            ypos[i, :] = merged.loc[merged.Track_ID == i, 'MSDs']*umppx*umppx
            xpos = merged.loc[merged.Track_ID == i, 'Frame']/fps

        geo_mean = np.nanmean(ma.log(ypos), axis=0)
        geo_stder = ma.masked_equal(stats.sem(ma.log(ypos), axis=0,
                                              nan_policy='omit'), 0.0)
    except ValueError:
        geo_mean = np.nan*np.ones(backup_frames)
        geo_stder = np.nan*np.ones(backup_frames)

    np.savetxt('geomean_{}.csv'.format(prefix), geo_mean, delimiter=",")
    np.savetxt('geoSEM_{}.csv'.format(prefix), geo_stder, delimiter=",")

    if upload:
        aws.upload_s3('geomean_{}.csv'.format(prefix),
                      remote_folder+'/'+'geomean_{}.csv'.format(prefix),
                      bucket_name=bucket)
        aws.upload_s3('geoSEM_{}.csv'.format(prefix),
                      remote_folder+'/'+'geoSEM_{}.csv'.format(prefix),
                      bucket_name=bucket)

    return geo_mean, geo_stder
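# Example usage of geomean_msd, a minimal sketch: the prefix, folder, and
# bucket below are placeholders, and the function expects msd_<prefix>.csv
# to already exist on S3. Note geo is a log-space mean, so it is
# exponentiated before plotting.
import numpy as np
import matplotlib.pyplot as plt

geo, gsem = geomean_msd('P1_S1_R_0001', umppx=0.16, fps=100.02,
                        upload=False, remote_folder='01_18_Experiment',
                        bucket='ccurtis.data')
taus = np.arange(geo.shape[0]) / 100.02            # frame index -> seconds
plt.loglog(taus, np.exp(geo))                      # geometric mean MSD
plt.loglog(taus, np.exp(geo - 1.96*gsem), '--')    # lower 95% bound
plt.loglog(taus, np.exp(geo + 1.96*gsem), '--')    # upper 95% bound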
def download_and_track(filename):
    import diff_classifier.imagej as ij
    import diff_classifier.utils as ut
    import diff_classifier.aws as aws
    import os.path as op
    import pandas as pd

    aws.download_s3(filename, op.split(filename)[1])
    outfile = 'Traj_' + op.split(filename)[1].split('.')[0] + '.csv'
    local_im = op.split(filename)[1]

    if not op.isfile(outfile):
        ij.track(local_im, outfile, template=None, fiji_bin=None, radius=4.5,
                 threshold=0., do_median_filtering=True, quality=4.5,
                 median_intensity=300.0, snr=0.0, linking_max_distance=8.0,
                 gap_closing_max_distance=10.0, max_frame_gap=2,
                 track_displacement=10.0)
        aws.upload_s3(outfile, op.split(filename)[0]+'/'+outfile)
    print("Done with tracking. Should output file of name {}".format(
        op.split(filename)[0]+'/'+outfile))
def download_and_split(filename):
    import boto3
    import diff_classifier.imagej as ij
    import diff_classifier.aws as aws
    import os.path as op

    local_name = op.split(filename)[1]
    DIR = op.split(filename)[0]
    try1 = filename.split('.')[0] + '_0_0.tif'
    try2 = filename.split('.')[0] + '_3_3.tif'

    s3 = boto3.client('s3')
    # Only split if neither the first nor the last subimage already exists
    # on S3.
    try:
        obj = s3.head_object(Bucket='ccurtis7.pup', Key=try1)
    except:
        try:
            obj = s3.head_object(Bucket='ccurtis7.pup', Key=try2)
        except:
            aws.download_s3(filename, local_name)
            names = ij.partition_im(local_name)
            for name in names:
                aws.upload_s3(name, op.split(filename)[0]+'/'+name)
                print("Done with splitting. Should output file of name {}".format(
                    op.split(filename)[0]+'/'+name))
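# The head_object idiom above can be factored into a small helper. This is a
# sketch of our own (key_exists is not part of diff_classifier): head_object
# fetches only metadata and raises a ClientError when the key is absent, so
# existence can be checked without downloading the object.
import boto3
import botocore

def key_exists(bucket, key):
    """Return True if key exists in bucket, without downloading it."""
    s3 = boto3.client('s3')
    try:
        s3.head_object(Bucket=bucket, Key=key)
        return True
    except botocore.exceptions.ClientError:
        return False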
def download_and_calc_MSDs(prefix):
    import diff_classifier.aws as aws
    import diff_classifier.utils as ut
    import diff_classifier.msd as msd
    import diff_classifier.features as ft
    import os
    import os.path as op
    import numpy as np
    import numpy.ma as ma
    import pandas as pd

    remote_folder = "01_18_Experiment/{}".format(prefix.split('_')[0])
    local_folder = os.getcwd()
    ires = 512

    for row in range(0, 4):
        for col in range(0, 4):
            filename = "Traj_{}_{}_{}.csv".format(prefix, row, col)
            to_download = remote_folder+'/'+filename
            local_name = local_folder+'/'+filename
            aws.download_s3(to_download, local_name)

            if row == 0 and col == 0:
                merged = msd.all_msds(ut.csv_to_pd(local_name))
            else:
                to_add = ut.csv_to_pd(local_name)
                to_add['X'] = to_add['X'] + ires*row
                to_add['Y'] = to_add['Y'] + ires*col
                to_add['Track_ID'] = to_add['Track_ID'] + max(merged['Track_ID'])
                # DataFrame.append is not in-place; the result must be
                # assigned back to merged.
                merged = merged.append(msd.all_msds(to_add))
            print('Successfully downloaded and calculated MSDs for {}_{}_{}'.format(
                prefix, row, col))

    merged.to_csv('MSD_{}.csv'.format(prefix))
    print('Saved MSDs as MSD_{}.csv'.format(prefix))
    merged_ft = ft.calculate_features(merged)
    merged_ft.to_csv('features_{}.csv'.format(prefix))
    print('Saved features as features_{}.csv'.format(prefix))
def split(prefix, remote_folder, bucket, rows=4, cols=4, ores=(2048, 2048),
          ires=(512, 512)):
    '''Splits an input image file into smaller images.

    A function based on imagej.partition_im that downloads an image from an
    S3 bucket, splits it into smaller images, and uploads these back to S3.
    Designed to work with Cloudknot for parallelizable workflows. Typically,
    this function is used in conjunction with kn.tracking and
    kn.assemble_msds for a complete analysis.

    Parameters
    ----------
    prefix : string
        Prefix (everything except file extension and folder name) of image
        file to be tracked. Must be available on S3.
    remote_folder : string
        Folder name where file is contained on S3 in the bucket specified by
        'bucket'.
    bucket : string
        S3 bucket where file is contained.
    rows : int
        Number of rows to split image into.
    cols : int
        Number of columns to split image into.
    ores : tuple of int
        Original resolution of input image.
    ires : tuple of int
        Resolution of split images. Really just a sanity check to make sure
        you are splitting correctly.

    '''
    import os
    import boto3
    import diff_classifier.aws as aws
    import diff_classifier.imagej as ij

    local_folder = os.getcwd()
    filename = '{}.tif'.format(prefix)
    remote_name = remote_folder+'/'+filename
    local_name = local_folder+'/'+filename
    msd_file = 'msd_{}.csv'.format(prefix)
    ft_file = 'features_{}.csv'.format(prefix)

    aws.download_s3(remote_name, local_name, bucket_name=bucket)
    s3 = boto3.client('s3')

    # Splitting section
    names = ij.partition_im(local_name, irows=rows, icols=cols,
                            ores=ores, ires=ires)

    # Names of subfiles
    # names = []
    # for i in range(0, 4):
    #     for j in range(0, 4):
    #         names.append('{}_{}_{}.tif'.format(prefix, i, j))

    for name in names:
        aws.upload_s3(name, remote_folder+'/'+name, bucket_name=bucket)
        os.remove(name)
        print("Done with splitting. Should output file of name {}".format(
            remote_folder+'/'+name))

    os.remove(filename)
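# A sketch of how split might be dispatched in parallel with Cloudknot, per
# the docstring's stated intent. The knot name, prefixes, and bucket are
# placeholders, and the exact Knot/map options should be checked against the
# cloudknot documentation for the installed version.
import cloudknot as ck

prefixes = ['P1_S1_R_0001', 'P1_S1_R_0002']
knot = ck.Knot(name='split-jobs', func=split)
# Each argument tuple becomes one parallel job: (prefix, remote_folder, bucket)
futures = knot.map([(p, '01_18_Experiment', 'ccurtis.data') for p in prefixes],
                   starmap=True)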
def assemble_msds(prefix, remote_folder, bucket, ires=(512, 512), frames=651):
    '''Calculates MSDs and features from input trajectory files.

    A function based on msd.all_msds2 and features.calculate_features that
    creates msd and feature csv files from input trajectory files and
    uploads them to S3. Designed to work with Cloudknot for parallelizable
    workflows. Typically, this function is used in conjunction with kn.split
    and kn.tracking for an entire workflow.

    Parameters
    ----------
    prefix : string
        Prefix (everything except file extension and folder name) of image
        file to be tracked. Must be available on S3.
    remote_folder : string
        Folder name where file is contained on S3 in the bucket specified by
        'bucket'.
    bucket : string
        S3 bucket where file is contained.
    ires : tuple of int
        Resolution of split images. Really just a sanity check to make sure
        you are splitting correctly.
    frames : int
        Number of frames in input videos.

    '''
    import os
    import boto3
    import diff_classifier.aws as aws
    import diff_classifier.msd as msd
    import diff_classifier.features as ft
    import diff_classifier.utils as ut

    filename = '{}.tif'.format(prefix)
    remote_name = remote_folder+'/'+filename
    msd_file = 'msd_{}.csv'.format(prefix)
    ft_file = 'features_{}.csv'.format(prefix)

    s3 = boto3.client('s3')

    # names = []
    # for i in range(0, 4):
    #     for j in range(0, 4):
    #         names.append('{}_{}_{}.tif'.format(prefix, i, j))

    # Find all split subimages on S3 and infer the grid dimensions from
    # their row/column suffixes.
    all_objects = s3.list_objects(Bucket=bucket,
                                  Prefix='{}/{}_'.format(remote_folder, prefix))
    names = []
    rows = 0
    cols = 0
    for entry in all_objects['Contents']:
        name = entry['Key'].split('/')[-1]
        names.append(name)
        row = int(name.split(prefix)[1].split('.')[0].split('_')[-2])
        col = int(name.split(prefix)[1].split('.')[0].split('_')[-1])
        if row > rows:
            rows = row
        if col > cols:
            cols = col
    rows = rows + 1
    cols = cols + 1

    counter = 0
    for name in names:
        row = int(name.split(prefix)[1].split('.')[0].split('_')[-2])
        col = int(name.split(prefix)[1].split('.')[0].split('_')[-1])

        filename = "Traj_{}_{}_{}.csv".format(prefix, row, col)
        aws.download_s3(remote_folder+'/'+filename, filename,
                        bucket_name=bucket)
        local_name = filename

        if counter == 0:
            to_add = ut.csv_to_pd(local_name)
            to_add['X'] = to_add['X'] + ires[0]*col
            to_add['Y'] = ires[1] - to_add['Y'] + ires[1]*(rows-1-row)
            merged = msd.all_msds2(to_add, frames=frames)
        else:
            if merged.shape[0] > 0:
                to_add = ut.csv_to_pd(local_name)
                to_add['X'] = to_add['X'] + ires[0]*col
                to_add['Y'] = ires[1] - to_add['Y'] + ires[1]*(rows-1-row)
                to_add['Track_ID'] = to_add['Track_ID'] + \
                    max(merged['Track_ID']) + 1
            else:
                to_add = ut.csv_to_pd(local_name)
                to_add['X'] = to_add['X'] + ires[0]*col
                to_add['Y'] = ires[1] - to_add['Y'] + ires[1]*(rows-1-row)
                to_add['Track_ID'] = to_add['Track_ID']
            merged = merged.append(msd.all_msds2(to_add, frames=frames))
        print('Done calculating MSDs for row {} and col {}'.format(row, col))
        counter = counter + 1

    merged.to_csv(msd_file)
    aws.upload_s3(msd_file, remote_folder+'/'+msd_file, bucket_name=bucket)
    merged_ft = ft.calculate_features(merged)
    merged_ft.to_csv(ft_file)
    aws.upload_s3(ft_file, remote_folder+'/'+ft_file, bucket_name=bucket)

    os.remove(ft_file)
    os.remove(msd_file)
    for name in names:
        outfile = 'Traj_' + name.split('.')[0] + '.csv'
        os.remove(outfile)
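# The coordinate stitching in assemble_msds flips Y (image row coordinates
# increase downward) and offsets each subimage into the full frame. A quick
# numeric check of that formula for a 4x4 grid of 512-pixel tiles:
ires = (512, 512)
rows = 4
# A point at (X=10, Y=20) in tile (row=0, col=2) maps to:
X_global = 10 + ires[0]*2                      # 10 + 1024 = 1034
Y_global = ires[1] - 20 + ires[1]*(rows-1-0)   # 492 + 1536 = 2028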
def load_data(folder, filenames=[], **kwargs):
    """Load data either through the file system or through AWS S3.

    Parameters
    ----------
    folder : string
        Desired folder to import files from.
    filenames : list of strings
        Desired files to import.

    Optional Parameters
    -------------------
    download_list_file : list of string
        If using a text file containing multiple filenames, use this to
        designate the location of that file within the folder,
        e.g. folder/download_file_names.txt.
    tag : list of strings
        If tagging dataframe files with a variable, use this to tag each
        file. Will cycle through the list if the list is exhausted while
        there are still files remaining in filenames.
    bucket_name : string
        If using AWS S3, declare this variable as an S3 bucket to look
        through. This triggers the function so that folder is the folder in
        the bucket and filenames are the filenames to download from the
        bucket.

    """
    import os
    from itertools import cycle
    import boto3
    import pandas as pd
    import diff_classifier.aws as aws

    data = pd.DataFrame()
    tag = None
    if 'download_list_file' in kwargs:
        list_path = os.path.join(folder, kwargs['download_list_file'][0])
        assert os.path.isfile(list_path) and os.access(list_path, os.R_OK), \
            f'{list_path} does not exist or cannot be read'
        try:
            with open(list_path, 'r') as f:
                filenames = f.read().splitlines()
        except IOError as err:
            print(f"Could not read {list_path}: {err}")
    if 'tag' in kwargs:
        tag = cycle(kwargs['tag'])
    if 'bucket_name' in kwargs:
        s3 = boto3.resource('s3')
        bucket = s3.Bucket(kwargs['bucket_name'])
        for filename in filenames:
            if tag:
                file_tag = next(tag)
            else:
                file_tag = None
            try:
                file_path = os.path.join(folder, filename)
                print(file_path)
                # aws.download_s3 expects the bucket name string, not the
                # boto3 Bucket resource.
                aws.download_s3(file_path, filename,
                                bucket_name=kwargs['bucket_name'])
                file_data = pd.read_csv(filename, encoding="ISO-8859-1",
                                        index_col='Unnamed: 0')
                if file_tag:
                    size = file_data.shape[0]
                    file_data['Tag'] = pd.Series(size*[file_tag],
                                                 index=file_data.index)
                data = pd.concat([data, file_data])
                del file_data
            except IOError as err:
                print(f'Skipped!: {filename}: {err}')
        return data
    for filename in filenames:
        if tag:
            file_tag = next(tag)
        else:
            file_tag = None
        try:
            file_path = os.path.join(folder, filename)
            print(file_path)
            # Read the file locally; this mirrors the S3 branch above.
            file_data = pd.read_csv(file_path, encoding="ISO-8859-1",
                                    index_col='Unnamed: 0')
            if file_tag:
                size = file_data.shape[0]
                file_data['Tag'] = pd.Series(size*[file_tag],
                                             index=file_data.index)
            data = pd.concat([data, file_data])
            del file_data
        except IOError as err:
            print(f'Skipped!: {filename}: {err}')
    return data
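# Example calls for load_data, a sketch: the filenames, folder, and bucket
# below are placeholders.
# Local files, tagging alternating treatment groups; 'Tag' cycles through
# the supplied list:
data = load_data('results', filenames=['features_A.csv', 'features_B.csv'],
                 tag=['treated', 'control'])
# The same files, but pulled down from an S3 bucket first:
data = load_data('01_18_Experiment',
                 filenames=['features_A.csv', 'features_B.csv'],
                 bucket_name='ccurtis.data')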
def plot_all_experiments(experiments, bucket='ccurtis.data', folder='test',
                         yrange=(10**-1, 10**1), fps=100.02,
                         xrange=(10**-2, 10**0), upload=True,
                         outfile='test.png', exponential=True):
    """Plots precision-weighted averages of MSD datasets.

    Plots pre-calculated precision-weighted averages of MSD datasets
    calculated from precision_averaging and stored in an AWS S3 bucket.

    Parameters
    ----------
    experiments : list of str
        List of experiment names to plot. Each experiment must have an MSD
        and SEM file associated with it in s3.
    bucket : str
        S3 bucket from which to download data.
    folder : str
        Folder in s3 bucket from which to download data.
    yrange : list of float
        Y range of plot.
    fps : float
        Frames per second of the videos, used to convert frame numbers to
        time in seconds.
    xrange : list of float
        X range of plot.
    upload : bool
        True to upload to S3.
    outfile : str
        Filename of output image.
    exponential : bool
        If True, plots the exponential of the stored (log-space) geometric
        means.

    """
    import numpy as np
    import numpy.ma as ma
    import matplotlib.pyplot as plt
    import matplotlib.cm as cm
    import diff_classifier.aws as aws

    n = len(experiments)
    color = iter(cm.viridis(np.linspace(0, 0.9, n)))

    fig = plt.figure(figsize=(8.5, 8.5))
    plt.xlim(xrange[0], xrange[1])
    plt.ylim(yrange[0], yrange[1])
    plt.xlabel('Tau (s)', fontsize=25)
    plt.ylabel(r'Mean Squared Displacement ($\mu$m$^2$)', fontsize=25)

    geo = {}
    gstder = {}
    counter = 0
    for experiment in experiments:
        aws.download_s3('{}/geomean_{}.csv'.format(folder, experiment),
                        'geomean_{}.csv'.format(experiment),
                        bucket_name=bucket)
        aws.download_s3('{}/geoSEM_{}.csv'.format(folder, experiment),
                        'geoSEM_{}.csv'.format(experiment),
                        bucket_name=bucket)

        geo[counter] = np.genfromtxt('geomean_{}.csv'.format(experiment))
        gstder[counter] = np.genfromtxt('geoSEM_{}.csv'.format(experiment))
        geo[counter] = ma.masked_equal(geo[counter], 0.0)
        gstder[counter] = ma.masked_equal(gstder[counter], 0.0)

        frames = np.shape(gstder[counter])[0]
        xpos = np.linspace(0, frames-1, frames)/fps
        c = next(color)

        if exponential:
            plt.loglog(xpos, np.exp(geo[counter]), c=c, linewidth=6,
                       label=experiment)
            plt.loglog(xpos, np.exp(geo[counter] - 1.96*gstder[counter]),
                       c=c, dashes=[6, 2], linewidth=4)
            plt.loglog(xpos, np.exp(geo[counter] + 1.96*gstder[counter]),
                       c=c, dashes=[6, 2], linewidth=4)
        else:
            plt.loglog(xpos, geo[counter], c=c, linewidth=6, label=experiment)
            plt.loglog(xpos, geo[counter] - 1.96*gstder[counter], c=c,
                       dashes=[6, 2], linewidth=4)
            plt.loglog(xpos, geo[counter] + 1.96*gstder[counter], c=c,
                       dashes=[6, 2], linewidth=4)
        counter = counter + 1

    plt.legend(frameon=False, prop={'size': 16})

    if upload:
        fig.savefig(outfile, bbox_inches='tight')
        aws.upload_s3(outfile, folder+'/'+outfile, bucket_name=bucket)
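# Example call for plot_all_experiments, a sketch: the experiment names
# follow the naming scheme used later in this file, but are placeholders
# and must have geomean_/geoSEM_ csv files already on S3.
plot_all_experiments(['10K_tissue_S4_XY1', 'COOH_tissue_S4_XY1'],
                     bucket='ccurtis.data', folder='01_18_Experiment',
                     upload=False, outfile='msd_comparison.png')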
def regress_sys(folder, all_videos, yfit, training_size, randselect=True,
                trainingdata=[], frame=0, have_output=True, download=True,
                bucket_name='ccurtis.data'):
    """Uses regression based on image intensities to select tracking
    parameters.

    This function uses regression methods from the scikit-learn module to
    predict the lower quality cutoff values for particle filtering in
    TrackMate based on the intensity distributions of input images. It
    currently uses a single frame of each video (the frame parameter) for
    analysis, and is limited to predicting quality values.

    In practice, users will run regress_sys twice in different modes to
    build a regression system. First, set have_output to False. The function
    will return a list of randomly selected videos to include in the
    training dataset. The user should then manually track particles using
    the TrackMate GUI, and enter these values in during the next round as
    the input yfit variable.

    Parameters
    ----------
    folder : str
        S3 directory containing video files specified in all_videos.
    all_videos : list of str
        Contains prefixes of video filenames of entire video set to be
        tracked. Training dataset will be some subset of these videos.
    yfit : numpy.ndarray
        Contains manually acquired quality levels using TrackMate for the
        files contained in the training dataset.
    training_size : int
        Number of files in training dataset.
    randselect : bool
        If True, will randomly select training videos from all_videos. If
        False, will use trainingdata as input training dataset.
    trainingdata : list of str
        Optional manually selected prefixes of video filenames to be used as
        training dataset.
    frame : int
        Frame of each video from which the intensity descriptors are
        calculated.
    have_output : bool
        If you have already acquired the quality values (yfit) for the
        training dataset, set to True. If False, it will output the files
        the user will need to acquire quality values for.
    download : bool
        If True, downloads the training images from S3 before reading them.
    bucket_name : str
        S3 bucket containing videos to be downloaded for regression
        calculations.

    Returns
    -------
    regress_object : list of sklearn regression objects
        Contains list of regression objects assembled from the training
        datasets. Uses the mean, 10th percentile, 90th percentile, and
        standard deviation intensities to predict the quality parameter in
        TrackMate.
    tprefix : list of str
        Contains randomly selected images from all_videos to be included in
        training dataset.

    """
    import random
    import numpy as np
    import skimage.io as sio
    from sklearn import svm, linear_model
    import diff_classifier.aws as aws

    if randselect:
        tprefix = []
        for i in range(0, training_size):
            random.seed(i + 1)
            # random.randint is inclusive on both ends; subtract 1 to stay
            # within the bounds of all_videos.
            tprefix.append(all_videos[random.randint(0, len(all_videos) - 1)])
            if have_output is False:
                print("Get parameters for: {}".format(tprefix[i]))
    else:
        tprefix = trainingdata

    if have_output is True:
        # Define descriptors
        descriptors = np.zeros((training_size, 4))
        counter = 0
        for name in tprefix:
            local_im = name + '.tif'
            remote_im = "{}/{}".format(folder, local_im)
            if download:
                aws.download_s3(remote_im, local_im, bucket_name=bucket_name)
            test_image = sio.imread(local_im)
            descriptors[counter, 0] = np.mean(test_image[frame, :, :])
            descriptors[counter, 1] = np.std(test_image[frame, :, :])
            descriptors[counter, 2] = np.percentile(test_image[frame, :, :], 10)
            descriptors[counter, 3] = np.percentile(test_image[frame, :, :], 90)
            counter = counter + 1

        # Define regression techniques
        xfit = descriptors
        classifiers = [
            svm.SVR(),
            linear_model.SGDRegressor(),
            linear_model.BayesianRidge(),
            linear_model.LassoLars(),
            linear_model.ARDRegression(),
            linear_model.PassiveAggressiveRegressor(),
            linear_model.TheilSenRegressor(),
            linear_model.LinearRegression()]

        regress_object = []
        for item in classifiers:
            clf = item
            regress_object.append(clf.fit(xfit, yfit))

        return regress_object

    else:
        return tprefix
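# The two-phase workflow described in the docstring, sketched out; the
# video names and quality values below are hypothetical placeholders.
import numpy as np

all_videos = ['vid_{}'.format(i) for i in range(20)]

# Phase 1: get the randomly selected training filenames, then measure
# quality values for them by hand in the TrackMate GUI.
tprefix = regress_sys('01_18_Experiment', all_videos, yfit=None,
                      training_size=5, have_output=False)

# Phase 2: feed the manually determined quality values back in to fit the
# regression objects.
yfit = np.array([4.5, 6.2, 5.1, 8.0, 3.9])   # hypothetical GUI readings
regress_object = regress_sys('01_18_Experiment', all_videos, yfit,
                             training_size=5, have_output=True)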
def BF_cell_features(prefix, folder, bucket='ccurtis.data'):
    import numpy as np
    import pandas as pd
    import matplotlib.pyplot as plt
    from operator import itemgetter
    from skimage.morphology import disk
    import diff_classifier.aws as aws
    # binary_BF (brightfield binarization) and EuclideanTransform (distance
    # transform of a binary image) are assumed to be defined elsewhere in
    # this module.

    ffilename = 'features_{}.csv'.format(prefix)
    mfilename = 'msd_{}.csv'.format(prefix)
    bffilename = 'BF_cells_{}.tif'.format(prefix)
    biim = 'bi_BF_cells_{}.tif'.format(prefix)
    bimages = 'biproc_BF_cells_{}.png'.format(prefix)

    aws.download_s3('{}/{}'.format(folder, ffilename), ffilename,
                    bucket_name=bucket)
    aws.download_s3('{}/{}'.format(folder, mfilename), mfilename,
                    bucket_name=bucket)
    aws.download_s3('{}/{}'.format(folder, bffilename), bffilename,
                    bucket_name=bucket)
    print('Successfully downloaded files')

    fstats = pd.read_csv(ffilename, encoding="ISO-8859-1")
    msds = pd.read_csv(mfilename, encoding="ISO-8859-1")
    bfimage = plt.imread(bffilename)
    tophimage = binary_BF(bfimage, opense=disk(12), bi_thresh=1.2,
                          tophatse=disk(20))
    plt.savefig(bimages)
    euimage = EuclideanTransform(tophimage) + EuclideanTransform(~tophimage)
    print('Successfully performed image processing')

    xa = -np.reshape(np.clip((fstats.Y.values - 1).astype(int),
                             a_min=0, a_max=2043),
                     newshape=(fstats.Y.shape[0], 1))
    ya = np.reshape(np.clip((fstats.X.values - 1).astype(int),
                            a_min=0, a_max=2043),
                    newshape=(fstats.X.shape[0], 1))
    xya = [tuple(l) for l in np.concatenate((xa, ya), axis=1).tolist()]

    fstats['Cell Status'] = itemgetter(*xya)(tophimage)
    fstats['Cell Distance'] = itemgetter(*xya)(euimage)
    print('Successfully calculated Cell Status Params')

    frames = 651
    xb = -np.reshape(np.clip((msds.Y.values - 1).astype(int),
                             a_min=0, a_max=2043),
                     newshape=(int(msds.Y.shape[0]), 1))
    yb = np.reshape(np.clip((msds.X.values - 1).astype(int),
                            a_min=0, a_max=2043),
                    newshape=(int(msds.X.shape[0]), 1))
    xyb = [tuple(l) for l in np.concatenate((xb, yb), axis=1).tolist()]

    msds['Cell Status'] = itemgetter(*xyb)(tophimage)
    msds['Cell Distance'] = itemgetter(*xyb)(euimage)

    msds_cell_status = np.reshape(msds['Cell Status'].values,
                                  newshape=(int(msds.X.shape[0] / frames),
                                            frames))
    msds_cell_distance = np.reshape(msds['Cell Distance'].values,
                                    newshape=(int(msds.X.shape[0] / frames),
                                              frames))

    fstats['Membrane Xing'] = np.sum(np.diff(msds_cell_status,
                                             axis=1) == True, axis=1)
    fstats['Distance Towards Cell'] = np.sum(np.diff(msds_cell_distance,
                                                     axis=1), axis=1)
    fstats['Percent Towards Cell'] = np.mean(np.diff(msds_cell_distance,
                                                     axis=1) > 0, axis=1)
    print('Successfully calculated Membrane Xing Params')

    fstats.to_csv(ffilename, sep=',', encoding="ISO-8859-1")
    msds.to_csv(mfilename, sep=',', encoding="ISO-8859-1")
    plt.imsave(biim, tophimage, cmap='gray')

    aws.upload_s3(ffilename, '{}/{}'.format(folder, ffilename),
                  bucket_name=bucket)
    aws.upload_s3(mfilename, '{}/{}'.format(folder, mfilename),
                  bucket_name=bucket)
    aws.upload_s3(biim, '{}/{}'.format(folder, biim), bucket_name=bucket)
    aws.upload_s3(bimages, '{}/{}'.format(folder, bimages),
                  bucket_name=bucket)
    print('Successfully uploaded files')
    return fstats
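# The itemgetter lookup used above vectorizes per-particle pixel reads:
# clip coordinates into bounds, build (row, col) tuples, and index the
# image once for all particles. A minimal standalone sketch of the idiom
# (the toy image and coordinates are placeholders):
import numpy as np
from operator import itemgetter

image = np.arange(16).reshape(4, 4)      # stand-in for a binarized image
ys = np.array([0.5, 3.7, 9.0])           # raw Y coordinates of particles
xs = np.array([1.2, 2.9, 5.0])           # raw X coordinates of particles
rows = np.clip((ys - 1).astype(int), 0, 3)
cols = np.clip((xs - 1).astype(int), 0, 3)
coords = list(zip(rows.tolist(), cols.tolist()))
values = itemgetter(*coords)(image)      # one pixel value per particle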
import sys

import diff_classifier.aws as aws
import diff_classifier.msd as msd

# folder and bucket are assumed to be defined earlier in the script.
to_track = []
frames = 651
fps = 100.02
umppx = 0.07
vids = 5
covers = ['10K', '1K', '5K', 'COOH']
slices = [4, 5, 6]

for cover in covers:
    for slic in slices:
        for num in range(1, vids + 1):
            #to_track.append('100x_0_4_1_2_gel_{}_bulk_vid_{}'.format(vis, num))
            to_track.append('{}_tissue_S{}_XY{}'.format(cover, slic, num))

geomean = {}
gSEM = {}

for sample_name in to_track[int(sys.argv[1]):int(sys.argv[2])]:
    # Users can toggle between using pre-calculated geomean files and
    # calculating new values by commenting out the relevant lines of code
    # within the for loop.
    #aws.download_s3('{}/geomean_{}.csv'.format(folder, sample_name),
    #                'geomean_{}.csv'.format(sample_name), bucket_name=bucket)
    #aws.download_s3('{}/geoSEM_{}.csv'.format(folder, sample_name),
    #                'geoSEM_{}.csv'.format(sample_name), bucket_name=bucket)
    #geomean[sample_name] = np.genfromtxt('geomean_{}.csv'.format(sample_name))
    #gSEM[sample_name] = np.genfromtxt('geoSEM_{}.csv'.format(sample_name))

    aws.download_s3('{}/msd_{}.csv'.format(folder, sample_name),
                    'msd_{}.csv'.format(sample_name), bucket_name=bucket)
    geomean[sample_name], gSEM[sample_name] = msd.geomean_msdisp(
        sample_name, umppx=umppx, fps=fps, remote_folder=folder,
        bucket=bucket)
    print('Done with {}'.format(sample_name))
def tracking(subprefix, remote_folder, bucket='nancelab.publicfiles',
             regress_f='regress.obj', rows=4, cols=4, ires=(512, 512),
             tparams={'frames': 651, 'radius': 3.0, 'threshold': 0.0,
                      'do_median_filtering': False, 'quality': 15.0,
                      'xdims': (0, 511), 'ydims': (1, 511),
                      'median_intensity': 300.0, 'snr': 0.0,
                      'linking_max_distance': 6.0,
                      'gap_closing_max_distance': 10.0, 'max_frame_gap': 3,
                      'track_duration': 20.0}):
    '''Tracks particles in input image using Trackmate.

    A function based on imagej.track that downloads the image from S3,
    tracks particles using Trackmate, and uploads the resulting trajectory
    file to S3.

    Parameters
    ----------
    subprefix : string
        Prefix (everything except file extension and folder name) of image
        file to be tracked. Must be available on S3.
    remote_folder : string
        Folder name where file is contained on S3 in the bucket specified by
        'bucket'.
    bucket : string
        S3 bucket where file is contained.
    regress_f : string
        Name of regress object used to predict quality parameter.
    rows : int
        Number of rows to split image into.
    cols : int
        Number of columns to split image into.
    ires : tuple of int
        Resolution of split images. Really just a sanity check to make sure
        you are splitting correctly.
    tparams : dict
        Dictionary containing tracking parameters to Trackmate analysis.

    '''
    import os
    import os.path as op
    import boto3
    from sklearn.externals import joblib
    import diff_classifier.aws as aws
    import diff_classifier.utils as ut
    import diff_classifier.msd as msd
    import diff_classifier.features as ft
    import diff_classifier.imagej as ij

    local_folder = os.getcwd()
    filename = '{}.tif'.format(subprefix)
    remote_name = remote_folder + '/' + filename
    local_name = local_folder + '/' + filename
    outfile = 'Traj_' + subprefix + '.csv'
    local_im = op.join(local_folder, '{}.tif'.format(subprefix))
    row = int(subprefix.split('_')[-2])
    col = int(subprefix.split('_')[-1])

    aws.download_s3(remote_folder + '/' + regress_f, regress_f,
                    bucket_name=bucket)
    with open(regress_f, 'rb') as fp:
        regress = joblib.load(fp)

    s3 = boto3.client('s3')

    aws.download_s3('{}/{}'.format(remote_folder, '{}.tif'.format(subprefix)),
                    local_im, bucket_name=bucket)
    tparams['quality'] = ij.regress_tracking_params(
        regress, subprefix, regmethod='PassiveAggressiveRegressor')

    # Trim the bottom edge of the last row of subimages.
    if row == rows - 1:
        tparams['ydims'] = (tparams['ydims'][0], ires[1] - 27)

    ij.track(local_im, outfile, template=None, fiji_bin=None, tparams=tparams)
    aws.upload_s3(outfile, remote_folder + '/' + outfile, bucket_name=bucket)
    print("Done with tracking. Should output file of name {}".format(
        remote_folder + '/' + outfile))
def assemble_msds(prefix, remote_folder, bucket='nancelab.publicfiles',
                  ires=(512, 512), frames=651, rows=4, cols=4):
    '''Calculates MSDs and features from input trajectory files.

    A function based on msd.all_msds2 and features.calculate_features that
    creates msd and feature csv files from input trajectory files and
    uploads them to S3.

    Parameters
    ----------
    prefix : string
        Prefix (everything except file extension and folder name) of image
        file to be tracked. Must be available on S3.
    remote_folder : string
        Folder name where file is contained on S3 in the bucket specified by
        'bucket'.
    bucket : string
        S3 bucket where file is contained.
    ires : tuple of int
        Resolution of split images. Really just a sanity check to make sure
        you are splitting correctly.
    frames : int
        Number of frames in input videos.
    rows : int
        Number of rows to split image into.
    cols : int
        Number of columns to split image into.

    '''
    import os
    import boto3
    import diff_classifier.aws as aws
    import diff_classifier.msd as msd
    import diff_classifier.features as ft
    import diff_classifier.utils as ut

    filename = '{}.tif'.format(prefix)
    remote_name = remote_folder + '/' + filename
    msd_file = 'msd_{}.csv'.format(prefix)
    ft_file = 'features_{}.csv'.format(prefix)

    s3 = boto3.client('s3')

    names = []
    for i in range(0, rows):
        for j in range(0, cols):
            names.append('{}_{}_{}.tif'.format(prefix, i, j))

    counter = 0
    for name in names:
        row = int(name.split(prefix)[1].split('.')[0].split('_')[1])
        col = int(name.split(prefix)[1].split('.')[0].split('_')[2])

        filename = "Traj_{}_{}_{}.csv".format(prefix, row, col)
        aws.download_s3(remote_folder + '/' + filename, filename,
                        bucket_name=bucket)
        local_name = filename

        if counter == 0:
            to_add = ut.csv_to_pd(local_name)
            to_add['X'] = to_add['X'] + ires[0] * col
            to_add['Y'] = ires[1] - to_add['Y'] + ires[1] * (rows - 1 - row)
            merged = msd.all_msds2(to_add, frames=frames)
        else:
            if merged.shape[0] > 0:
                to_add = ut.csv_to_pd(local_name)
                to_add['X'] = to_add['X'] + ires[0] * col
                to_add['Y'] = ires[1] - to_add['Y'] + ires[1] * (rows - 1 - row)
                to_add['Track_ID'] = to_add['Track_ID'] + \
                    max(merged['Track_ID']) + 1
            else:
                to_add = ut.csv_to_pd(local_name)
                to_add['X'] = to_add['X'] + ires[0] * col
                to_add['Y'] = ires[1] - to_add['Y'] + ires[1] * (rows - 1 - row)
                to_add['Track_ID'] = to_add['Track_ID']
            merged = merged.append(msd.all_msds2(to_add, frames=frames))
        print('Done calculating MSDs for row {} and col {}'.format(row, col))
        counter = counter + 1

    merged.to_csv(msd_file)
    aws.upload_s3(msd_file, remote_folder + '/' + msd_file, bucket_name=bucket)
    merged_ft = ft.calculate_features(merged)
    merged_ft.to_csv(ft_file)
    aws.upload_s3(ft_file, remote_folder + '/' + ft_file, bucket_name=bucket)

    os.remove(ft_file)
    os.remove(msd_file)
    for name in names:
        outfile = 'Traj_' + name.split('.')[0] + '.csv'
        os.remove(outfile)
def tracking(subprefix, remote_folder, bucket, tparams,
             regress_f='regress.obj', rows=4, cols=4, ires=(512, 512)):
    '''Tracks particles in input image using Trackmate.

    A function based on imagej.track that downloads the image from S3,
    tracks particles using Trackmate, and uploads the resulting trajectory
    file to S3. Designed to work with Cloudknot for parallelizable
    workflows. Typically, this function is used in conjunction with kn.split
    and kn.assemble_msds for a complete analysis.

    Parameters
    ----------
    subprefix : string
        Prefix (everything except file extension and folder name) of image
        file to be tracked. Must be available on S3.
    remote_folder : string
        Folder name where file is contained on S3 in the bucket specified by
        'bucket'.
    bucket : string
        S3 bucket where file is contained.
    regress_f : string
        Name of regress object used to predict quality parameter.
    rows : int
        Number of rows to split image into.
    cols : int
        Number of columns to split image into.
    ires : tuple of int
        Resolution of split images. Really just a sanity check to make sure
        you are splitting correctly.
    tparams : dict
        Dictionary containing tracking parameters to Trackmate analysis.

    '''
    import os
    import os.path as op
    import boto3
    from sklearn.externals import joblib
    import diff_classifier.aws as aws
    import diff_classifier.utils as ut
    import diff_classifier.msd as msd
    import diff_classifier.features as ft
    import diff_classifier.imagej as ij

    local_folder = os.getcwd()
    filename = '{}.tif'.format(subprefix)
    remote_name = remote_folder+'/'+filename
    local_name = local_folder+'/'+filename
    outfile = 'Traj_' + subprefix + '.csv'
    local_im = op.join(local_folder, '{}.tif'.format(subprefix))
    row = int(subprefix.split('_')[-2])
    col = int(subprefix.split('_')[-1])

    aws.download_s3(remote_folder+'/'+regress_f, regress_f,
                    bucket_name=bucket)
    with open(regress_f, 'rb') as fp:
        regress = joblib.load(fp)

    s3 = boto3.client('s3')

    aws.download_s3('{}/{}'.format(remote_folder, '{}.tif'.format(subprefix)),
                    local_im, bucket_name=bucket)
    tparams['quality'] = ij.regress_tracking_params(
        regress, subprefix, regmethod='PassiveAggressiveRegressor')

    # Trim the bottom edge of the last row of subimages.
    if row == rows-1:
        tparams['ydims'] = (tparams['ydims'][0], ires[1] - 27)

    ij.track(local_im, outfile, template=None, fiji_bin=None, tparams=tparams)
    aws.upload_s3(outfile, remote_folder+'/'+outfile, bucket_name=bucket)
    print("Done with tracking. Should output file of name {}".format(
        remote_folder+'/'+outfile))
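# Example invocation of tracking for a single subimage, a sketch: the
# prefix and bucket are placeholders, tparams mirrors the defaults used by
# the other tracking variant in this file, and it assumes kn.split already
# produced the split image and that regress.obj exists on S3.
tparams = {'frames': 651, 'radius': 3.0, 'threshold': 0.0,
           'do_median_filtering': False, 'quality': 15.0,
           'xdims': (0, 511), 'ydims': (1, 511),
           'median_intensity': 300.0, 'snr': 0.0,
           'linking_max_distance': 6.0, 'gap_closing_max_distance': 10.0,
           'max_frame_gap': 3, 'track_duration': 20.0}
tracking('P1_S1_R_0001_0_0', '01_18_Experiment', 'ccurtis.data', tparams)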
def download_split_track_msds(prefix):
    """
    1. Checks to see if features file exists.
    2. If not, checks to see if image partitioning has occurred.
    3. If yes, checks to see if tracking has occurred.
    4. Regardless, tracks, calculates MSDs and features.
    """
    import matplotlib as mpl
    mpl.use('Agg')
    import diff_classifier.aws as aws
    import diff_classifier.utils as ut
    import diff_classifier.msd as msd
    import diff_classifier.features as ft
    import diff_classifier.imagej as ij
    import diff_classifier.heatmaps as hm

    from scipy.spatial import Voronoi
    import scipy.stats as stats
    from shapely.geometry import Point
    from shapely.geometry.polygon import Polygon
    import matplotlib.cm as cm
    import os
    import os.path as op
    import numpy as np
    import numpy.ma as ma
    import pandas as pd
    import boto3

    # Splitting section
    ###########################################################################
    remote_folder = "01_18_Experiment/{}".format(prefix.split('_')[0])
    local_folder = os.getcwd()
    ires = 512
    frames = 651
    filename = '{}.tif'.format(prefix)
    remote_name = remote_folder+'/'+filename
    local_name = local_folder+'/'+filename

    msd_file = 'msd_{}.csv'.format(prefix)
    ft_file = 'features_{}.csv'.format(prefix)

    s3 = boto3.client('s3')

    names = []
    for i in range(0, 4):
        for j in range(0, 4):
            names.append('{}_{}_{}.tif'.format(prefix, i, j))

    try:
        obj = s3.head_object(Bucket='ccurtis7.pup',
                             Key=remote_folder+'/'+ft_file)
    except:
        try:
            for name in names:
                aws.download_s3(remote_folder+'/'+name, name)
        except:
            aws.download_s3(remote_name, local_name)
            names = ij.partition_im(local_name)
            for name in names:
                aws.upload_s3(name, remote_folder+'/'+name)
                print("Done with splitting. Should output file of name {}".format(
                    remote_folder+'/'+name))

    # Tracking section
    ###########################################################################
    for name in names:
        outfile = 'Traj_' + name.split('.')[0] + '.csv'
        local_im = op.join(local_folder, name)

        row = int(name.split('.')[0].split('_')[4])
        col = int(name.split('.')[0].split('_')[5])

        try:
            aws.download_s3(remote_folder+'/'+outfile, outfile)
        except:
            test_intensity = ij.mean_intensity(local_im)
            if test_intensity > 500:
                quality = 245
            else:
                quality = 4.5

            if row == 3:
                y = 485
            else:
                y = 511

            ij.track(local_im, outfile, template=None, fiji_bin=None,
                     radius=4.5, threshold=0., do_median_filtering=True,
                     quality=quality, x=511, y=y, ylo=1,
                     median_intensity=300.0, snr=0.0,
                     linking_max_distance=8.0, gap_closing_max_distance=10.0,
                     max_frame_gap=2, track_displacement=10.0)
            aws.upload_s3(outfile, remote_folder+'/'+outfile)
        print("Done with tracking. Should output file of name {}".format(
            remote_folder+'/'+outfile))

    # MSD and features section
    ###########################################################################
    files_too_big = False
    size_limit = 10

    for name in names:
        outfile = 'Traj_' + name.split('.')[0] + '.csv'
        local_im = name
        file_size_MB = op.getsize(local_im)/1000000
        if file_size_MB > size_limit:
            files_too_big = True

    if files_too_big:
        print('One or more of the {} trajectory files exceeds {}MB in size. '
              'Will not continue with MSD calculations.'.format(
                  prefix, size_limit))
    else:
        counter = 0
        for name in names:
            row = int(name.split('.')[0].split('_')[4])
            col = int(name.split('.')[0].split('_')[5])

            filename = "Traj_{}_{}_{}.csv".format(prefix, row, col)
            local_name = local_folder+'/'+filename

            if counter == 0:
                to_add = ut.csv_to_pd(local_name)
                to_add['X'] = to_add['X'] + ires*col
                to_add['Y'] = ires - to_add['Y'] + ires*(3-row)
                merged = msd.all_msds2(to_add, frames=frames)
            else:
                if merged.shape[0] > 0:
                    to_add = ut.csv_to_pd(local_name)
                    to_add['X'] = to_add['X'] + ires*col
                    to_add['Y'] = ires - to_add['Y'] + ires*(3-row)
                    to_add['Track_ID'] = to_add['Track_ID'] + \
                        max(merged['Track_ID']) + 1
                else:
                    to_add = ut.csv_to_pd(local_name)
                    to_add['X'] = to_add['X'] + ires*col
                    to_add['Y'] = ires - to_add['Y'] + ires*(3-row)
                    to_add['Track_ID'] = to_add['Track_ID']
                merged = merged.append(msd.all_msds2(to_add, frames=frames))
            print('Done calculating MSDs for row {} and col {}'.format(row, col))
            counter = counter + 1

        merged.to_csv(msd_file)
        aws.upload_s3(msd_file, remote_folder+'/'+msd_file)

        merged_ft = ft.calculate_features(merged)
        merged_ft.to_csv(ft_file)
        aws.upload_s3(ft_file, remote_folder+'/'+ft_file)

        # Plots
        features = ('AR', 'D_fit', 'alpha', 'MSD_ratio', 'Track_ID', 'X', 'Y',
                    'asymmetry1', 'asymmetry2', 'asymmetry3', 'boundedness',
                    'efficiency', 'elongation', 'fractal_dim', 'frames',
                    'kurtosis', 'straightness', 'trappedness')
        vmin = (1.36, 0.015, 0.72, -0.09, 0, 0, 0, 0.5, 0.049, 0.089, 0.0069,
                0.65, 0.26, 1.28, 0, 1.66, 0.087, -0.225)
        vmax = (3.98, 2.6, 2.3, 0.015, max(merged_ft['Track_ID']), 2048, 2048,
                0.99, 0.415, 0.53, 0.062, 3.44, 0.75, 1.79, 650, 3.33, 0.52,
                -0.208)
        die = {'features': features,
               'vmin': vmin,
               'vmax': vmax}
        di = pd.DataFrame(data=die)

        for i in range(0, di.shape[0]):
            hm.plot_heatmap(prefix, feature=di['features'][i],
                            vmin=di['vmin'][i], vmax=di['vmax'][i])
            hm.plot_scatterplot(prefix, feature=di['features'][i],
                                vmin=di['vmin'][i], vmax=di['vmax'][i])

        hm.plot_trajectories(prefix)
        try:
            hm.plot_histogram(prefix)
        except ValueError:
            print("Couldn't plot histogram.")
        hm.plot_particles_in_frame(prefix)
        gmean1, gSEM1 = hm.plot_individual_msds(prefix, alpha=0.05)
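# download_split_track_msds is self-contained per video prefix, so a whole
# experiment can be processed by looping (or a Cloudknot map) over prefixes.
# A sketch; the prefix names below are placeholders:
prefixes = ['P1_S1_R_{:04d}'.format(i) for i in range(1, 6)]
for prefix in prefixes:
    download_split_track_msds(prefix)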
def sensitivity_it(counter):
    import matplotlib as mpl
    mpl.use('Agg')
    import matplotlib.pyplot as plt
    import diff_classifier.aws as aws
    import diff_classifier.utils as ut
    import diff_classifier.msd as msd
    import diff_classifier.features as ft
    import diff_classifier.imagej as ij
    import diff_classifier.heatmaps as hm

    from scipy.spatial import Voronoi
    import scipy.stats as stats
    from shapely.geometry import Point
    from shapely.geometry.polygon import Polygon
    import matplotlib.cm as cm
    import os
    import os.path as op
    import numpy as np
    import numpy.ma as ma
    import pandas as pd
    import boto3
    import itertools

    # Sweep parameters
    # ----------------------------------
    radius = [4.5, 6.0, 7.0]
    do_median_filtering = [True, False]
    quality = [1.5, 4.5, 8.5]
    linking_max_distance = [6.0, 10.0, 15.0]
    gap_closing_max_distance = [6.0, 10.0, 15.0]
    max_frame_gap = [1, 2, 5]
    track_displacement = [0.0, 10.0, 20.0]

    sweep = [radius, do_median_filtering, quality, linking_max_distance,
             gap_closing_max_distance, max_frame_gap, track_displacement]
    all_params = list(itertools.product(*sweep))

    # Variable prep
    # ----------------------------------
    s3 = boto3.client('s3')

    folder = '01_18_Experiment'
    s_folder = '{}/sensitivity'.format(folder)
    local_folder = '.'
    prefix = "P1_S1_R_0001_2_2"
    name = "{}.tif".format(prefix)
    local_im = op.join(local_folder, name)
    aws.download_s3('{}/{}/{}.tif'.format(folder, prefix.split('_')[0], prefix),
                    '{}.tif'.format(prefix))

    outputs = np.zeros((len(all_params), len(all_params[0])+2))

    # Tracking and calculations
    # ------------------------------------
    params = all_params[counter]
    outfile = 'Traj_{}_{}.csv'.format(name.split('.')[0], counter)
    msd_file = 'msd_{}_{}.csv'.format(name.split('.')[0], counter)
    geo_file = 'geomean_{}_{}.csv'.format(name.split('.')[0], counter)
    geoS_file = 'geoSEM_{}_{}.csv'.format(name.split('.')[0], counter)
    msd_image = 'msds_{}_{}.png'.format(name.split('.')[0], counter)
    iter_name = "{}_{}".format(prefix, counter)

    ij.track(local_im, outfile, template=None, fiji_bin=None,
             radius=params[0], threshold=0., do_median_filtering=params[1],
             quality=params[2], x=511, y=511, ylo=1, median_intensity=300.0,
             snr=0.0, linking_max_distance=params[3],
             gap_closing_max_distance=params[4], max_frame_gap=params[5],
             track_displacement=params[6])

    traj = ut.csv_to_pd(outfile)
    msds = msd.all_msds2(traj, frames=651)
    msds.to_csv(msd_file)
    gmean1, gSEM1 = hm.plot_individual_msds(iter_name, alpha=0.05)
    np.savetxt(geo_file, gmean1, delimiter=",")
    np.savetxt(geoS_file, gSEM1, delimiter=",")

    aws.upload_s3(outfile, '{}/{}'.format(s_folder, outfile))
    aws.upload_s3(msd_file, '{}/{}'.format(s_folder, msd_file))
    aws.upload_s3(geo_file, '{}/{}'.format(s_folder, geo_file))
    aws.upload_s3(geoS_file, '{}/{}'.format(s_folder, geoS_file))
    aws.upload_s3(msd_image, '{}/{}'.format(s_folder, msd_image))
    print('Successful parameter calculations for {}'.format(iter_name))
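# The sweep in sensitivity_it enumerates every combination of the seven
# parameter lists, so valid counter values run from 0 to
# len(all_params) - 1, i.e. 3 * 2 * 3 * 3 * 3 * 3 * 3 = 1458 combinations.
# A serial driver loop (in practice this is the piece one would hand to a
# Cloudknot map instead) might look like:
for counter in range(1458):
    sensitivity_it(counter)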
def regress_sys(folder, all_videos, y, training_size, have_output=True):
    """Uses regression techniques to select the best tracking parameters.

    Regression against intensities of input images.

    Parameters
    ----------
    all_videos : list
        Contains prefixes of video filenames of entire video set to be
        tracked. Training dataset will be some subset of these videos.
    y : numpy array
        Contains manually acquired quality levels using Trackmate for the
        files contained in the training dataset.
    training_size : int
        Number of files in training dataset.
    have_output : boolean
        If you have already acquired the quality values (y) for the training
        dataset, set to True. If False, it will output the files the user
        will need to acquire quality values for.

    Returns
    -------
    regress_object : list of sklearn regression objects
        Contains list of regression objects assembled from the training
        datasets. Uses the mean, 10th percentile, 90th percentile, and
        standard deviation intensities to predict the quality parameter in
        Trackmate.

    """
    import random
    import numpy as np
    import skimage.io as sio
    from sklearn import svm, linear_model
    import diff_classifier.aws as aws

    tprefix = []
    for i in range(0, training_size):
        random.seed(i + 1)
        # random.randint is inclusive on both ends; subtract 1 to stay
        # within the bounds of all_videos.
        tprefix.append(all_videos[random.randint(0, len(all_videos) - 1)])
        if have_output is False:
            print("Get parameters for: {}".format(tprefix[i]))

    if have_output is True:
        # Define descriptors
        descriptors = np.zeros((training_size, 4))
        counter = 0
        for name in tprefix:
            pup = name.split('_')[0]
            local_im = name + '.tif'
            remote_im = "{}/{}/{}".format(folder, pup, local_im)
            aws.download_s3(remote_im, local_im)
            test_image = sio.imread(local_im)
            descriptors[counter, 0] = np.mean(test_image[0, :, :])
            descriptors[counter, 1] = np.std(test_image[0, :, :])
            descriptors[counter, 2] = np.percentile(test_image[0, :, :], 10)
            descriptors[counter, 3] = np.percentile(test_image[0, :, :], 90)
            counter = counter + 1

        # Define regression techniques
        X = descriptors
        classifiers = [
            svm.SVR(),
            linear_model.SGDRegressor(),
            linear_model.BayesianRidge(),
            linear_model.LassoLars(),
            linear_model.ARDRegression(),
            linear_model.PassiveAggressiveRegressor(),
            linear_model.TheilSenRegressor(),
            linear_model.LinearRegression()]

        regress_object = []
        for item in classifiers:
            clf = item
            regress_object.append(clf.fit(X, y))

        return regress_object