def ensembleMSDalltogether(filenames):
    """Compute an ensemble MSD by pooling the squared displacements of all files.

    For every name in ``filenames`` the cached arrays ``dt_<name>`` and
    ``sd_<name>`` are loaded from the current working directory; when either
    is missing, the data is recomputed with ``MSD.singleTrajectoryAnalysis``.
    The per-file data are merged per time lag with ``arMa.mergeLists`` and, for
    each lag, the mean and the standard error of the mean of the pooled
    squared displacements are computed (both scaled by 3).

    A straight line is then fitted through the first 7 non-zero time lags and
    its y-intercept is subtracted from the MSD column (and added to the error
    column) to remove the experimental offset.

    Args:
        filenames: Iterable of trajectory file names.

    Returns:
        ``numpy.ndarray`` of shape ``(number_of_lags, 3)`` whose columns are
        ``[MSD, standard error, dt]``. An empty ``(0, 3)`` array is returned
        when no data were collected (e.g. ``filenames`` is empty).

    Raises:
        AssertionError: If a file's sd and dt data have mismatched lengths.
    """
    num_fit_points = 7  # number of leading non-zero lags used for the linear fit

    currentDir = os.getcwd()
    sd_ensemble = []
    dt_ensemble = []
    for file1 in filenames:
        dt_filename = os.path.join(currentDir, "dt_" + file1)
        sd_filename = os.path.join(currentDir, "sd_" + file1)
        if os.path.isfile(dt_filename) and os.path.isfile(sd_filename):
            dt_data = np.load(dt_filename)
            sd_data = np.load(sd_filename)
        else:
            # No cache on disk: recompute (the per-file stats are not needed here).
            _, sd_data, dt_data = MSD.singleTrajectoryAnalysis(file1)

        # One row of squared displacements is expected per time lag.
        (i, _) = sd_data.shape
        k = dt_data.size
        assert (i == k), "sd and dt data mismatch in: " + file1

        sd_list = arMa.makeListOfLists(sd_data)
        dt_list = list(dt_data)
        sd_ensemble, dt_ensemble = arMa.mergeLists(
            sd_ensemble, dt_ensemble, sd_list, dt_list)

    # Stats: [dtIndex] [MSD | stddev | dt]
    stats = np.zeros((len(dt_ensemble), 3))
    # len() rather than truthiness: safe whether mergeLists returns a list or an array.
    if len(dt_ensemble) == 0:
        return stats

    for a, dt in enumerate(dt_ensemble):
        displacements = sd_ensemble[a]
        stats[a, 2] = dt
        # NOTE(review): the factor 3 is presumably the radial -> genomic MSD
        # conversion mentioned in ensembleMSD -- confirm.
        stats[a, 0] = 3 * statistics.mean(displacements)
        stats[a, 1] = (3 * statistics.stdev(displacements)
                       / math.sqrt(len(displacements)))

    # Experimental error makes the y-intercept non-zero. Extrapolate it from a
    # linear fit of the first 7 non-zero dts, then subtract it. A leading
    # dt == 0 entry is excluded from both the fit and the correction.
    start = 1 if dt_ensemble[0] == 0 else 0
    fit_window = slice(start, start + num_fit_points)
    # np.polyfit returns coefficients highest power first: [slope, intercept].
    _slope, intercept = np.polyfit(
        dt_ensemble[fit_window], stats[fit_window, 0], 1)
    stats[start:, 0] -= intercept
    # NOTE(review): the offset is also added to the error column, as in the
    # original code; presumably it is treated as extra uncertainty -- confirm.
    stats[start:, 1] += intercept

    return stats
def ensembleMSD(filenames):
    """Compute an ensemble MSD as a weighted mean of per-file MSD statistics.

    For every name in ``filenames`` the cached array ``stats_<name>`` is loaded
    from the current working directory (its third column is used as the time
    lags); when it is missing, the statistics are recomputed with
    ``MSD.singleTrajectoryAnalysis``. Rows are grouped per time lag with
    ``arMa.dataInsert`` and each group is reduced with ``weightedMean``.

    A straight line is then fitted through the first 7 non-zero time lags and
    its y-intercept is subtracted from the MSD column to remove the
    experimental offset. When the first lag is 0, that row is left as zeros.

    Args:
        filenames: Iterable of trajectory file names.

    Returns:
        ``numpy.ndarray`` of shape ``(number_of_lags, 3)`` whose columns are
        ``[MSD, stddev, dt]``. An empty ``(0, 3)`` array is returned when no
        data were collected (e.g. ``filenames`` is empty).
    """
    num_fit_points = 7  # number of leading non-zero lags used for the linear fit

    currentDir = os.getcwd()
    stats_ensemble = []
    dt_ensemble = []
    for file1 in filenames:
        stats_filename = os.path.join(currentDir, "stats_" + file1)
        if os.path.isfile(stats_filename):
            stats = np.load(stats_filename)
            dt_data = stats[:, 2]
        else:
            stats, _, dt_data = MSD.singleTrajectoryAnalysis(file1)

        for dtIndex in range(len(dt_data)):
            stats_ensemble, dt_ensemble = arMa.dataInsert(
                stats_ensemble, dt_ensemble, stats[dtIndex, :], dt_data[dtIndex])

    # So now, stats_ensemble is a list of lists of numpy arrays, and
    # dt_ensemble is a list. We perform a weighted average of each MSD in the
    # ensemble, with the weights equal to the inverse of the variance for each
    # point.
    lenDt = len(dt_ensemble)
    finalStats = np.zeros((lenDt, 3))
    if lenDt == 0:
        return finalStats

    # A leading dt == 0 entry is skipped everywhere below; its row stays zero.
    start = 1 if dt_ensemble[0] == 0 else 0
    for a in range(start, lenDt):
        finalStats[a, 2] = dt_ensemble[a]
        thisMean, thisSD = weightedMean(stats_ensemble[a])
        # Multiple of three comes from converting the radial MSD to the
        # genomic MSD.
        finalStats[a, 0] = 3 * thisMean
        finalStats[a, 1] = 3 * thisSD

    # Experimental error makes the y-intercept non-zero. Extrapolate it from a
    # linear fit of the first 7 non-zero dts, then subtract it.
    fit_window = slice(start, start + num_fit_points)
    # np.polyfit returns coefficients highest power first: [slope, intercept].
    _slope, intercept = np.polyfit(
        dt_ensemble[fit_window], finalStats[fit_window, 0], 1)
    finalStats[start:, 0] -= intercept

    return finalStats