def featureErrorAnalyser(dataHome, plot = False):
    '''
    Compare the per-feature landmark errors across the processing stages and
    print t-test p-values between consecutive stages; optionally plot the
    error distributions grouped per specimen id.

    dataHome: root data directory (contains a "landmark/" folder with
              one *Samples.csv per processing stage)
    plot:     if True, show a histogram of errors per specimen id
    '''

    # this is the hard coded order of the processing (ie image processing goes from
    # masked to aligned, then aligned to re...)
    processOrder = ["maskedSamples", "alignedSamples", "ReAlignedSamples", "NLalignedSamples"]

    dataSrc = dataHome + "landmark/"
    dfPath = glob(dataSrc + "*Samples.csv")

    # order the csv paths to match the processing order
    dfs = []
    for p in processOrder:
        pathNo = np.where([d.find(p)>-1 for d in dfPath])[0][0]
        dfs.append(dfPath[pathNo])

    # load each stage's stats; sqrt of the per-feature values
    # (errors stored as squared distances — TODO confirm against writer)
    infoAll = []
    for d in dfs:
        infodf = quickStats(d)
        infoAll.append(np.sqrt(infodf))

    df = pd.concat(infoAll)
    names = sorted(nameFromPath(list(df.keys()), 4))
    ids = np.unique(nameFromPath(names, 2))

    print("---- pValues ----")
    for i in ids:
        keyInfo, _ = getMatchingList(list(df.keys()), [i], True)
        # compare each stage with the next one
        for p0, p1 in zip(keyInfo[:-1], keyInfo[1:]):
            p0df = df[p0]
            p1df = df[p1]
            pV = scipy.stats.ttest_ind(p0df, p1df, nan_policy = 'omit').pvalue
            print(i + ": " + p0.split("_")[-1] + "-->" + p1.split("_")[-1] + " = " + str(pV))

    # plot the distribution of the errors, one figure per specimen id
    if plot:
        info = []       # error series accumulated for the current id
        labels = []     # legend labels for the current id
        idstore = None  # id of the group currently being accumulated
        for n in names:
            # BUGFIX: the original appended legend labels to `names` while
            # iterating it and never updated `idstore` in the accumulate
            # branch, so groups were never flushed; a separate `labels`
            # list and proper idstore tracking fix both.
            label = n.split("_")[-1]
            id = nameFromPath(n)
            if idstore is None or id == idstore:
                info.append(df[n])
                labels.append(label)
            else:
                # new specimen id: flush the previous group as one figure
                plt.hist(info)
                plt.legend(labels)
                plt.title(idstore)
                plt.show()
                info = [df[n]]
                labels = [label]
            idstore = id
        # flush the final group
        if info:
            plt.hist(info)
            plt.legend(labels)
            plt.title(idstore)
            plt.show()
def standardiser(d, path):
    '''
    Pad every PNG found under ``d + path`` onto a common black canvas sized
    to the largest image, anchored at the top-left, and save the results
    into the specimen's "maskedSamplesNormSize" directory.

    d:    specimen directory (specimen name is the second-to-last component)
    path: sub-path (relative to d) containing the source PNGs
    '''

    specname = d.split("/")[-2]
    print(specname)

    # destination directory (dirMaker with True — presumably force-creates it)
    dest = d + "/3/maskedSamplesNormSize/"
    dirMaker(dest, True)

    d += path
    imgs = glob(d + "*.png")

    # largest extent in every dimension (rows, cols, channels) across all images
    allShapes = np.array([cv2.imread(i).shape for i in imgs])
    maxShape = np.max(allShapes, axis=0)
    print(specname + " has max shape " + str(maxShape))

    # blank canvas every image is copied onto
    plate = np.zeros(maxShape).astype(np.uint8)

    for i in imgs:
        sampname = nameFromPath(i, 3)
        img = cv2.imread(i)
        # NOTE(review): a 180° rotation for samples whose name contains "c"
        # was disabled (commented out) in the original; behaviour preserved.
        rows, cols = img.shape[0], img.shape[1]
        canvas = plate.copy()
        canvas[:rows, :cols, :] = img
        cv2.imwrite(dest + sampname + ".png", canvas)
def getFiles(s, size, dataHome):
    '''
    Copy the tif files for one specimen from the data home onto the USB
    drive (files originally produced on the HPC).

    s:        specimen name
    size:     resolution identifier appended to the destination directory
    dataHome: root directory containing the specimen folders
    '''
    import subprocess  # local import keeps the file-level import block untouched

    # copy the tif files from HPC
    print("Starting " + s)
    path = '/Volumes/USB/' + s + str(size) + '/tifFiles/'
    dirMaker(path)
    imgs = sorted(glob(dataHome + s + '/3/tifFiles/*.tif'))
    for i in imgs:
        print(" Copying " + nameFromPath(i))
        # BUGFIX: os.system built a raw shell string which breaks on paths
        # containing spaces or shell metacharacters; the argument-list form
        # passes each path verbatim. Return code deliberately not checked,
        # matching the original best-effort behaviour.
        subprocess.run(['scp', '-r', i, path], check=False)
    print("Finished " + s)
def processErrors(dataHome, size, datasrc, plot):
    '''
    Get the per feature error as a csv, save it and provide some quick stats

    dataHome: root data directory
    size:     resolution identifier of the processed images
    datasrc:  name of the processing stage being evaluated (also the csv name)
    plot:     passed through to getFeatureError for optional visualisation
    '''
    # NOTE(review): removed unused locals `annosrc` and `names` — the
    # directory listing they computed was never used.

    # get the features and calculate the error between them
    featureErrors = getFeatureError(dataHome, size, datasrc, plot)

    # save as a csv (ensure the landmark directory exists first)
    dirMaker(dataHome + "landmark/")
    featureErrors.to_csv(dataHome + "landmark/" + datasrc + ".csv")
def getTransformedFeatures(dataHome, size, imgsrc):
    '''
    This extracts the annotated features from the annotated images.

    dataHome: root data directory (contains "landmark/" with S* annotation csvs)
    size:     resolution identifier, used to locate the image directory
    imgsrc:   name of the image sub-directory to read the annotated pngs from

    Returns a tuple (greenPosAll, greenPosNo):
        greenPosAll: dict mapping sample name -> Nx2 array of (row, col)
                     pixel positions of the green-marked features
        greenPosNo:  list of expected feature counts (one per processed image)
    '''

    print("----- " + imgsrc + " -----")

    imgsrc = dataHome + str(size) + "/" + imgsrc + "/"
    annosrc = dataHome + "landmark/"

    annos = sorted(glob(annosrc + "S*"))
    imgs = sorted(glob(imgsrc + "*png*"))

    # pair each annotation with its corresponding image
    # (presumably ordered to match `annos`; verify getMatchingList's contract)
    imgsToUse = getMatchingList(annos, imgs)

    greenPosAll = {}
    greenPosNo = []
    for i, a in zip(imgsToUse, annos):
        name = nameFromPath(i, 3)
        img = cv2.imread(i)

        # get the positions of the features (red channel always 0, green channel never 0)
        mask = (img[:, :, 0] == 0)*1 * (img[:, :, 1] != 0)*1
        maskPos = np.where(mask == 1)
        # stack as (row, col) coordinate pairs
        gp = np.c_[maskPos[0], maskPos[1]]

        # skip images with no marked features (neither output gets an entry)
        if len(gp) == 0:
            continue

        # expected feature count comes from the annotation csv's row count
        featsNo = len(pd.read_csv(a))
        # NOTE hardcoded because there is one feature which was removed from the
        # sample during the specimenID and it's way to hard to compensate for that
        if name.find("7") > -1:
            featsNo -= 1
        greenPosNo.append(featsNo)
        greenPosAll[name] = gp

    return(greenPosAll, greenPosNo)
def quickStats(dfPath):
    '''
    Read a per-feature error csv and print quick summary statistics
    (median ± IQR, max) for each column.

    dfPath: path to the csv; first column is used as the index

    Returns the loaded DataFrame.
    '''

    # read in the df
    df = pd.read_csv(dfPath, index_col=0)
    name = nameFromPath(dfPath)
    print("\n---- " + name + " ----")

    # Provide some quick stats
    names = list(df.keys())

    # NOTE to compare to ANHIR data, compute the
    # landmark registration accuracy (TRE) which is:
    '''
    relative Target Registration Error (rTRE) as the Euclidean distance between the
    submitted coordinates xˆlj and the manually determined (ground truth) coordinates
    xlj (withheld from participants)

    NOTE use the original image diagonal rather than the registered image diagonal....

    (Borovec, J., Kybic, J., Arganda-Carreras, I., Sorokin, D. V., Bueno, G.,
    Khvostikov, A. V., ... & Muñoz-Barrutia, A. (2020). ANHIR: automatic non-rigid
    histological image registration challenge. IEEE transactions on medical
    imaging, 39(10), 3042-3052.)

    Reference diagonal would be ~3120 px for an image of ~1700x2500 pixels;
    the rTRE normalisation is not currently applied.
    '''
    # NOTE(review): removed unused locals from the original — `tre = 3120`
    # was never referenced, and `std = sqrt(np.std(e))` was computed but
    # never printed (and sqrt of a std is dimensionally dubious anyway).

    for colErrors, colName in zip(df.T.values, names):
        # remove nans before computing statistics
        e = colErrors[~np.isnan(colErrors)]
        dist = np.round(np.median(e), 1)
        qrt = np.round(scipy.stats.iqr(e),1)
        m = np.round(np.max(e), 2)
        print(colName + ": " + str(dist) + "±" + str(qrt) + ", max=" + str(m))

    return(df)
segsrc = src + "Segmentations" + model + "/" imgDest = src + "SegmentationsEvals" + model + "/" masks = sorted(glob(segsrc + "*.png")) imgs = sorted(glob(imgsrc + "*anno*.png")) dirMaker(imgDest) maskToUse = getMatchingList(imgs, masks) for n, (m, i) in enumerate(zip(maskToUse, imgs)): if m is None: continue name = nameFromPath(i, 3) print(name + " Processing") img = cv2.imread(i) mask = cv2.imread(m) xi, yi, _ = img.shape xm, ym, _ = mask.shape xr, yr = tuple(np.round((xi / xm) * np.array([xm, ym])).astype(int)) imgStored = [] for ni, i in enumerate(np.unique(mask)): plate = np.zeros(mask.shape).astype(np.uint8) if model == "ResNet101":