def metrics(i, ref_name, ref_file, args):
    """Compute volume and Dice overlap metrics for one reference label image.

    Parameters
    ----------
    i : int
        Index into ``in2_files`` of the image to compare against the reference.
    ref_name : str
        Name stored under the ``'ref_name'`` key of the result.
    ref_file : str
        File name of the reference image, relative to ``args.ref_dir[0]``.
    args : object
        Needs ``ref_dir``, ``in2_dir`` (indexable, first entry used) and the
        boolean ``per_label``.

    Returns
    -------
    dict
        ``'ref_name'``, ``'ref_vol'`` and, when ``in2_files`` is non-empty,
        ``'in2_vol'`` and ``'dice'``; with ``args.per_label`` also
        ``'ref_vol_lab<N>'``, ``'in2_vol_lab<N>'`` and ``'dice_lab<N>'`` for
        every non-zero label ``N`` found in the reference.

    Notes
    -----
    ``in2_files`` is not a parameter; it is read from the enclosing scope
    (presumably a module-level list -- confirm against the caller).
    """
    def _load_labels(directory, filename):
        # ``get_data()`` is deprecated in nibabel; ``np.asanyarray(img.dataobj)``
        # is the documented way to obtain the same array.
        img = nib.load(os.path.join(directory, filename))
        return np.round(np.asanyarray(img.dataobj)).astype(int)

    ref = _load_labels(args.ref_dir[0], ref_file)
    in2 = _load_labels(args.in2_dir[0], in2_files[i]) if in2_files else None

    # ``result`` instead of ``dict``: the original shadowed the builtin.
    result = {'ref_name': ref_name}

    # compute per-label metrics
    if args.per_label:
        labels = np.unique(ref)
        for lab in labels[labels != 0]:
            ref_lab = ref == lab
            result['ref_vol_lab%d' % lab] = ref_lab.sum()
            if in2_files:
                in2_lab = in2 == lab
                result['in2_vol_lab%d' % lab] = in2_lab.sum()
                # scipy's dice() is a dissimilarity; 1 - dice is the overlap.
                result['dice_lab%d' % lab] = 1. - dice(ref_lab.ravel(), in2_lab.ravel())

    # compute whole volume metrics
    ref_fg = ref > 0
    result['ref_vol'] = ref_fg.sum()
    if in2_files:
        in2_fg = in2 > 0
        result['in2_vol'] = in2_fg.sum()
        result['dice'] = 1. - dice(ref_fg.ravel(), in2_fg.ravel())

    return result
def process_ws(wlist):
    """Score word pairs with PPMI and LSA vectors and report correlations.

    For every pair in ``wlist`` the cosine similarity and scipy ``dice`` value
    of the two words' PPMI vectors and of their LSA vectors are computed (NaN
    entries are dropped from each vector first).  Per-pair values are written
    to ``cosine.txt`` and ``dice.txt`` next to the gold score ``beato[i]``, and
    the Pearson correlations of each measure with ``beato`` are written to
    ``correlations.txt`` and printed.

    Parameters
    ----------
    wlist : sequence
        Word pairs; ``pair[0]`` and ``pair[1]`` are looked up in the
        ``ppmi`` and ``lsa`` tables.

    Notes
    -----
    ``ppmi``, ``lsa`` and ``beato`` are read from the enclosing scope.
    The original author warns that this is very slow to compute.
    Files are written as UTF-8 because the report contains non-ASCII text.
    """
    def _pair_scores(table, pair):
        """Return (cosine similarity, dice) for the two words of *pair*."""
        vec_1 = np.array([table.get(pair[0]).values])
        vec_2 = np.array([table.get(pair[1]).values])
        vec_1 = vec_1[~np.isnan(vec_1)]
        vec_2 = vec_2[~np.isnan(vec_2)]
        cos = cosine_similarity(vec_1.reshape(1, -1), vec_2.reshape(1, -1))[0][0]
        return cos, dice(vec_1, vec_2)

    pmi_cosine_list, lsa_cosine_list, pmi_dice_list, lsa_dice_list = [], [], [], []

    with open("cosine.txt", "w", encoding="utf-8") as cosine_file, \
            open("dice.txt", "w", encoding="utf-8") as dice_file:
        cosine_file.write("pair\t\t\tppmi lsa golden\n")
        dice_file.write("pair\t\t\tppmi lsa golden\n")
        for i, pair in enumerate(wlist):
            ppmi_number_cos, ppmi_number_dice = _pair_scores(ppmi, pair)
            lsa_number_cos, lsa_number_dice = _pair_scores(lsa, pair)

            cosine_file.write(
                f"{pair}\t{round(ppmi_number_cos, 3)}\t{round(lsa_number_cos, 3)}\t{beato[i]}\n"
            )
            dice_file.write(
                f"{pair}\t{round(ppmi_number_dice, 3)}\t{round(lsa_number_dice, 3)}\t{beato[i]}\n"
            )

            pmi_cosine_list.append(ppmi_number_cos)
            pmi_dice_list.append(ppmi_number_dice)
            lsa_cosine_list.append(lsa_number_cos)
            lsa_dice_list.append(lsa_number_dice)

    # pearsonr returns (correlation, p-value)
    corr_pmi_cos = pearsonr(pmi_cosine_list, beato)
    corr_lsa_cos = pearsonr(lsa_cosine_list, beato)
    corr_pmi_dice = pearsonr(pmi_dice_list, beato)
    corr_lsa_dice = pearsonr(lsa_dice_list, beato)

    with open("correlations.txt", "w", encoding="utf-8") as corr_file:
        corr_file.write(
            f"for cosine similarity the correlations are: \npmi: {round(corr_pmi_cos[0], 3)} at"
            f" {round(corr_pmi_cos[1], 3)} significance level\n")
        corr_file.write(
            f"lsa: {round(corr_lsa_cos[0], 3)} at"
            f" {round(corr_lsa_cos[1], 3)} significance level\n")
        corr_file.write(
            f"for dice similarity the correlations are: \npmi: {round(corr_pmi_dice[0], 3)} at"
            f" {round(corr_pmi_dice[1], 3)} significance level\n")
        corr_file.write(
            f"lsa: {round(corr_lsa_dice[0], 3)} at"
            f" {round(corr_lsa_dice[1], 3)} significance level\n")
        corr_file.write(
            "можно заметить, что корелляция слабая между вордсим и ручным подсчетом по корпусу"
        )

    print("for corr:", corr_pmi_cos, corr_lsa_cos)
    print("for dice:", corr_lsa_dice, corr_pmi_dice)
def score_reconstructions(X, X_hat):
    """Return the per-sample scipy ``dice`` dissimilarity of reconstructions.

    Parameters
    ----------
    X : ndarray
        Original samples, one per row; cast to int before scoring.
    X_hat : ndarray
        Reconstructions, one per row; rounded to the nearest integer first.

    Returns
    -------
    list of float
        ``dice(X[i], round(X_hat[i]))`` for each row ``i``.  The mean is also
        printed.
    """
    # The original indexed a global ``test_fused_X`` here instead of ``X``.
    # ``np.round`` replaces the ``np.round_`` alias removed in NumPy 2.0.
    D = [
        dice(X[i].astype(int), np.round(X_hat[i], 0).astype(int))
        for i in range(X.shape[0])
    ]
    print('Mean DICE Dissimilarity Score (0.0 is no dissimilarity, 1.0 is total dissimilarity): {} '.format(np.mean(D)))
    return D
def dice_matrix(labels1, labels2):
    """
    Build the pairwise Dice similarity matrix between two labelings.

    Parameters
    ----------
    labels1: cluster labels, shape = [n_samples].
    labels2: cluster labels, shape = [n_samples].

    Returns
    -------
    dice_mat: array, shape = (label_number1, label_number2).
        Entry (i, j) is ``1 - dice(labels1 == l1, labels2 == l2)`` for the
        i-th non-zero label of labels1 and the j-th non-zero label of labels2.
        NaN entries are replaced through ``np.nan_to_num``.

    Notes
    -----
    1. Label 0 is dropped from both inputs; per the original author it marks
       the medial wall.
    """
    from scipy.spatial.distance import dice

    uniq1 = np.unique(labels1)
    uniq2 = np.unique(labels2)

    # label 0 is not scored
    rows = uniq1[np.where(uniq1 != 0)]
    cols = uniq2[np.where(uniq2 != 0)]

    dice_mat = np.zeros((np.shape(rows)[0], np.shape(cols)[0]))
    for r, value1 in enumerate(rows):
        for c, value2 in enumerate(cols):
            # scipy's dice() is a dissimilarity, hence the complement
            dissimilarity = dice(labels1 == value1, labels2 == value2)
            dice_mat[r, c] = 1 - dissimilarity

    return np.nan_to_num(dice_mat)
def get_nearest_neighbor(self, x_test, k, sample_class):
    """Return the indices of the ``k`` samples closest to ``x_test``.

    Parameters
    ----------
    x_test : array_like
        Query vector.
    k : int
        Number of neighbours wanted.
    sample_class : indexable
        Candidate samples; ``sample_class[i][:]`` is compared with ``x_test``.
        Samples identical to ``x_test`` are skipped.

    Returns
    -------
    list of int
        Indices into ``sample_class`` ordered by increasing distance (ties
        broken by index).  Fewer than ``k`` entries are returned when fewer
        candidates exist.

    Raises
    ------
    ValueError
        If ``self.distance_calculator`` is not one of the supported names.
    """
    # Public option name -> function name in the ``dis`` module.
    metric_names = {
        'jaccard': 'jaccard',
        'dice': 'dice',
        'correlation': 'correlation',
        'yule': 'yule',
        'russelo-rao': 'russellrao',
        'sokal-michener': 'sokalmichener',
        'rogers-tanimoto': 'rogerstanimoto',
        'kulzinsky': 'kulsinski',
    }
    if self.distance_calculator not in metric_names:
        # The original fell through the elif chain and hit an unbound local.
        raise ValueError(
            'Unknown distance calculator: %r' % (self.distance_calculator,))
    metric = getattr(dis, metric_names[self.distance_calculator])

    distances = []
    for i in range(len(sample_class)):
        sample = sample_class[i][:]
        if (sample != x_test).any():
            distances.append([metric(x_test, sample), i])

    # keep the k nearest; slicing tolerates k > number of candidates
    distances.sort()
    return [index for _, index in distances[:k]]
def dice_coefficient(y_true, y_pred):
    """
    Computes the Sorensen-Dice metric

                    TP
        Dice = 2 -------
                  T + P

    Parameters
    ----------
    y_true : numpy.array
        Ground truth; values > 0 count as foreground.
    y_pred : numpy.array
        Prediction; values > 0 count as foreground.

    Returns
    -------
    scalar
        Dice metric (1 - scipy's Dice dissimilarity)
    """
    # scipy's dice() expects 1-D vectors; the original reshaped to (N, 1)
    # columns, which current SciPy rejects.
    y_pred_flatten = (y_pred > 0).reshape(-1)
    y_true_flatten = (y_true > 0).reshape(-1)
    dice_score_negated = dice(y_true_flatten, y_pred_flatten)
    return 1 - dice_score_negated
def distance(x, y, weights=None, p=3, method="euclidean"):
    '''
    Compute the distance between two vectors with the chosen method.

    :param x: X vector
    :param y: Y vector
    :param weights: accepted for compatibility; currently not used
    :param p: order of the norm, used only by the "minkowski" method
    :param method: Method to Find Distance: "euclidean", "minkowski",
        "cosine", "manhattan", "dice", "jaccard", "hamming" or "canbera"
        ("canberra" is accepted as well). Unknown names print a notice and
        fall back to the Euclidean distance.
    :return: The Distance Value
    '''
    # This function is itself named ``distance``, so the scipy module must be
    # bound to a different local name; the original called attributes on the
    # function object and could never work.
    from scipy.spatial import distance as sp_distance

    metrics = {
        "euclidean": sp_distance.euclidean,
        "minkowski": lambda a, b: sp_distance.minkowski(a, b, p),
        "cosine": sp_distance.cosine,
        "manhattan": sp_distance.cityblock,
        "dice": sp_distance.dice,
        "jaccard": sp_distance.jaccard,
        # The original used ``==`` here, discarding the result.
        "hamming": sp_distance.hamming,
        # The original called chebyshev under this name (and discarded it).
        "canbera": sp_distance.canberra,
        "canberra": sp_distance.canberra,
    }

    metric = metrics.get(method)
    if metric is None:
        print(method, " Not Found! unsing Eclidean Distance!")
        metric = sp_distance.euclidean
    return metric(x, y)
def NewsToTweetsScor_pair(newsVecList, newsWordList, tweetVecList, tweetWordList, scoreFile):
    """Average six distance measures between every news and tweet vector.

    For each pair the cosine, euclidean, dice, correlation, jaccard and
    cityblock distances are summed and divided by 6.

    Parameters
    ----------
    newsVecList, tweetVecList : sequence of vectors
        Vectors to compare pairwise.
    newsWordList, tweetWordList, scoreFile
        Not used by this function; kept for interface compatibility.

    Returns
    -------
    list of list of float
        ``result[i][j]`` is the averaged distance between news vector ``i``
        and tweet vector ``j``.
    """
    # Print calls use the function form; the original Python 2 statements are
    # a syntax error on Python 3.
    print('Score pair wise start')
    measures = (
        distance.cosine,
        distance.euclidean,
        distance.dice,
        distance.correlation,
        distance.jaccard,
        distance.cityblock,
    )
    total_dist = []
    for i, u in enumerate(newsVecList):
        # progress marker, kept on the same output line as in the original
        print(i, ' = ', end=' ')
        u_to_v = [sum(measure(u, v) for measure in measures) / 6.0
                  for v in tweetVecList]
        total_dist.append(u_to_v)
    print('pair wise end')
    return total_dist
def validate_brainMask(groundTruth, brainMask):
    """Compare an automated brain mask against the ground truth.

    Parameters
    ----------
    groundTruth : ndarray
        Reference mask.
    brainMask : ndarray
        Automated mask with the same number of elements.

    Returns
    -------
    tuple
        ``(dsc, jsc, prf, acc, cm)``: Dice similarity, Jaccard similarity,
        the result of ``precision_recall_fscore_support``, the accuracy score
        and the confusion matrix, all computed on the flattened masks.
    """
    # flatten() alone yields the same 1-D array the original produced through
    # an intermediate 2-D reshape, and does not require 3-D input.
    A = groundTruth.flatten()
    B = brainMask.flatten()

    # dice similarity coefficient (scipy returns the dissimilarity)
    dsc = 1 - distance.dice(A, B)
    # jaccard similarity coefficient
    jsc = 1 - distance.jaccard(A, B)
    # precision, recall, f-score
    prf = precision_recall_fscore_support(A, B)
    # accuracy
    acc = metrics.accuracy_score(A, B)
    # confusion matrix
    cm = confusion_matrix(A, B)

    return (dsc, jsc, prf, acc, cm)
def get_node_max_dice(self, mask):
    """Find the node whose mask has the highest Dice overlap with ``mask``.

    Candidate nodes are the leaves looked up in ``self.ijk_leaf_dict`` for
    every point of the mask's point cloud, plus all of their ancestors.

    Parameters
    ----------
    mask : ndarray
        Mask to match; compared against node masks of shape
        ``self.ref.shape``.

    Returns
    -------
    tuple
        ``(node, d_max)``: the best node and its Dice score.

    Raises
    ------
    AssertionError
        If the point cloud built from ``mask`` is empty.
    ValueError
        If no candidate node has a Dice score above 0 (the original failed
        with an unbound local variable in that case).
    """
    pc = PointCloud.from_mask(mask)
    assert pc, 'no intersection with mask'

    node_set = set()
    for ijk in pc:
        # leaf containing this point, plus everything above it
        node = self.ijk_leaf_dict[ijk]
        node_set.add(node)
        node_set |= nx.ancestors(self, node)

    mask_flat = mask.flatten()
    best_node = None
    d_max = 0
    for node in node_set:
        # dice score of the node (scipy's dice() is a dissimilarity)
        node_mask = self.get_pc(node=node).to_mask(shape=self.ref.shape)
        d = 1 - dice(mask_flat, node_mask.flatten())
        if d > d_max:
            best_node = node
            d_max = d

    if best_node is None:
        raise ValueError('no node overlaps the mask')
    return best_node, d_max
def get_dice(self, mask):
    """Compute the Dice score between ``mask`` and ``self.mask``.

    Returns 0 when ``mask`` sums to zero (no area detected); otherwise
    ``1 - dice(mask, self.mask)`` on the flattened arrays.
    """
    flat = mask.flatten()
    # ndarray.sum() runs in C; the builtin sum() iterated element by element.
    if not flat.sum():
        # no area detected
        return 0
    return 1 - dice(flat, self.mask.flatten())
def score_reconstructions(X, X_hat):
    """Return the per-sample scipy ``dice`` dissimilarity of reconstructions.

    Parameters
    ----------
    X : ndarray
        Original samples, one per row; cast to int before scoring.
    X_hat : ndarray
        Reconstructions, one per row; rounded to the nearest integer first.

    Returns
    -------
    list of float
        ``dice(X[i], round(X_hat[i]))`` for each row ``i``.  The mean is also
        printed.
    """
    D = []
    for i in range(X.shape[0]):
        # Score against the ``X`` argument; the original read a global
        # ``test_fused_X`` here.  ``np.round`` replaces the removed
        # ``np.round_`` alias.
        reference = X[i].astype(int)
        reconstruction = np.round(X_hat[i], 0).astype(int)
        D.append(dice(reference, reconstruction))
    print('Mean DICE Dissimilarity Score (0.0 is no dissimilarity, 1.0 is total dissimilarity): {} '.format(
        np.mean(D)))
    return D
def roomietoroomieall(point, roomieid, budget, miles):
    """Return the roomies near ``point`` that are candidates for ``roomieid``.

    Roomies whose preferred location lies within ``miles`` of ``point`` and
    whose ``uCost`` is at least ``budget`` are selected; their user profiles
    (plus the profile of ``roomieid``) are vectorised and a pairwise distance
    matrix is built from them.  All profiles except the one closest to
    ``roomieid`` (expected to be its own) are returned.

    Parameters
    ----------
    point : str
        Geometry accepted by ``GEOSGeometry`` (SRID 4326).
    roomieid : int
        User id of the roomie searching for matches.
    budget : number
        Lower bound for ``uCost``.
    miles : number
        Search radius in miles.

    Returns
    -------
    QuerySet
        ``Roomie`` objects of the matched users.  The queryset itself is not
        ordered by distance.

    Raises
    ------
    ValueError
        If ``roomieid`` has no user profile (from ``list.index``).
    """
    feature_fields = ('noise', 'foodPreference', 'cleanliness', 'gender',
                      'cooking', 'uPets', 'sleep', 'smoking', 'socializing',
                      'alcohol')

    pnt = GEOSGeometry(point, srid=4326)
    roomies = Roomie.objects.filter(
        preferredLocation__distance_lte=(pnt, D(mi=miles)),
        uCost__gte=budget).values('user_id')

    # One query returns both the user ids and the feature values, so they are
    # guaranteed to line up row by row.  The original ran separate queries and
    # called ``.index`` on a ``map`` object, which fails on Python 3.
    rows = list(UserProfile.objects.filter(
        Q(user__pk__in=roomies) | Q(user__pk=roomieid)).values(
            'user_id', *feature_fields))
    user_ids = [row.pop('user_id') for row in rows]

    RD = DictVectorizer(sparse=False).fit_transform(rows)
    profilecount = len(RD)
    distmatrix = np.zeros((profilecount, profilecount))
    for i in range(profilecount):
        for j in range(profilecount):
            # Column 3 is the one excluded from both dice() slices below, so it
            # is compared with itself; the original compared column 3 of one
            # profile with column 4 of the other.
            # NOTE(review): confirm which feature DictVectorizer puts in
            # column 3 -- the original called it "cooking".
            cooking = abs(RD[i][3] - RD[j][3]) / 7
            firstpart = dice(RD[i][0:3], RD[j][0:3])
            secondpart = dice(RD[i][4:], RD[j][4:])
            distmatrix[i][j] = (cooking + firstpart + secondpart) / 26

    ids = user_ids.index(roomieid)
    # drop the nearest entry, expected to be the searching roomie itself
    nearest_first = np.argsort(distmatrix[ids])[1:]
    result = [user_ids[idx] for idx in nearest_first]
    return Roomie.objects.filter(user__pk__in=result)
def dice_coeff(arr1, arr2, weighted=True):
    """
    Compute Dice's coefficient between two images.

    Parameters
    ----------
    arr1 : Nifti1Image, str, ndarray
        First input. A path is loaded with nibabel and an image is converted
        with ``get_fdata()``; the data are flattened before comparison.
    arr2 : Nifti1Image, str, ndarray
        Second input, handled the same way.
    weighted : bool, optional
        When True, return the weighted coefficient: the sum of arr1 where
        arr2 is nonzero plus the sum of arr2 where arr1 is nonzero, divided
        by the sum of all values of arr1 and arr2 (as in [1]_).
        When False, return ``1 - dice`` of the binarized arrays.
        Default: True

    Returns
    -------
    The dice similarity between the images.

    Notes
    -----
    .. [1] Cousineau M, Jodoin PM, Morency FC, et al. A test-retest study on
       Parkinson's PPMI dataset yields statistically significant white
       matter fascicles. Neuroimage Clin. 2017;16:222-233. Published 2017
       Jul 25. doi:10.1016/j.nicl.2017.07.020
    """
    def _as_flat_array(data):
        # path -> image -> array, whichever steps apply
        if isinstance(data, str):
            data = nib.load(data)
        if isinstance(data, nib.Nifti1Image):
            data = data.get_fdata()
        return data.flatten()

    flat1 = _as_flat_array(arr1)
    flat2 = _as_flat_array(arr2)

    if not weighted:
        # scipy's dice function returns the dice *dissimilarity*
        return 1 - dice(flat1.astype(bool), flat2.astype(bool))

    overlap = np.sum(flat1 * flat2.astype(bool)) + np.sum(flat2 * flat1.astype(bool))
    total = np.sum(flat1) + np.sum(flat2)
    return overlap / total
def _run_interface(self, runtime):
    """Store the scipy ``dice`` value of the anatomical and DWI masks.

    Both masks are loaded from ``self.inputs``, flattened and passed to
    ``distance.dice``; the result is saved as ``self._results['dice_score']``.
    An ``Exception`` is raised when the image shapes differ.

    NOTE(review): ``distance.dice`` is a dissimilarity (0 means identical);
    confirm that consumers of ``dice_score`` expect that orientation.
    """
    anat_mask = nb.load(self.inputs.anatomical_mask)
    dwi_mask = nb.load(self.inputs.dwi_mask)

    if anat_mask.shape != dwi_mask.shape:
        raise Exception("Cannot compare masks with different shapes")

    anat_flat = anat_mask.get_fdata().flatten()
    dwi_flat = dwi_mask.get_fdata().flatten()
    self._results['dice_score'] = distance.dice(anat_flat, dwi_flat)
    return runtime
def dice(self, x=None, y=None, w=None):
    """
    Dice dissimilarity between two samples, as computed by
    ``scipy.spatial.distance.dice``. The Dice coefficient is a set-similarity
    measure commonly used to compare two samples.

    Any argument left as None falls back to the matching instance attribute
    (``self.x``, ``self.y``, ``self.w``).

    x = [1, 0, 0]
    y = [0, 1, 0]
    """
    # Explicit None checks: ``x or self.x`` raised on NumPy arrays (ambiguous
    # truth value) and silently discarded empty inputs.
    x = self.x if x is None else x
    y = self.y if y is None else y
    w = self.w if w is None else w
    return distance.dice(x, y, w)
def avg_dice_distance(t1, t2, label_ids=None):
    """Average scipy ``dice`` dissimilarity over a set of labels.

    Parameters
    ----------
    t1, t2 : ndarray
        Label arrays of equal shape.
    label_ids : sequence, optional
        Labels to evaluate.  When None or empty, every non-zero label present
        in either array is used.

    Returns
    -------
    float
        Mean of ``dice(t1 == label, t2 == label)`` over the labels, or 0.0
        when there are no labels to evaluate.
    """
    # ``not label_ids`` raised on NumPy arrays; test for None / emptiness
    # explicitly while still treating an empty sequence as "auto".
    if label_ids is None or len(label_ids) == 0:
        ulab = np.unique(np.concatenate((np.unique(t1), np.unique(t2)), axis=0))
        ulab = ulab[ulab != 0]
    else:
        ulab = np.array(label_ids)

    # The original divided by zero when both arrays contained only label 0.
    if ulab.size == 0:
        return 0.

    count = 0.
    for i_lab in ulab:
        count += dice(t1 == i_lab, t2 == i_lab)
    return count / float(ulab.size)
def finalyze_mask(self, prefix=""):
    """Record the Dice score of the assembled prediction, then show/save it.

    When ``self.full_mask`` is set, the prediction (thresholded at 0.5) is
    compared with the mask (thresholded at 1) and the score is appended to
    ``self.dice``.  If neither contains any foreground the method returns
    without visualising or saving.  Visualisation and saving are controlled
    by ``self.config.dbg`` and ``self.config.save_images``.

    NOTE(review): the source formatting was lost; the ``else: return`` is
    read here as belonging to the foreground check -- confirm.
    """
    if self.full_mask is not None:
        nothing_marked = not (np.any(self.full_pred > .5) or np.any(self.full_mask >= 1))
        if nothing_marked:
            return
        pred_fg = self.full_pred.flatten() > .5
        true_fg = self.full_mask.flatten() >= 1
        d = 1 - dice(pred_fg, true_fg)
        self.dice.append(d)
        if self.config.dbg:
            print(self.prev_name, ' dice: ', d)

    if self.config.dbg:
        self.visualize(show_light=True)
    if self.config.save_images:
        self.save(self.prev_name, prefix=prefix)
def dice_manual2(nom1, nom2):
    """Return scipy's Dice dissimilarity between two image files.

    Both images are converted to 1-bit mode (``convert('1')``), flattened and
    compared with ``distance.dice``.

    Parameters
    ----------
    nom1, nom2 : str
        Paths of the images to compare.

    Returns
    -------
    float
        Dice dissimilarity (0 means identical).
    """
    # Context managers close the image files; the original left them open.
    with Image.open(nom1) as first, Image.open(nom2) as second:
        bits_1 = np.array(first.convert('1')).flatten()
        bits_2 = np.array(second.convert('1')).flatten()
    # The arrays are passed directly; converting them to Python lists first
    # only added overhead.
    return distance.dice(bits_1, bits_2)
def get_test_predictions(model, test_dataloader, label_embed, threshold=0.5, metric="jaccard"):
    """
    Predicts on the test data and returns its evaluation metrics and the predictions.

    Parameters
    ----------
    model
        model, CategoryPredictor model in training
    test_dataloader
        BatchWrapper object, test dataloader to get predictions for
    label_embed
        dict, dictionary with keys named after label words whose word embeddings are stored in the values.
    threshold
        float, the threshold above which a sigmoid output is classified as 1.
    metric
        string, evaluation metric. ['jaccard', 'dice']

    Returns
    -------
    test_preds
        np.array, N x num_class array of predictions
    avg_score
        float, per-sample average of the evaluation metric

    Raises
    ------
    ValueError
        If ``metric`` is not supported or the dataloader yields no samples.

    Notes
    -----
    NOTE(review): 'jaccard' uses ``jaccard_score`` (a similarity) while 'dice'
    uses scipy's ``dice`` (a dissimilarity); confirm the intended orientation.
    """
    if metric not in ("jaccard", "dice"):
        # The original left ``score`` unbound for any other value.
        raise ValueError("metric must be 'jaccard' or 'dice', got %r" % (metric,))

    test_preds = []
    total_score = 0.0
    n_test = 0
    for x, y in tqdm(test_dataloader):
        preds = model(x, label_embed)
        preds = torch.sigmoid(preds.data).data.numpy()
        pred_labels = (preds > threshold) + 0
        test_preds.append(pred_labels)

        if metric == "jaccard":
            score = jaccard_score(y, pred_labels, average="samples")
        else:
            score = np.mean([
                dice(y[i, :], pred_labels[i, :])
                for i in range(pred_labels.shape[0])
            ])

        # ``score`` is a batch mean: weight it by the batch size so that the
        # division by the sample count below gives a per-sample average.  The
        # original summed unweighted batch means.
        batch_size = x.size(0)
        total_score += score * batch_size
        n_test += batch_size

    if n_test == 0:
        raise ValueError("test_dataloader yielded no samples")

    avg_score = total_score / n_test
    test_preds = np.vstack(test_preds)
    return test_preds, avg_score
def calculate(self, image, name, binary_thresh):
    """Average Dice score of the thresholded reconstruction error per box.

    The absolute difference between the restored input and the restored
    auto-encoder output is converted to grayscale and thresholded at
    ``binary_thresh`` (background 0, lesion areas 1).  For every bounding box
    ``(x, y, size, ...)`` in ``self.groundtruth_dict[name]`` the matching
    window of the binary map is compared with an all-ones square, and the
    mean of ``1 - dice`` over the boxes is returned.
    """
    original = evaluate.restore(image)
    reconstructed = evaluate.restore(self.auto_encoder(image))
    abs_diff = np.where(original > reconstructed,
                        original - reconstructed,
                        reconstructed - original).clip(0, 255)

    # binarization: background: 0 lesion areas: 1
    gray = rgb2gray(abs_diff).astype(np.uint8)
    binary = cv2.threshold(gray, binary_thresh, 1, cv2.THRESH_BINARY)[1]

    def _box_score(box):
        pos_x, pos_y, size = box[0], box[1], box[2]
        groundtruth = np.ones((size, size)).astype(np.uint8)
        pred = binary[pos_y: pos_y + size, pos_x: pos_x + size]
        return 1 - distance.dice(groundtruth.reshape(-1), pred.reshape(-1))

    scores = [_box_score(box) for box in self.groundtruth_dict[name]]
    return sum(scores) / len(scores)
def dice_similarity(f1, f2):
    """Return scipy's Dice dissimilarity between two token sequences.

    Each sequence is joined into a space-separated document, both documents
    are vectorised with ``CountVectorizer`` and the two count vectors are
    passed to ``distance.dice``.

    Parameters
    ----------
    f1, f2 : sequence
        Tokens (converted with ``str``).

    Returns
    -------
    float or int
        The value of ``distance.dice`` (0 means identical), or 1 when either
        sequence is empty or the documents cannot be vectorised or compared.
    """
    if len(f1) == 0 or len(f2) == 0:
        return 1

    doc_1 = ' '.join(map(str, f1))
    doc_2 = ' '.join(map(str, f2))
    try:
        # toarray() gives plain ndarrays whose rows are 1-D.  The original
        # used todense(), whose np.matrix rows stay 2-D; SciPy rejects those,
        # and the broad except then turned every call into "return 1".
        counts = CountVectorizer().fit_transform([doc_1, doc_2]).toarray()
        return distance.dice(counts[0], counts[1])
    except ValueError:
        # e.g. an empty vocabulary after tokenisation
        return 1
def update(self, pred_batch: np.ndarray, target_batch: np.ndarray):
    """
    Accumulate per-class Dice and Hausdorff scores for a batch.

    Input:
        - pred_batch.shape: [B, H, W]
        - target_batch.shape: [B, H, W]

    For every sample one tuple with ``self.num_classes`` entries is appended
    to ``self.dice_scores`` and one to ``self.ahd_scores``.
    Distance entries: -1.0 for class 0 (background is skipped), 1.0 when the
    class is absent from both arrays, 0.0 when it is absent from exactly one,
    otherwise the larger of the two directed max-min pixel distances.
    """
    assert len(pred_batch) == len(target_batch)

    for i in range(len(pred_batch)):
        dice_score = []
        ahd_score = []
        for j in range(self.num_classes):
            pred_bool = (pred_batch[i] == j).flatten()
            target_bool = (target_batch[i] == j).flatten()
            # Identical masks are scored directly; this sidesteps dice()
            # returning nan for all-zero inputs.  ``np.all`` replaces
            # ``np.alltrue``, which NumPy 2.0 removed.
            if np.all(pred_bool == target_bool):
                dice_score.append(1.0)
            else:
                # Dice score = 1 - Dice distance
                dice_score.append(1 - dice(pred_bool, target_bool))

            # Hausdorff-style distance
            if j == 0:
                # Background is not scored: it would need a lot of memory and
                # carries no useful information.
                ahd_score.append(-1.0)
                continue

            pred_coord = np.array(np.where(pred_batch[i] == j)).T
            target_coord = np.array(np.where(target_batch[i] == j)).T
            # guard against zero-length coordinate arrays
            if len(target_coord) == 0 and len(pred_coord) == 0:
                ahd_score.append(1.0)
            elif len(target_coord) == 0 or len(pred_coord) == 0:
                ahd_score.append(0.0)
            else:
                # Pairwise distances via |a|^2 + |b|^2 - 2 a.b.  The squared
                # norms are computed with plain NumPy instead of the private
                # ``inner1d`` helper.
                pred_sq = (pred_coord * pred_coord).sum(axis=1)
                target_sq = (target_coord * target_coord).sum(axis=1)
                D_mat = np.sqrt(
                    pred_sq[np.newaxis].T + target_sq
                    - 2 * np.dot(pred_coord, target_coord.T))
                dH = np.max(
                    np.array([
                        np.max(np.min(D_mat, axis=0)),
                        np.max(np.min(D_mat, axis=1))
                    ]))
                ahd_score.append(dH)

        self.dice_scores.append(tuple(dice_score))
        self.ahd_scores.append(tuple(ahd_score))
def dice_distance(self, query, transformation='tfidf'):
    """
    Return one minus scipy's ``dice`` value between a query and the book.

    The postings of the book whose token name appears in ``query`` are turned
    into a (query vector, book vector) pair by
    ``utils.get_transformed_vector`` using ``transformation``; the result is
    ``1 - dice(query_vector, book_vector)``.

    NOTE(review): despite the method name this is a similarity, not a
    distance.  An earlier description gave the formula
    2 * dot(query, book) / (sum of squared terms of both vectors); confirm
    that scipy's ``dice`` yields that quantity for non-binary vectors.
    """
    matching_postings = self.posting_set.filter(token__name__in=query.keys())
    vectors = utils.get_transformed_vector(
        matching_postings, query, transformation)
    query_vector, book_vector = vectors
    dissimilarity = dice(query_vector, book_vector)
    return 1 - dissimilarity
def distances(v1, v2):
    """Return cosine, dice, hamming and kulsinski distances of two vectors.

    Parameters
    ----------
    v1, v2 : sparse vectors
        Objects providing ``sum()`` and ``toarray()``.
        NOTE(review): assumed to hold a single row each -- confirm.

    Returns
    -------
    ndarray or object
        ``[cosine, dice, hamming, kulsinski]``; the last three are computed
        on the binarized (``> 0``) vectors.  When either vector sums to zero,
        ``_NEAR`` is returned if both sums are equal and ``_FAR`` otherwise.
    """
    sum_1 = v1.sum()
    sum_2 = v2.sum()
    if sum_1 == 0 or sum_2 == 0:
        return _NEAR if sum_1 == sum_2 else _FAR

    # scipy's distance functions require 1-D input; toarray() alone returns a
    # 2-D (1, n) array.
    dense_1 = v1.toarray().ravel()
    dense_2 = v2.toarray().ravel()
    b1 = dense_1 > 0
    b2 = dense_2 > 0
    return np.asarray([
        sp_dist.cosine(dense_1, dense_2),
        sp_dist.dice(b1, b2),
        sp_dist.hamming(b1, b2),
        sp_dist.kulsinski(b1, b2)
    ])
def cross_channel_boolean_distance_features(mask):
    """Compute boolean distance features for every pair of channels.

    Each pair of channels (ch1 < ch2) is flattened and compared with nine
    distance functions of ``dist``.

    Parameters
    ----------
    mask : 3D array, shape (M, N, C)
        The input mask with multiple channels.

    Returns
    -------
    features : dict
        Maps ``"<metric>_distance_Ch<i>_Ch<j>"`` (1-based channel numbers) to
        the distance value; empty when there are fewer than two channels.
    """
    metric_names = (
        "dice",
        "hamming",
        "jaccard",
        "kulsinski",
        "rogerstanimoto",
        "russellrao",
        "sokalmichener",
        "sokalsneath",
        "yule",
    )

    features = dict()
    n_channels = mask.shape[2]
    for first in range(n_channels):
        for second in range(first + 1, n_channels):
            # flatten both channels to 1D
            flat_first = mask[:, :, first].ravel()
            flat_second = mask[:, :, second].ravel()

            # readable suffix with 1-based channel numbers
            suffix = "_Ch" + str(first + 1) + "_Ch" + str(second + 1)

            for metric in metric_names:
                features[metric + "_distance" + suffix] = getattr(dist, metric)(
                    flat_first, flat_second)

    return features
def hero_dice_distance(heros_superpowers, hero_name='A-Bomb', features=None):
    """Score every hero by its Dice dissimilarity to ``hero_name``.

    Parameters
    ----------
    heros_superpowers : pandas.DataFrame
        Must contain a ``'hero_names'`` column and the feature columns.
    hero_name : str
        Hero to compare all others against.
    features : list of str, optional
        Columns used for the comparison.  Defaults to
        ``['Agility', 'Accelerated Healing', 'Lantern Power Ring']``.

    Returns
    -------
    pandas.DataFrame
        A new frame with the feature columns and a leading ``'Score'`` column
        holding ``distance.dice(hero, row)``.  The input frame is not
        modified.

    Raises
    ------
    ValueError
        If ``hero_name`` is not present in ``'hero_names'``.
    """
    if features is None:
        features = ['Agility', 'Accelerated Healing', 'Lantern Power Ring']
    features = list(features)

    # Get the hero to be compared
    matches = heros_superpowers.loc[heros_superpowers['hero_names'] == hero_name, features]
    if matches.empty:
        raise ValueError('hero not found: %r' % (hero_name,))
    # dice() needs a 1-D vector; the original passed the one-row DataFrame.
    main_hero = matches.iloc[0].to_numpy()

    # Compare using only the given features.  Work on a copy so that adding
    # the score column does not touch (a slice of) the caller's frame.
    heros_indexed_by_features = heros_superpowers[features].copy()

    # One score per row of the dataset
    result = [
        distance.dice(main_hero, row.to_numpy())
        for _, row in heros_indexed_by_features.iterrows()
    ]
    heros_indexed_by_features.insert(0, "Score", result, True)
    return heros_indexed_by_features
def avg_dice_distance(t1, t2, label_ids=None):
    """Average scipy ``dice`` dissimilarity over a set of labels.

    When ``label_ids`` is None, every non-zero label found in ``t1`` or
    ``t2`` is evaluated; otherwise exactly the given labels are.  Returns the
    mean of ``dice(t1 == label, t2 == label)``, or 0.0 when no label is
    evaluated.
    """
    if label_ids is not None:
        ulab = np.array(label_ids)
    else:
        present = np.concatenate((np.unique(t1), np.unique(t2)), axis=0)
        ulab = np.unique(present)
        ulab = np.delete(ulab, np.where(ulab == 0))

    if ulab.size == 0:
        return 0.

    total = 0.
    for label in ulab:
        total += dice(t1 == label, t2 == label)
    return total / float(ulab.size)
def all_dice(pred_path, gt_path):
    """Print the Dice score of every prediction raster and their mean.

    For each file in ``pred_path`` the matching ground truth (``RGB``
    replaced by ``GTI`` in the file name) and DSM (``RGB`` replaced by
    ``DSM``) are read from ``gt_path``.  Pixels where the DSM equals its
    nodata value are zeroed in both rasters, then the prediction
    (``> .4``) is compared with the ground truth (``>= 1``).
    Nothing is returned.
    """
    scores = []
    for fname in os.listdir(pred_path):
        # Each dataset handle is kept in its own variable while its band is
        # read, exactly as in the original code.
        pred_ds = gdal.Open(os.path.join(pred_path, fname), gdal.GA_ReadOnly)
        pred = pred_ds.GetRasterBand(1).ReadAsArray()

        truth_ds = gdal.Open(os.path.join(gt_path, fname.replace('RGB', "GTI")), gdal.GA_ReadOnly)
        truth = truth_ds.GetRasterBand(1).ReadAsArray()

        height_ds = gdal.Open(os.path.join(gt_path, fname.replace('RGB', 'DSM')), gdal.GA_ReadOnly)
        height_band = height_ds.GetRasterBand(1)
        nodata = height_band.GetNoDataValue()
        height = height_band.ReadAsArray()

        # ignore pixels without elevation data
        pred[height == nodata] = 0
        truth[height == nodata] = 0

        score = 1 - dice(pred.flatten() > .4, truth.flatten() >= 1)
        print(fname, score)
        scores.append(score)
    print(np.mean(scores))
def calculate_pss(self, profile, ignore=None, method="pairwise"):
    """
    Calculate Profiles Similarity Score.

    Parameters
    ----------
    profile : profile object
        Profile to compare with; must have the same length as ``self`` and
        provide ``profile`` and ``query`` attributes.
    ignore : iterable, optional
        Query elements to leave out of the comparison.
    method : str
        ``"pairwise"`` counts equal positions; ``"jaccard"``, ``"yule"``,
        ``"dice"``, ``"hamming"``, ``"kulsinski"``, ``"rogerstanimoto"``,
        ``"russellrao"`` and ``"sokalmichener"`` call the function of the
        same name in ``dist``.

    Returns
    -------
    The score, or None for an unrecognised ``method``.

    Raises
    ------
    ProfileError
        If the profiles differ in length or an ignored element is not part
        of a profile's query.

    Notes
    -----
    Neither profile is modified.  The original deleted the ignored positions
    from both objects permanently, and after the first deletion the indices
    taken from ``query`` no longer matched the shortened profile.
    """
    if len(self) != len(profile):
        raise ProfileError("Different profiles' lengths")

    def _without_ignored(prof):
        """Return prof.profile with the ignored query positions removed."""
        if not ignore:
            return prof.profile
        skip = set()
        for element in ignore:
            try:
                skip.add(prof.query.index(element))
            except ValueError:
                # ``.index`` raises ValueError; the original caught IndexError
                # and therefore never produced this error.
                raise ProfileError("Element to ignore not in profile")
        return tuple(value for position, value in enumerate(prof.profile)
                     if position not in skip)

    values_1 = _without_ignored(self)
    values_2 = _without_ignored(profile)

    if method == "pairwise":
        return sum(a == b for a, b in zip(values_1, values_2))
    if method in ("jaccard", "yule", "dice", "hamming", "kulsinski",
                  "rogerstanimoto", "russellrao", "sokalmichener"):
        return getattr(dist, method)(values_1, values_2)
    return None
def dice_manual(archivo, id_gt):
    """Compare an uploaded image with a stored ground-truth image.

    The record with ``id_archivo == id_gt`` is fetched, ``archivo`` is saved
    under its own ``filename``, both images are converted to 1-bit mode and
    their scipy Dice dissimilarity is computed.

    Parameters
    ----------
    archivo : file-like upload
        Object providing ``save(path)`` and ``filename``.
    id_gt : int
        Identifier of the ground-truth ``modelos.Archivo`` record.

    Returns
    -------
    str
        The Dice dissimilarity formatted with ``str``.
    """
    print(id_gt)
    gt_record = db.session.query(
        modelos.Archivo).filter(modelos.Archivo.id_archivo == id_gt).first()
    print(gt_record.nombre)

    # NOTE(review): ``archivo.filename`` looks client-supplied and is used as
    # a filesystem path without sanitising -- confirm and restrict it.
    archivo.save(archivo.filename)

    # Context managers close both image files; the original left them open.
    with Image.open(archivo.filename) as uploaded, Image.open(gt_record.nombre) as reference:
        uploaded_bits = np.array(uploaded.convert('1')).flatten()
        reference_bits = np.array(reference.convert('1')).flatten()

    val = distance.dice(uploaded_bits, reference_bits)
    print(val)
    return str(val)
def get_performance_str(mask_estimate, mask_target, label=None):
    """Format Dice, sensitivity and specificity of a mask estimate.

    Both masks are cast to bool.  The returned text has a header line with
    ``label`` followed by one line each for the Dice score, the sensitivity
    (correctly detected target voxels) and the specificity (non-target
    voxels, with the count detected incorrectly).
    """
    estimate = mask_estimate.astype(bool)
    truth = mask_target.astype(bool)
    background = ~truth

    dice_score = 1 - dice(estimate.flatten(), truth.flatten())

    target = truth.sum()
    target_correct = (truth & estimate).sum()
    sens = target_correct / target

    non_target = background.sum()
    non_target_correct = (background & ~estimate).sum()
    non_target_wrong = non_target - non_target_correct
    spec = non_target_correct / non_target

    lines = [
        f'---{label}---\n',
        f'dice: {dice_score:.3f}\n',
        f'sens: {sens:.3f} ({target_correct} of {target} vox detected correctly)\n',
        f'spec: {spec:.3f} ({non_target_wrong} of {non_target} vox detected incorrectly)\n',
    ]
    return ''.join(lines)
def score_reconstructions(X, X_hat):
    """
    Score the reconstructions using DICE, l2 error, and cross entropy

    :param X: original samples, one per row
    :param X_hat: reconstructions, one per row
    :return: ``(D, L2, CC)`` -- three lists with one entry per row of ``X``.
        ``D`` holds the scipy ``dice`` dissimilarity of each row (0.0 if the
        computation raises ZeroDivisionError).  Every entry of ``L2`` is the
        mean squared error over the whole arrays and every entry of ``CC`` is
        ``log_loss(X, X_hat)`` over the whole arrays.

    NOTE(review): ``L2`` and ``CC`` repeat one global value per row, exactly
    as the original did; confirm whether per-row values were intended.
    """
    n_samples = X.shape[0]

    D = []
    for i in range(n_samples):
        try:
            # np.round replaces the np.round_ alias removed in NumPy 2.0
            dice_score = dice(X[i].astype(int), np.round(X_hat[i], 0).astype(int))
        except ZeroDivisionError:
            dice_score = 0.0
        D.append(dice_score)

    # These do not depend on the row index, so compute them once instead of
    # once per row.
    if n_samples > 0:
        L2 = [np.mean((X - X_hat) ** 2)] * n_samples
        CC = [log_loss(X, X_hat)] * n_samples
    else:
        L2 = []
        CC = []

    print('Mean DICE Dissimilarity Score (0.0 is no dissimilarity, 1.0 is total dissimilarity): {} '.format(np.mean(D)))
    return D, L2, CC
def dice_co(a, b):
    """Print and return scipy's Dice dissimilarity between ``a`` and ``b``."""
    # print() call form works on Python 3; the value is also returned so the
    # function can be used like the non-printing variant of the same name.
    value = distance.dice(a, b)
    print(value)
    return value
def main():
    """Run the KNN classifier and write one predicted label per test point.

    Arguments come from ``ld.parse_arguments()`` (``k``, ``distance``,
    ``stopwords_path``, ``train_path``, ``test_path``).  Every test point is
    labelled '0' or '1' by majority vote among its ``k`` nearest training
    points (ties go to '0'); labels are printed and written to
    ``../results/<distance>_<k>.txt``, which is then passed to
    ``result.result``.
    """
    print("# KNN Classifier")
    parser = ld.parse_arguments()

    # printing args
    print('\t-k = ' + str(parser.k))
    print('\t-d = ' + parser.distance)

    metrics = {
        'cosine': spd.cosine,
        'jaccard': spd.jaccard,
        'euclidean': spd.euclidean,
        'dice': spd.dice,
        'correlation': spd.correlation,
        'manhattan': spd.cityblock,
    }
    metric = metrics.get(parser.distance)
    if metric is None:
        # Validate before any loading so no empty result file is left behind.
        stderr.write("ERRO! - Distância informada inválida.\n")
        exit()

    stopwords = None
    if parser.stopwords_path:
        stopwords = ld.load_stopwords(parser.stopwords_path)

    voc = load_vocabulary(parser.train_path, stopwords)
    answers = load_answers(parser.train_path)

    train = transform(voc, parser.train_path)
    test = transform(voc, parser.test_path)

    # output file
    out_path = '../results/' + parser.distance + '_' + str(parser.k)
    out_path += '.txt'

    # ``with`` closes the file even if classification fails half-way.
    with open(out_path, 'w') as out_file:
        for point in test:
            # the k training points closest to ``point``
            scored = ((metric(neigh, point), i) for i, neigh in enumerate(train))
            top_k = heapq.nsmallest(parser.k, scored)

            # classifying: majority vote over the two classes
            classification = np.zeros(2)
            for (_, idi) in top_k:
                classification[int(answers[idi])] += 1

            # outputting classification
            label = '0' if classification[0] >= classification[1] else '1'
            out_file.write(label + '\n')
            print(label)

    # outputting the results
    print()
    print("# Resultados salvos no arquivo: " + out_path)
    result.result("../data/imdb_test", out_path)
# # Z_in = X_in * W.T + b
# # visualize_activations(layer, Z_in, labels)
# # X_in = np.mat(net.blobs[layer].data)
# #
# Visualize inputs vs their reconstructions:
# The mapping file is opened in a ``with`` block so that it is closed even if
# a node lookup fails.
with tb.open_file('/projects/francisco/data/caffe/standardized/data_mappings.h5', 'r') as mappings:
    baseline_mask = mappings.get_node('/r_datamask')[:]
    volmask = mappings.get_node('/r_volmask')[:]
baseline_shape = volmask.shape

X_hat = net.blobs['output_Sigmoid'].data
plot_list = []
#for x in range(6):
for i in [53, 6, 26, 62, 57, 9]:
    #i = np.random.random_integers(BATCH_SIZE)
    plot_list.append((X[i], 'X {}'.format(i)))
    plot_list.append((X_hat[i], 'X_hat {}'.format(i)))
#plot_slices(plot_list, baseline_shape, baseline_mask, binarize=True)
#plot_slices(plot_list, baseline_shape, baseline_mask, binarize=False)

# DICE Scores: scipy's dice() is a dissimilarity, computed per sample between
# the input and its rounded reconstruction.
D = []
for i in range(X.shape[0]):
    # np.round replaces the np.round_ alias removed in NumPy 2.0
    score = dice(X[i].astype(int), np.round(X_hat[i], 0).astype(int))
    D.append(score)
# The message previously described 1.0 as "exact similarity", which is the
# opposite of what a dissimilarity of 1.0 means.
print('Mean DICE Dissimilarity Score (0.0 is no dissimilarity, 1.0 is total dissimilarity): {} '.format(np.mean(D)))
out = net.forward()
X_hat_ae = net.blobs['output_Sigmoid'].data

# Do some PCA vs AE analysis:
# Extract some principal components
X_hat_pca = transform_PCA(code.data.shape[1], train_X, test_fused_X)

# DICE Scores for PCA (scipy's dice() is a dissimilarity; reconstructions are
# rounded to the nearest integer first).
# np.round replaces the np.round_ alias removed in NumPy 2.0.
D_pca = []
for i in range(test_fused_X.shape[0]):
    score = dice(test_fused_X[i].astype(int), np.round(X_hat_pca[i], 0).astype(int))
    D_pca.append(score)

# DICE Scores for AE
D = []
for i in range(test_fused_X.shape[0]):
    score = dice(test_fused_X[i].astype(int), np.round(X_hat_ae[i], 0).astype(int))
    D.append(score)

# The mapping file is opened in a ``with`` block so that it is closed even if
# a node lookup fails.
with tb.open_file('/projects/francisco/data/caffe/standardized/data_mappings.h5', 'r') as mappings:
    baseline_mask = mappings.get_node('/r_datamask')[:]
    volmask = mappings.get_node('/r_volmask')[:]

# Visualize some random slices:
ae_pca = []
Qbray=[dist.braycurtis(QuestionTVectorArray[0].toarray(),u.toarray()) for u in QuestionTVectorArray] Abray=[dist.braycurtis(AnswerTVectorArray[0].toarray(),u.toarray()) for u in AnswerTVectorArray] Qcanberra=[dist.canberra(QuestionTVectorArray[0].toarray(),u.toarray()) for u in QuestionTVectorArray] Acanberra=[dist.canberra(AnswerTVectorArray[0].toarray(),u.toarray()) for u in AnswerTVectorArray] Qhamming=[dist.hamming(QuestionTVectorArray[0].toarray(),u.toarray()) for u in QuestionTVectorArray] Ahamming=[dist.hamming(AnswerTVectorArray[0].toarray(),u.toarray()) for u in AnswerTVectorArray] Qcorrelation=[dist.correlation(QuestionTVectorArray[0].toarray(),u.toarray()) for u in QuestionTVectorArray] Acorrelation=[dist.correlation(AnswerTVectorArray[0].toarray(),u.toarray()) for u in AnswerTVectorArray] Qcityblock=[dist.cityblock(QuestionTVectorArray[0].toarray(),u.toarray()) for u in QuestionTVectorArray] Acityblock=[dist.cityblock(AnswerTVectorArray[0].toarray(),u.toarray()) for u in AnswerTVectorArray] Qdice=[dist.dice(QuestionTVectorArray[0].toarray(),u.toarray()) for u in QuestionTVectorArray] Adice=[dist.dice(AnswerTVectorArray[0].toarray(),u.toarray()) for u in AnswerTVectorArray] Qyule=[dist.yule(QuestionTVectorArray[0].toarray(),u.toarray()) for u in QuestionTVectorArray] Ayule=[dist.yule(AnswerTVectorArray[0].toarray(),u.toarray()) for u in AnswerTVectorArray] #C_Q=np.histogram2d(QuestionTVectorArray[1],QuestionTVectorArray[1])[0] #print "question mutual info-->",mutual_info_score(None,None,contigency=C_Q)#QuestionTVectorArray[0:1],QuestionTVectorArray) #QuestionVectorArray=Qvectorizer.fit_transform(all_questions).toarray() #AnswerVectorArray=Avectorizer.fit_transform(all_answers).toarray() #QUserinputVectorArray=Qvectorizer.transform(userinput).toarray() #AUserinputVectorArray=Avectorizer.transform(userinput).toarray() #cx=lambda a,b:round(np.inner(a,b)/(LA.norm(a)*LA.norm(b)),3) """
def dice_(x, y):
    """Return ``dice(x, y)``, or 0 if it raises ZeroDivisionError or TypeError."""
    try:
        value = dice(x, y)
    except (ZeroDivisionError, TypeError):
        value = 0
    return value
def dice_co(a, b):
    """Return scipy's Dice dissimilarity between ``a`` and ``b``."""
    dissimilarity = distance.dice(a, b)
    return dissimilarity
def predict():
    """Evaluate the 3D U-Net on every patient file and print precision/recall.

    Each HDF5 file under ``path`` holds sub-volumes in ``images/images`` and
    ``labels/labels``.  Per sub-volume:

    * if the label contains an aneurysm, the Dice score between label and
      prediction decides between true positive (> 0.30) and false negative;
    * otherwise, if more than 50 voxels are predicted with confidence > 0.99
      and they are compact along the first axis, it counts as false positive.

    Precision and recall are printed per patient (smoothed by 0.0001) and
    once for all patients together.  Nothing is returned.

    Notes
    -----
    ``path`` is read from the enclosing scope and is concatenated directly
    with the file name.
    """
    eps = 0.0001

    net = unet_model_3d((1, 64, 64, 64))
    net.load_weights("./data/logs/network_weights_loss.h5")

    global_tp = 0
    global_fn = 0
    global_fp = 0

    for patient in os.listdir(path):
        tp = 0
        fp = 0
        fn = 0
        # ``with`` closes the HDF5 file; the original never closed it.
        with h5py.File(path + patient, "r") as f:
            amount_of_subvolumes = len(f["images/images"])
            for i in range(amount_of_subvolumes):
                images = np.array(np.reshape(f["images/images"][i], (1, 1, 64, 64, 64)))
                labels = np.array(np.reshape(f["labels/labels"][i], (1, 1, 64, 64, 64)))

                prediction = net.predict(images, batch_size=1, verbose=1)
                # first-axis indices of the highly confident voxels
                confident = np.where(prediction[0][0] > 0.99)[0]
                highly_conf_predicted = len(confident)

                # aneurysm in mask -> dice can be considered as measure
                if len(np.nonzero(labels)[1]) != 0:
                    dc = 1 - distance.dice(
                        np.reshape(labels, (-1,)), np.reshape(prediction, (-1,))
                    )
                    if dc > 0.30:
                        # aneurysm detected correctly
                        tp += 1
                    else:
                        # aneurysm not detected correctly
                        fn += 1
                    visualize_mask(prediction[0][0])
                    visualize_mask(labels[0][0])
                # no aneurysm in mask but in prediction
                elif highly_conf_predicted > 50:
                    # check whether this is a predicted aneurysm or a random
                    # activation (check is across one axis only)
                    extent = np.max(confident) - np.min(confident)
                    if extent < np.cbrt(highly_conf_predicted) + 5:
                        fp += 1

        # compute precision and recall per patient.  The numerator needs its
        # own parentheses: the original evaluated ``tp + (eps / (...))``.
        precision = (tp + eps) / (tp + fp + eps)
        recall = (tp + eps) / (tp + fn + eps)
        print("precision: " + str(precision) + " recall: " + str(recall))

        global_fn += fn
        global_fp += fp
        global_tp += tp

    # Overall metrics; report nan instead of dividing by zero when nothing
    # was detected, and force float division.
    detected = global_tp + global_fp
    actual = global_tp + global_fn
    precision = float(global_tp) / detected if detected else float("nan")
    recall = float(global_tp) / actual if actual else float("nan")
    print("precision: " + str(precision) + " recall: " + str(recall))