def _get_details(self, reference, hypothesis, **kwargs):
    """Fill `detail` with reference entropy and reference/hypothesis
    cross-entropy, computed from the reference/hypothesis label
    cooccurrence matrix.

    Returns the detail dictionary produced by `self._init_details()`
    with HOMOGENEITY_ENTROPY and HOMOGENEITY_CROSS_ENTROPY set.
    """
    detail = self._init_details()

    matrix = get_cooccurrence_matrix(reference, hypothesis)
    # total duration, plus per-reference-label (rows) and
    # per-hypothesis-label (columns) marginal durations
    total = np.sum(matrix.M)
    ref_marginal = np.sum(matrix.M, axis=1)
    hyp_marginal = np.sum(matrix.M, axis=0)

    entropy = 0.
    cross_entropy = 0.
    for i, ref_label in enumerate(matrix.iter_ilabels()):
        # contribution of this reference label to H(reference);
        # skip zero-duration labels so np.log is never fed 0
        p_ref = ref_marginal[i] / total
        if p_ref > 0:
            entropy -= p_ref * np.log(p_ref)
        for j, hyp_label in enumerate(matrix.iter_jlabels()):
            # joint duration of (reference label, hypothesis label)
            joint = matrix[ref_label, hyp_label]
            if joint > 0:
                cross_entropy -= (joint / total) * \
                    np.log(joint / hyp_marginal[j])

    detail[HOMOGENEITY_CROSS_ENTROPY] = cross_entropy
    detail[HOMOGENEITY_ENTROPY] = entropy
    return detail
def _get_details(self, reference, hypothesis, **kwargs):
    """Fill `detail` with purity components.

    Sets PURITY_CORRECT (duration attributed to the dominant reference
    label of each cluster, or its per-cluster normalized sum) and
    PURITY_TOTAL (number of clusters, or total cluster duration).

    Returns the detail dictionary produced by `self._init_details()`.
    """
    detail = self._init_details()

    if not self.detection_error:
        # restrict both annotations to their common temporal support
        # (note: hypothesis is cropped against the *already-cropped*
        # reference timeline, matching the original evaluation order)
        reference = reference.crop(hypothesis.get_timeline(),
                                   mode='intersection')
        hypothesis = hypothesis.crop(reference.get_timeline(),
                                     mode='intersection')

    matrix = get_cooccurrence_matrix(reference, hypothesis)

    if self.per_cluster:
        # biggest class in each cluster, normalized by cluster duration
        # FIX: .items() replaces .iteritems(), which was deprecated in
        # pandas 1.5 and removed in pandas 2.0; .items() behaves
        # identically on every supported pandas version.
        detail[PURITY_CORRECT] = \
            np.sum([matrix[L, K] / hypothesis.label_duration(K)
                    for K, L in matrix.argmax(axis=0).items()])
        # number of clusters (as float)
        detail[PURITY_TOTAL] = float(matrix.shape[1])
    else:
        # guard against an empty cooccurrence matrix (no labels)
        if np.prod(matrix.shape):
            detail[PURITY_CORRECT] = np.sum(np.max(matrix.df.values,
                                                   axis=0))
        else:
            detail[PURITY_CORRECT] = 0.
        # total duration of clusters (with overlap)
        detail[PURITY_TOTAL] = np.sum([hypothesis.label_duration(K)
                                       for K in hypothesis.labels()])
    return detail