Exemple #1
0
def stase_metrics(df):
    """Derive the STASE metric set from a label table.

    The log output reports the rows of ``df`` as files and its columns as
    antivirus; each distinct value found in the stacked frame is counted as
    a label.

    Returns a 4-tuple ``(m, apps, avs, clusters)`` where ``m`` is a
    ``pd.Series`` holding the metrics followed by the three size counters,
    ``apps`` is the result of ``rows_stats(df)``, ``avs`` is the result of
    ``cols_stats(df)`` and ``clusters`` is the per-value occurrence count
    of the stacked frame.
    """
    n_files, n_avs = df.shape
    logging.info("FILES: {0}, ANTIVIRUS: {1}".format(n_files, n_avs))

    stacked = df.stack()
    clusters = stacked.value_counts()
    logging.info("clustering DONE")

    n_labels = clusters.size
    logging.info("LABELS: {0}".format(n_labels))

    apps = rows_stats(df)
    logging.info("rowstats computation DONE")

    avs = cols_stats(df)
    logging.info("colstats computation DONE")

    # Keys are inserted one by one, in this exact order, so the resulting
    # Series index lists the metrics first and the size counters last.
    m = pd.Series()

    # ouroboros(..., True) is unpacked as a pair: the value and its "_idx".
    equiponderance, equiponderance_idx = ouroboros(avs['positives'], True)
    m['equiponderance'] = equiponderance
    m['equiponderance_idx'] = equiponderance_idx

    m['exclusivity'] = avs['alones'].sum() / n_files
    m['recognition'] = apps['positives'].mean() / n_avs
    m['synchronicity'] = np.mean(avs['overlap'])

    av_positives_total = avs['positives'].sum()
    m['genericity'] = 1 - (n_labels - 1) / (av_positives_total - 1)

    uniformity, uniformity_idx = ouroboros(clusters, True)
    m['uniformity'] = uniformity
    m['uniformity_idx'] = uniformity_idx

    # divergence and consensuality share the same denominator: the total of
    # the 'positives' column minus the number of files.
    distinct_surplus = apps['distincts'].sum() - n_files
    positive_surplus = apps['positives'].sum() - n_files
    m['divergence'] = distinct_surplus / positive_surplus

    max_surplus = apps['max'].sum() - n_files
    m['consensuality'] = max_surplus / positive_surplus

    known_resemblance = apps['resemblance'].dropna()
    m['resemblance'] = np.mean(known_resemblance)

    m['avs'] = n_avs
    m['apps'] = n_files
    m['labels'] = n_labels

    logging.info("metrics computation DONE")

    return m, apps, avs, clusters
 def test_against_gini(self):
     """Print ouroboros() next to self._gini() for each array in self.cases.

     Only the first element of every case is used; nothing is asserted,
     the table is meant to be read by eye.
     """
     header_fmt = "{0:<15} || {1:<s} | {2:<4s}"
     row_fmt = "{0:<15} || {1:<4.2f} | {2:<4.2f}"

     print(header_fmt.format("ARRAY", "OURO", "GINI"))
     for case in self.cases:
         values, _, _ = case
         ouro_score = ouroboros(values)
         gini_score = self._gini(values)
         label = ','.join(map(str, values))
         print(row_fmt.format(label, ouro_score, gini_score))
 def test_standalone(self):
     """Check ouroboros(array, True) against the pair stored with each case.

     The first returned element must match the case's second field to two
     decimal places; the second returned element must equal the case's
     third field exactly.
     """
     for array, expected_value, expected_indice in self.cases:
         failure_msg = "Array: {0}".format(array)
         result = ouroboros(array, True)
         actual_value, actual_indice = result
         self.assertAlmostEqual(
             actual_value, expected_value, places=2, msg=failure_msg
         )
         self.assertEqual(actual_indice, expected_indice, msg=failure_msg)