Example #1
0
    def perform_outlier_detection_all_combos(self, X, percentile=8):
        """Score every feature domain with four detectors and return outliers.

        For each ``(domain, features)`` pair in ``X`` this fits an
        ``IsolationForest``, a ``LocalOutlierFactor`` (20 neighbours) and a
        ``DBOD`` model on ``features`` and stores the score arrays under the
        keys ``'iforest'``, ``'lof'``, ``'dbod'`` and ``'abod'`` (the last two
        both come from the same fitted ``DBOD`` model).

        The raw scores are written to
        ``clique_expansion/long_experiment/<seed_user>_unnormalized_scores.csv``,
        one line per (domain, detector) pair: the label ``"<domain> <detector>"``
        followed by the scores, every field terminated by a comma.  The
        directory must already exist.

        The per-detector scores are then merged by ``self.combine_all``; the
        first ``self.len_priors`` combined scores are skipped (presumably they
        belong to the prior users -- confirm against ``combine_all``) and the
        rest are paired positionally with ``self.current_level_users``.

        Args:
            X: Mapping from domain name to the feature data for that domain.
                Every key must be one of ``'temporal'``, ``'content'``,
                ``'user'`` or ``'network'``; any other key raises ``KeyError``.
            percentile: Percentile (0-100) of the new users' combined scores
                used as the cut-off.  Defaults to 8, the value that used to
                be hard-coded.

        Returns:
            List of users whose combined score is less than or equal to the
            cut-off, ordered from highest to lowest score.  Empty when there
            are no scores beyond the priors.
        """
        scores = {'temporal': {}, 'content': {}, 'user': {}, 'network': {}}
        # print(...) with a single string behaves identically on Python 2
        # and Python 3, unlike the bare print statement.
        print("Starting anomaly detection loop")
        for domain, features in X.items():
            domain_scores = scores[domain]

            iforest = IsolationForest()
            iforest.fit(features)
            domain_scores['iforest'] = iforest.decision_function(features)

            lof = LocalOutlierFactor(n_neighbors=20)
            lof.fit(features)
            # NOTE(review): _decision_function is a private scikit-learn
            # method; it is kept as-is because the installed version is not
            # visible from here -- confirm before upgrading scikit-learn.
            domain_scores['lof'] = lof._decision_function(features)

            # A single fitted DBOD model yields both the distance-based and
            # the angle-based scores.
            dbod = DBOD()
            dbod.fit(features)
            domain_scores['dbod'] = dbod.decision_function_distance(features)
            domain_scores['abod'] = dbod.decision_function_angle(features)

        print("Finished anomaly detection loop")

        out_path = ('clique_expansion/long_experiment/' + self.seed_user +
                    '_unnormalized_scores.csv')
        with open(out_path, 'w') as f:
            for domain, by_detector in scores.items():
                for detector, values in by_detector.items():
                    cells = [domain + ' ' + detector]
                    cells.extend(str(value) for value in values)
                    # The trailing comma before the newline is part of the
                    # established file format; keep it.
                    f.write(','.join(cells) + ',\n')

        combined_scores = self.combine_all(scores)
        # Drop the local reference to the raw per-detector arrays; only the
        # combined scores are needed from here on.
        scores = None
        new_scores = combined_scores[self.len_priors:]
        if len(new_scores) == 0:
            # np.percentile is ill-defined on empty input (NaN or an
            # exception depending on the numpy version); nothing to flag.
            return []

        threshold = np.percentile(new_scores, percentile)
        ranked = sorted(zip(self.current_level_users, new_scores),
                        key=lambda pair: pair[1],
                        reverse=True)
        return [user for user, score in ranked if score <= threshold]