def _get_errors(exp_id, fn_fp):
    """Return the displayable misclassified predictions of an experiment.

    When ``fn_fp`` is ``'FN'`` the query keeps predictions whose value is
    ``BENIGN`` on instances whose label is the boolean form of ``MALICIOUS``.
    For any other ``fn_fp`` it keeps predictions whose value is ``MALICIOUS``
    on instances whose label is the boolean form of ``BENIGN``.

    The query is passed to the database-specific ``'random_order'`` function,
    limited to ``NUM_MAX_DISPLAY`` rows and handed to
    ``_predictions_results``, whose result is returned unchanged.
    """
    false_negatives = fn_fp == 'FN'
    predicted_value = BENIGN if false_negatives else MALICIOUS
    ground_truth = label_str_to_bool(
        MALICIOUS if false_negatives else BENIGN)

    errors = (session.query(PredictionsAlchemy)
              .filter(PredictionsAlchemy.exp_id == exp_id)
              .filter(PredictionsAlchemy.value == predicted_value)
              .join(PredictionsAlchemy.instance)
              .filter(InstancesAlchemy.label == ground_truth))
    errors = call_specific_db_func(secuml_conf.db_type, 'random_order',
                                   (errors, ))
    return _predictions_results(errors.limit(NUM_MAX_DISPLAY))
def getAlerts(exp_id, analysis_type):
    """Return the alerts of an experiment as a JSON response.

    Alerts are the predictions of ``exp_id`` whose probability is at least
    the experiment's detection threshold when the experiment has
    probabilities, or whose value equals the boolean form of ``MALICIOUS``
    otherwise.

    ``analysis_type`` selects the ordering:

    * ``'topN'`` (only when probabilities or scores are available): sorted
      by descending probability, or by descending score when there are no
      probabilities;
    * ``'random'``: passed to the database-specific ``'random_order'``
      function;
    * anything else: no explicit ordering.

    At most ``NUM_MAX_DISPLAY`` rows are returned.
    """
    exp = update_curr_exp(exp_id)

    # Does this experiment provide probabilities and/or scores?
    diadem_exp = (session.query(DiademExpAlchemy)
                  .filter(DiademExpAlchemy.exp_id == exp_id)
                  .one())
    with_proba = diadem_exp.proba
    with_scores = diadem_exp.with_scoring

    # Select the predictions that count as alerts.
    alerts = session.query(PredictionsAlchemy)
    alerts = alerts.filter(PredictionsAlchemy.exp_id == exp_id)
    if with_proba:
        threshold = exp.exp_conf.core_conf.detection_threshold
        alerts = alerts.filter(PredictionsAlchemy.proba >= threshold)
    else:
        alerts = alerts.filter(
            PredictionsAlchemy.value == label_str_to_bool(MALICIOUS))

    # Apply the requested ordering, if any.
    if analysis_type == 'topN' and (with_proba or with_scores):
        if with_proba:
            ranking = PredictionsAlchemy.proba
        else:
            ranking = PredictionsAlchemy.score
        alerts = alerts.order_by(ranking.desc())
    elif analysis_type == 'random':
        alerts = call_specific_db_func(secuml_conf.db_type, 'random_order',
                                       (alerts, ))

    return jsonify(_predictions_results(alerts.limit(NUM_MAX_DISPLAY)))
def display_label(self, barplot, label):
    """Add to ``barplot`` the per-range counts of items having ``label``.

    For each entry of ``self.ranges``, counts the items whose
    ``'ground_truth_label'`` equals the boolean form of ``label``. The list
    of counts is wrapped in a ``PlotDataset`` named ``label``, coloured with
    ``get_label_color(label)`` and added to ``barplot``.
    """
    wanted = labels_tools.label_str_to_bool(label)
    counts = [
        sum(1 for item in bucket if item['ground_truth_label'] == wanted)
        for bucket in self.ranges
    ]
    dataset = PlotDataset(counts, label)
    dataset.set_color(get_label_color(label))
    barplot.add_dataset(dataset)
def get_ground_truth(session, dataset_id):
    """Fetch the ground-truth labels and families of a dataset.

    Rows of ``GroundTruthAlchemy`` matching ``dataset_id`` are read in
    ``instance_id`` order.

    Returns a ``(labels, families)`` pair of lists aligned with that order.
    Labels are converted with ``labels_tools.label_str_to_bool``; families
    are returned as stored.
    """
    rows = (session.query(GroundTruthAlchemy)
            .filter(GroundTruthAlchemy.dataset_id == dataset_id)
            .order_by(GroundTruthAlchemy.instance_id)
            .all())
    labels = []
    families = []
    for row in rows:
        labels.append(labels_tools.label_str_to_bool(row.label))
        families.append(row.family)
    return labels, families
def get_annotated_ids(self, label='all', family=None):
    """Return the ids of the annotated instances matching the filters.

    With ``label == 'all'`` every instance whose label is not ``None`` is
    selected; otherwise only instances whose label equals the boolean form
    of ``label``. When ``family`` is given, the selection is further
    restricted to instances of that family.

    The result is ``self.ids.ids`` indexed by the resulting mask.
    """
    if label != 'all':
        selected = self.labels == label_str_to_bool(label)
    else:
        # Deliberately `!= None` rather than `is not None`: presumably
        # self.labels is a numpy array and the comparison must be
        # elementwise -- confirm against the class definition.
        selected = self.labels != None  # NOQA: 711
    if family is None:
        return self.ids.ids[selected]
    same_family = self.families == family
    return self.ids.ids[np.logical_and(selected, same_family)]
def display_label(self, barplot, label):
    """Add to ``barplot`` the per-range item counts for ``label``.

    With ``label == 'all'`` every item of each entry of ``self.ranges`` is
    counted; otherwise only items whose ``'ground_truth_label'`` equals the
    boolean form of ``label``. The counts are stored as a numpy array in a
    ``PlotDataset`` named ``label``, coloured with
    ``get_label_color(label)`` and added to ``barplot``.
    """
    if label == 'all':
        selected = self.ranges
    else:
        wanted = label_str_to_bool(label)
        selected = [
            [item for item in bucket
             if item['ground_truth_label'] == wanted]
            for bucket in self.ranges
        ]
    counts = np.array(list(map(len, selected)))
    dataset = PlotDataset(counts, label)
    dataset.set_color(get_label_color(label))
    barplot.add_dataset(dataset)
def get_families_values(self, label='all'):
    """Return the set of non-``None`` families among selected instances.

    With ``label == 'all'`` every index in
    ``range(self.ids.num_instances())`` is considered; otherwise only the
    indices whose label is not ``None`` and equals the boolean form of
    ``label``.
    """
    if label == 'all':
        indexes = range(self.ids.num_instances())
    else:
        wanted = labels_tools.label_str_to_bool(label)
        indexes = [
            idx for idx in range(self.ids.num_instances())
            if self.labels[idx] is not None and self.labels[idx] == wanted
        ]
    found = set()
    for idx in indexes:
        family = self.families[idx]
        if family is not None:
            found.add(family)
    return found
def change_family_label(session, annotations_id, label, family):
    """Flip the label of every annotation of a family to the opposite one.

    Selects the ``AnnotationsAlchemy`` rows of ``annotations_id`` whose
    label is ``label`` and whose family is ``family``, then sets their label
    to the string form of the negated boolean form of ``label``. The session
    is flushed once after all rows have been updated; it is not committed
    here.
    """
    matching = (session.query(AnnotationsAlchemy)
                .filter(AnnotationsAlchemy.label == label)
                .filter(AnnotationsAlchemy.family == family)
                .filter(AnnotationsAlchemy.annotations_id == annotations_id)
                .all())
    opposite = labels_tools.label_bool_to_str(
        not labels_tools.label_str_to_bool(label))
    for annotation in matching:
        annotation.label = opposite
    session.flush()
def set_predictions(self, predictions):
    """Store ``predictions`` and rebuild the score datasets.

    Without ground truth, ``self.datasets`` holds a single ``'all'`` dataset
    built from all the scores. With ground truth, it holds one dataset per
    label (``MALICIOUS`` then ``BENIGN``) containing, as a numpy array, the
    scores of the instances whose ground truth equals the boolean form of
    that label.
    """
    self.predictions = predictions
    self.datasets = {}
    if not self.has_ground_truth:
        self.datasets['all'] = PlotDataset(predictions.scores, 'all')
        return
    for label in (MALICIOUS, BENIGN):
        wanted = label_str_to_bool(label)
        kept = []
        for idx in range(predictions.num_instances()):
            if predictions.ground_truth[idx] == wanted:
                kept.append(predictions.scores[idx])
        self.datasets[label] = PlotDataset(np.array(kept), label)
def _get_annotations(self, ids):
    """Build the annotations for ``ids`` according to the configured type.

    * ``AnnotationsTypes.none``: an ``Annotations`` object with no labels
      and no families.
    * ``AnnotationsTypes.ground_truth``: whatever ``self._get_ground_truth``
      returns.
    * ``AnnotationsTypes.partial``: an initially empty ``Annotations``
      object filled from the annotated instances stored in the database,
      with labels converted by ``labels_tools.label_str_to_bool``.

    Any other annotations type yields ``None``.
    """
    kind = self.annotations_conf.annotations_type
    if kind == AnnotationsTypes.none:
        return Annotations(None, None, ids)
    if kind == AnnotationsTypes.ground_truth:
        return self._get_ground_truth(ids)
    if kind == AnnotationsTypes.partial:
        partial = Annotations(None, None, ids)
        annotated = annotations_db_tools.get_annotated_instances(
            self.session, self.annotations_conf.annotations_id)
        for instance_id, label, family in annotated:
            bool_label = labels_tools.label_str_to_bool(label)
            partial.set_label_family(instance_id, bool_label, family)
        return partial
    return None
def getPredictionsScores(exp_id, range_, label):
    """Return, as JSON, predictions whose score lies in a given range.

    ``range_`` is a string of the form ``'<min> - <max>'``; both bounds are
    parsed as floats and are inclusive. Unless ``label`` is ``'all'``, the
    predictions are also restricted to instances whose label equals the
    boolean form of ``label``.

    The query is ordered by ascending score, passed to the database-specific
    ``'random_order'`` function and limited to ``NUM_MAX_DISPLAY`` rows.
    """
    bounds = tuple([float(bound) for bound in range_.split(' - ')])
    score_min, score_max = bounds

    in_range = (session.query(PredictionsAlchemy)
                .filter(PredictionsAlchemy.exp_id == exp_id)
                .filter(PredictionsAlchemy.score >= score_min)
                .filter(PredictionsAlchemy.score <= score_max)
                .order_by(PredictionsAlchemy.score.asc()))
    if label != 'all':
        in_range = in_range.join(PredictionsAlchemy.instance).filter(
            InstancesAlchemy.label == label_str_to_bool(label))
    in_range = call_specific_db_func(secuml_conf.db_type, 'random_order',
                                     (in_range, ))
    return jsonify(_predictions_results(in_range.limit(NUM_MAX_DISPLAY)))
def getPredictionsProbas(exp_id, index, label):
    """Return, as JSON, predictions whose probability lies in one bin.

    ``index`` (converted with ``int``) selects the bin
    ``[index * 0.1, (index + 1) * 0.1]``, both bounds inclusive. Unless
    ``label`` is ``'all'``, the predictions are also restricted to instances
    whose label equals the boolean form of ``label``.

    The query is ordered by ascending probability, passed to the
    database-specific ``'random_order'`` function and limited to
    ``NUM_MAX_DISPLAY`` rows.
    """
    bin_index = int(index)
    proba_min = bin_index * 0.1
    proba_max = (bin_index + 1) * 0.1

    in_bin = (session.query(PredictionsAlchemy)
              .filter(PredictionsAlchemy.exp_id == exp_id)
              .filter(PredictionsAlchemy.proba >= proba_min)
              .filter(PredictionsAlchemy.proba <= proba_max)
              .order_by(PredictionsAlchemy.proba.asc()))
    if label != 'all':
        in_bin = in_bin.join(PredictionsAlchemy.instance).filter(
            InstancesAlchemy.label == label_str_to_bool(label))
    in_bin = call_specific_db_func(secuml_conf.db_type, 'random_order',
                                   (in_bin, ))
    return jsonify(_predictions_results(in_bin.limit(NUM_MAX_DISPLAY)))
def updateAnnotation(exp_id, annotations_id, iter_num, instance_id, label,
                     family, method):
    """Update one annotation and optionally log the user action.

    ``iter_num`` is the string ``'None'`` or an integer in string form;
    ``label`` is converted with ``label_str_to_bool`` before being stored.
    The annotation is written through
    ``annotations_db_tools.update_annotation`` and the session is committed.

    When ``user_exp`` is truthy (a name resolved outside this function), one
    comma-separated record
    ``timestamp,update_annotation,iter_num,instance_id,label,family,method``
    is appended to ``user_actions.log`` in the experiment's output
    directory.

    Returns an empty string.
    """
    iter_num = None if iter_num == 'None' else int(iter_num)
    label = label_str_to_bool(label)
    annotations_db_tools.update_annotation(session, annotations_id,
                                           instance_id, label, family,
                                           iter_num, method)
    session.commit()
    if user_exp:
        exp = update_curr_exp(exp_id)
        filename = path.join(exp.output_dir(), 'user_actions.log')
        record = ','.join(map(str, [datetime.datetime.now(),
                                    'update_annotation', iter_num,
                                    instance_id, label, family, method]))
        # Append mode creates the file when it does not exist yet, so no
        # separate existence check is needed.
        with open(filename, 'a') as f:
            # Terminate the record: write() adds no newline, and without
            # one successive records run together on a single line.
            f.write(record + '\n')
    return ''
def getPredictions(exp_id, predicted_value, right_wrong, multiclass):
    """Return, as JSON, predictions of an experiment with a given value.

    Parameters:
        exp_id: experiment whose predictions are queried.
        predicted_value: predicted value to select.
        right_wrong: ``'all'`` for no further filtering, ``'right'`` to keep
            predictions matching the instance's ground truth, ``'wrong'`` to
            keep those that do not match it.
        multiclass: the string ``'true'`` to compare against the instance's
            family; any other value compares against the instance's label,
            using the boolean form of ``predicted_value``.

    The query is passed to the database-specific ``'random_order'`` function
    and limited to ``NUM_MAX_DISPLAY`` rows.

    Raises:
        ValueError: if ``right_wrong`` is not one of the accepted values.
    """
    multiclass = multiclass == 'true'
    query = session.query(PredictionsAlchemy)
    query = query.filter(PredictionsAlchemy.exp_id == exp_id)
    query = query.filter(PredictionsAlchemy.value == predicted_value)
    if right_wrong != 'all':
        # Explicit validation instead of `assert False`: assertions are
        # stripped under `python -O`, which would let an invalid value fall
        # through silently.
        if right_wrong not in ('right', 'wrong'):
            raise ValueError(
                "right_wrong must be 'all', 'right' or 'wrong', got %r"
                % (right_wrong,))
        query = query.join(PredictionsAlchemy.instance)
        if multiclass:
            field = 'family'
        else:
            field = 'label'
            # Instance labels are compared in boolean form.
            predicted_value = label_str_to_bool(predicted_value)
        ground_truth = getattr(InstancesAlchemy, field)
        if right_wrong == 'right':
            query = query.filter(ground_truth == predicted_value)
        else:
            query = query.filter(ground_truth != predicted_value)
    query = call_specific_db_func(secuml_conf.db_type, 'random_order',
                                  (query, ))
    query = query.limit(NUM_MAX_DISPLAY)
    return jsonify(_predictions_results(query))
def get_families_values(self, label='all'):
    """Return the set of non-``None`` families among selected instances.

    With ``label == 'all'`` all of ``self.families`` is considered;
    otherwise only the entries selected by the mask
    ``self.labels == label_str_to_bool(label)``.
    """
    if label == 'all':
        candidates = self.families
    else:
        candidates = self.families[self.labels == label_str_to_bool(label)]
    # Deliberately `!= None` rather than `is not None`: the result is used
    # as an index mask, so the comparison is presumably elementwise on a
    # numpy array -- confirm against the class definition.
    known = candidates != None  # NOQA: 711
    return set(candidates[known])
def num_instances(self, label='all'):
    """Return the number of instances, optionally restricted to a label.

    With ``label == 'all'`` this is ``self.ids.num_instances()``. Otherwise
    it is ``np.sum`` of the comparison between ``self.labels`` and the
    boolean form of ``label`` (presumably an elementwise comparison on a
    numpy array -- confirm against the class definition).
    """
    if label != 'all':
        return np.sum(self.labels == label_str_to_bool(label))
    return self.ids.num_instances()
def update(self, instance_id, label, family):
    """Record a new annotation for ``instance_id``.

    Marks that new annotations exist, stores the boolean form of ``label``
    together with ``family`` on the instances' annotations, and increments
    the counter kept for ``label`` (keyed by the original string).
    """
    self.new_annotations = True
    annotations = self.instances.annotations
    bool_label = labels_tools.label_str_to_bool(label)
    annotations.set_label_family(instance_id, bool_label, family)
    # Keep the per-label annotation count up to date.
    self.num_annotations[label] += 1