def getTopWeightedFeatures(experiment_id, inst_exp_id, instance_id, size,
                           fold_id):
    """Build the bar plot of the most heavily weighted features of an instance.

    The feature values of ``instance_id`` are read from the experiment
    ``inst_exp_id``, scaled with the ``'scaler'`` step of the pipeline stored
    for ``experiment_id`` and multiplied by the ``coef_`` of its ``'model'``
    step. The ``size`` entries with the largest absolute weighted value are
    kept, largest first.

    Args:
        experiment_id: identifier of the experiment whose pipeline is loaded.
        inst_exp_id: identifier of the experiment providing the features.
        instance_id: identifier of the instance (converted with ``int``).
        size: number of features to keep (converted with ``int``).
        fold_id: ``'all'`` (nothing is computed), ``'None'`` (the model is
            read from the experiment output directory) or the name of the
            sub-directory holding the model of one fold.

    Returns:
        ``None`` when ``fold_id == 'all'``, otherwise the ``jsonify``-ed
        bar plot.

    Raises:
        ValueError: if ``instance_id`` or ``size`` cannot be converted to int.
    """
    if fold_id == 'all':
        return None
    instance_id = int(instance_id)
    size = int(size)
    exp = ExperimentFactory.getFactory().fromJson(experiment_id, session)
    validation_experiment = ExperimentFactory.getFactory().fromJson(
        inst_exp_id, session)
    # Get the features of the requested instance.
    features_names, features_values = validation_experiment.getFeatures(
        instance_id)
    features_values = [float(value) for value in features_values]
    # Locate the pipeline (scaler + model) of the requested fold.
    experiment_dir = exp.getOutputDirectory()
    if fold_id != 'None':
        experiment_dir = path.join(experiment_dir, fold_id)
    # NOTE(review): joblib.load unpickles the file, so the model directory
    # must only ever contain trusted files.
    pipeline = joblib.load(path.join(experiment_dir, 'model', 'model.out'))
    # Scale the features; the scaler is given a single-row 2D array.
    scaled_values = pipeline.named_steps['scaler'].transform(
        np.reshape(features_values, (1, -1)))
    weighted_values = np.multiply(scaled_values,
                                  pipeline.named_steps['model'].coef_)
    features = list(zip(features_names, features_values, weighted_values[0]))
    # Ascending sort followed by a backwards slice (rather than
    # reverse=True) keeps the original ordering of entries with equal
    # absolute weight.
    features.sort(key=lambda tup: abs(tup[2]))
    features = features[:-size - 1:-1]
    features_names = [feature[0] for feature in features]
    features_values = [feature[1] for feature in features]
    features_weighted_values = [feature[2] for feature in features]
    # An empty selection must not crash max(): treat it as "short names".
    if features_names:
        max_length = max(len(name) for name in features_names)
    else:
        max_length = 0
    if max_length > 30:
        # Names are too long for the axis: label the bars with their rank
        # and move the name (with the raw value) into the tooltips.
        labels = [str(i) for i in range(len(features_names))]
        tooltips = [
            name + ' (' + str(value) + ')'
            for name, value in zip(features_names, features_values)
        ]
    else:
        labels = features_names
        tooltips = features_values
    barplot = BarPlot(labels)
    dataset = PlotDataset(features_weighted_values, None)
    dataset.setColor(colors_tools.red)
    barplot.addDataset(dataset)
    return jsonify(barplot.toJson(tooltip_data=tooltips))
def getTopModelFeatures(experiment_id, size, train_test, fold_id):
    """Build the bar plot of the model coefficients with the largest magnitude.

    The coefficients are read from the ``mean`` column of
    ``model_coefficients.csv`` (indexed by its first column) located in the
    ``train_test`` sub-directory of the experiment output directory, or of
    the ``fold_id`` sub-directory when a specific fold is requested.

    Args:
        experiment_id: identifier of the experiment to load.
        size: number of coefficients to keep (converted with ``int``).
        train_test: name of the sub-directory holding the coefficients file.
        fold_id: ``'None'`` or ``'all'`` to read from the experiment
            directory itself, otherwise the name of the fold sub-directory.

    Returns:
        The ``jsonify``-ed bar plot. When a kept name is longer than 30
        characters the bars are labelled with their rank and the names are
        passed as ``tooltip_data``.

    Raises:
        ValueError: if ``size`` cannot be converted to int.
    """
    size = int(size)
    exp = ExperimentFactory.getFactory().fromJson(experiment_id, session)
    directory = exp.getOutputDirectory()
    if fold_id not in ('None', 'all'):
        directory = path.join(directory, fold_id)
    directory = path.join(directory, train_test)
    filename = path.join(directory, 'model_coefficients.csv')
    with open(filename, 'r') as f:
        coefficients_df = pd.read_csv(f, header=0, index_col=0)
    model_coefficients = list(coefficients_df['mean'])
    features_names = [str(name) for name in coefficients_df.index]
    coefficients = list(zip(features_names, model_coefficients))
    # Ascending sort followed by a backwards slice (rather than
    # reverse=True) keeps the original ordering of entries with equal
    # absolute value.
    coefficients.sort(key=lambda tup: abs(tup[1]))
    coefficients = coefficients[:-size - 1:-1]
    coefficients_names = [coef[0] for coef in coefficients]
    coefficients_values = [coef[1] for coef in coefficients]
    # An empty selection must not crash max(): treat it as "short names".
    if coefficients_names:
        max_length = max(len(name) for name in coefficients_names)
    else:
        max_length = 0
    if max_length > 30:
        # Names are too long for the axis: label the bars with their rank
        # and show the names (broken before WHERE) as tooltips.
        labels = [str(i) for i in range(len(coefficients_names))]
        tooltips = [
            name.replace(' WHERE', '\nWHERE') for name in coefficients_names
        ]
    else:
        labels = coefficients_names
        tooltips = None
    barplot = BarPlot(labels)
    dataset = PlotDataset(coefficients_values, None)
    if exp.conf.featureImportance() == 'weight':
        dataset.setColor(colors_tools.red)
    barplot.addDataset(dataset)
    if tooltips is None:
        # Short names: toJson() is called without tooltip data, as before.
        return jsonify(barplot.toJson())
    return jsonify(barplot.toJson(tooltip_data=tooltips))
def updateCurrentExperiment(experiment_id):
    """Load and return the experiment identified by ``experiment_id``.

    The lookup is delegated to ``ExperimentFactory.getFactory().fromJson``,
    which is given the ``secuml_conf`` and ``session`` names taken from the
    enclosing scope.
    """
    factory = ExperimentFactory.getFactory()
    return factory.fromJson(experiment_id, secuml_conf, session)
formatter_class=argparse.RawTextHelpFormatter) Experiment.projectDatasetFeturesParser(parser) subparsers = parser.add_subparsers(dest='strategy') subparsers.required = True factory = ActiveLearningConfFactory.getFactory() strategies = factory.getStrategies() for strategy in strategies: strategy_parser = subparsers.add_parser(strategy) factory.generateParser(strategy, strategy_parser) return parser def webTemplate(self): return 'ActiveLearning/active_learning.html' def getCurrentIteration(self): return experiment_db_tools.getCurrentIteration(self.session, self.experiment_id) def setExperimentFromArgs(self, args): self.initExperiment(args.project, args.dataset, experiment_name=args.exp_name) factory = ActiveLearningConfFactory.getFactory() conf = factory.fromArgs(args.strategy, args, logger=self.logger) self.setConf(conf, args.features_file, annotations_filename=args.init_annotations_file) self.export() ExperimentFactory.getFactory().registerClass('ActiveLearningExperiment', ActiveLearningExperiment)
clustering_conf = ClusteringConfFactory.getFactory().fromParam( args.clustering_algo, params, logger=logger) alerts_conf = AlertsConfiguration(args.top_n_alerts, args.detection_threshold, clustering_conf, logger=logger) test_conf = ValidationDatasetConf(args.validation_dataset, alerts_conf=alerts_conf, logger=logger) conf.test_conf = test_conf return conf def setExperimentFromArgs(self, args): self.initExperiment(args.project, args.dataset, experiment_name=args.exp_name) factory = ClassifierConfFactory.getFactory() if args.model == 'AlreadyTrained': self.already_trained = args.model_exp_id conf = self.generateAlreadyTrainedConf(factory, args, self.logger) else: conf = factory.fromArgs(args.model, args, logger=self.logger) self.setConf(conf, args.features_file, annotations_filename='ground_truth.csv') self.export() ExperimentFactory.getFactory().registerClass('ClassificationExperiment', ClassificationExperiment)
class ValidationExperiment(Experiment):
    """Experiment of kind ``'Validation'``, registered in ExperimentFactory."""

    def getKind(self):
        """Return the kind label of this experiment."""
        return 'Validation'

    def generateSuffix(self):
        """Return the (empty) suffix of this experiment."""
        return ''

    def _checkConf(self):
        """Log a warning when ``db_tables.hasGroundTruth(self)`` is falsy."""
        if db_tables.hasGroundTruth(self):
            return
        self.conf.logger.warning(
            'The validation dataset does not have ground-truth.')

    @staticmethod
    def fromJson(obj, secuml_conf):
        """Rebuild a ValidationExperiment from its JSON object ``obj``.

        ``obj['project']`` and ``obj['dataset']`` are passed to
        ``initExperiment`` with ``create=False``; the remaining parameters
        are restored by ``Experiment.expParamFromJson`` with a fresh
        ``Configuration()``.
        """
        restored = ValidationExperiment(secuml_conf)
        restored.initExperiment(obj['project'], obj['dataset'], create=False)
        Experiment.expParamFromJson(restored, obj, Configuration())
        return restored

    def toJson(self):
        """Return ``Experiment.toJson(self)`` tagged with this class name."""
        serialized = Experiment.toJson(self)
        serialized['__type__'] = 'ValidationExperiment'
        return serialized


ExperimentFactory.getFactory().registerClass('ValidationExperiment',
                                             ValidationExperiment)
obj['dataset'], session, create=False) Experiment.expParamFromJson(experiment, obj, None) return experiment def toJson(self): conf = Experiment.toJson(self) conf['__type__'] = 'DescriptiveStatisticsExperiment' return conf @staticmethod def generateParser(): parser = argparse.ArgumentParser( description='Descriptive Statistics of the Dataset') Experiment.projectDatasetFeturesParser(parser) return parser def webTemplate(self): return 'DescriptiveStatistics/descriptive_statistics.html' def setExperimentFromArgs(self, args): self.setConf(None, args.features_file, annotations_filename='ground_truth.csv') self.export() ExperimentFactory.getFactory().registerClass('DescriptiveStatisticsExperiment', DescriptiveStatisticsExperiment)
# NOTE(review): the two functions below take ``self`` and are presumably
# methods of ClusteringExperiment, whose class header is outside this view.
def createDimensionReductionExperiment(self, num_features):
    """Create, export and return the child projection experiment.

    The experiment is named ``<experiment_name>-projection`` and uses
    ``self.conf.projection_conf``. When that configuration has a
    ``num_components`` greater than ``num_features``, it is lowered to
    ``num_features`` in place before being handed to the new experiment.
    """
    child_name = '-'.join((self.experiment_name, 'projection'))
    conf = self.conf.projection_conf
    requested = conf.num_components
    if requested is not None and requested > num_features:
        conf.num_components = num_features
    child = DimensionReductionExperiment(self.project,
                                         self.dataset,
                                         self.session,
                                         experiment_name=child_name,
                                         annotations_id=self.annotations_id,
                                         parent=self.experiment_id,
                                         logger=self.logger)
    child.setConf(conf)
    child.setFeaturesFilenames(self.features_filename)
    child.createExperiment()
    child.export()
    return child


def projectInstances(self, instances):
    """Return the result of running the projection experiment on ``instances``.

    The experiment is created from ``instances.numFeatures()`` and run with
    ``export=False``.
    """
    child = self.createDimensionReductionExperiment(instances.numFeatures())
    return child.run(instances=instances, export=False)


ExperimentFactory.getFactory().registerClass('ClusteringExperiment',
                                             ClusteringExperiment)
# NOTE(review): the definitions below are presumably members of
# FeatureSelectionExperiment, whose class header is outside this view.
def toJson(self):
    """Return the parent JSON object tagged as a feature selection experiment.

    The object comes from ``DimensionReductionExperiment.toJson`` and gets
    the ``'__type__'`` and ``'conf'`` (``self.conf.toJson()``) entries.
    """
    serialized = DimensionReductionExperiment.toJson(self)
    serialized['__type__'] = 'FeatureSelectionExperiment'
    serialized['conf'] = self.conf.toJson()
    return serialized


@staticmethod
def generateParser():
    """Build the argument parser with one required sub-command per algorithm."""
    parser = argparse.ArgumentParser(description='Feature Selection.')
    DimensionReductionExperiment.projectDatasetFeturesParser(parser)
    DimensionReductionExperiment.generateDimensionReductionParser(parser)
    subparsers = parser.add_subparsers(dest='algo')
    subparsers.required = True
    factory = DimensionReductionConfFactory.getFactory()
    # Each algorithm gets its own sub-parser, filled in by the conf factory.
    for algo in ('ChiSquare', 'Fclassif', 'MutualInfoClassif',
                 'VarianceFilter', 'RecursiveFeatureElimination',
                 'NoneFilter'):
        factory.generateParser(algo, subparsers.add_parser(algo))
    return parser


def webTemplate(self):
    """Return the template path string of the feature selection page."""
    return 'DimensionReduction/feature_selection.html'


ExperimentFactory.getFactory().registerClass('FeatureSelectionExperiment',
                                             FeatureSelectionExperiment)
# NOTE(review): the definitions below are presumably members of
# RareCategoryDetectionExperiment, whose class header is outside this view.
@staticmethod
def generateParser():
    """Build the argument parser of the rare category detection experiment."""
    parser = argparse.ArgumentParser(
        description='Rare Category Detection',
        formatter_class=argparse.RawTextHelpFormatter)
    ActiveLearningExperiment.projectDatasetFeturesParser(parser)
    # The strategy-specific arguments are added by the conf factory.
    ActiveLearningConfFactory.getFactory().generateParser(
        'RareCategoryDetection', parser)
    return parser


def webTemplate(self):
    """Return the template path string of the active learning page."""
    return 'ActiveLearning/active_learning.html'


def getCurrentIteration(self):
    """Return ``experiment_db_tools.getCurrentIteration`` for this experiment."""
    return experiment_db_tools.getCurrentIteration(self.session,
                                                   self.experiment_id)


def setExperimentFromArgs(self, args):
    """Configure the experiment from parsed arguments, then export it.

    The configuration is built by the conf factory for the
    ``'RareCategoryDetection'`` strategy and set together with
    ``args.features_file`` and ``args.init_annotations_file``.
    """
    conf_factory = ActiveLearningConfFactory.getFactory()
    strategy_conf = conf_factory.fromArgs('RareCategoryDetection', args,
                                          logger=self.logger)
    self.setConf(strategy_conf,
                 args.features_file,
                 annotations_filename=args.init_annotations_file)
    self.export()


ExperimentFactory.getFactory().registerClass('RareCategoryDetectionExperiment',
                                             RareCategoryDetectionExperiment)
DimensionReductionExperiment.expParamFromJson(experiment, obj, conf) return experiment def toJson(self): conf = DimensionReductionExperiment.toJson(self) conf['__type__'] = 'ProjectionExperiment' conf['conf'] = self.conf.toJson() return conf @staticmethod def generateParser(): parser = argparse.ArgumentParser( description='Projection of the data for data visualization.') DimensionReductionExperiment.projectDatasetFeturesParser(parser) DimensionReductionExperiment.generateDimensionReductionParser(parser) algos = ['Pca', 'Rca', 'Lda', 'Lmnn', 'Nca', 'Itml'] subparsers = parser.add_subparsers(dest='algo') subparsers.required = True factory = DimensionReductionConfFactory.getFactory() for algo in algos: algo_parser = subparsers.add_parser(algo) factory.generateParser(algo, algo_parser) return parser def webTemplate(self): return 'DimensionReduction/projection.html' ExperimentFactory.getFactory().registerClass('ProjectionExperiment', ProjectionExperiment)