Ejemplo n.º 1
0
def getTopWeightedFeatures(experiment_id, inst_exp_id, instance_id, size,
                           fold_id):
    """Return a bar plot of the most heavily weighted features of an instance.

    The feature values of the instance are scaled with the ``'scaler'`` step
    of the stored pipeline and multiplied element-wise by the ``coef_`` of
    its ``'model'`` step. The ``size`` features with the largest absolute
    weighted value are kept, largest first.

    Args:
        experiment_id: identifier of the experiment whose output directory
            contains the pipeline (``model/model.out``).
        inst_exp_id: identifier of the experiment the features of the
            instance are read from.
        instance_id: identifier of the instance; converted with ``int``.
        size: number of features to keep; converted with ``int``.
        fold_id: ``'all'`` (nothing is computed), ``'None'`` (the pipeline
            is loaded from the experiment directory itself) or the name of
            the sub-directory holding the pipeline.

    Returns:
        ``None`` when ``fold_id`` is ``'all'``; otherwise the value of
        ``jsonify`` applied to the JSON of the bar plot.
    """
    if fold_id == 'all':
        return None
    instance_id = int(instance_id)
    size = int(size)
    exp = ExperimentFactory.getFactory().fromJson(experiment_id, session)
    validation_experiment = ExperimentFactory.getFactory().fromJson(
        inst_exp_id, session)
    # Raw feature values of the instance.
    features_names, features_values = validation_experiment.getFeatures(
        instance_id)
    features_values = [float(value) for value in features_values]
    # Pipeline holding the scaler and the model.
    experiment_dir = exp.getOutputDirectory()
    if fold_id != 'None':
        experiment_dir = path.join(experiment_dir, fold_id)
    # NOTE(review): joblib.load unpickles the file, and fold_id is joined
    # into the path as received -- confirm that fold_id cannot point outside
    # the experiment directory.
    pipeline = joblib.load(path.join(experiment_dir, 'model', 'model.out'))
    # Scale the single instance (one row), then weight it by the coefficients.
    scaled_values = pipeline.named_steps['scaler'].transform(
        np.reshape(features_values, (1, -1)))
    weighted_values = np.multiply(scaled_values,
                                  pipeline.named_steps['model'].coef_)
    features = list(zip(features_names, features_values, weighted_values[0]))
    # Ascending sort followed by a reversed slice: the `size` entries with
    # the largest absolute weighted value, largest first.
    features.sort(key=lambda tup: abs(tup[2]))
    features = features[:-size - 1:-1]

    features_names = [x[0] for x in features]
    features_values = [x[1] for x in features]
    features_weighted_values = [x[2] for x in features]

    # default=0 keeps an empty selection (e.g. size == 0) from raising.
    max_length = max((len(name) for name in features_names), default=0)
    if max_length > 30:
        # Names too long for the axis: use indexes as labels and move the
        # names (with the raw values) to the tooltips.
        labels = [str(i) for i in range(len(features_names))]
        tooltips = [
            name + ' (' + str(value) + ')'
            for name, value in zip(features_names, features_values)
        ]
    else:
        labels = features_names
        tooltips = features_values
    barplot = BarPlot(labels)
    dataset = PlotDataset(features_weighted_values, None)
    dataset.setColor(colors_tools.red)
    barplot.addDataset(dataset)
    return jsonify(barplot.toJson(tooltip_data=tooltips))
Ejemplo n.º 2
0
def getTopModelFeatures(experiment_id, size, train_test, fold_id):
    """Return a bar plot of the model coefficients with the largest magnitude.

    The coefficients are read from the ``mean`` column of
    ``model_coefficients.csv``; the index of the CSV provides the feature
    names. The ``size`` coefficients with the largest absolute value are
    kept, largest first.

    Args:
        experiment_id: identifier of the experiment.
        size: number of coefficients to keep; converted with ``int``.
        train_test: name of the sub-directory containing the CSV file.
        fold_id: ``'None'`` or ``'all'`` to read from the experiment output
            directory itself, otherwise the name of an intermediate
            sub-directory.

    Returns:
        The value of ``jsonify`` applied to the JSON of the bar plot. When
        the longest feature name exceeds 30 characters, the bars are
        labelled with indexes and the names are passed as tooltips, with a
        line break in place of the space before each ``WHERE``.
    """
    size = int(size)
    exp = ExperimentFactory.getFactory().fromJson(experiment_id, session)
    directory = exp.getOutputDirectory()
    if fold_id not in ('None', 'all'):
        directory = path.join(directory, fold_id)
    filename = path.join(directory, train_test, 'model_coefficients.csv')
    # Only the read needs the file handle; release it before the processing.
    with open(filename, 'r') as f:
        coefficients_df = pd.read_csv(f, header=0, index_col=0)

    coefficients = list(
        zip(map(str, coefficients_df.index), coefficients_df['mean']))
    # Ascending sort followed by a reversed slice: the `size` entries with
    # the largest absolute coefficient, largest first.
    coefficients.sort(key=lambda tup: abs(tup[1]))
    coefficients = coefficients[:-size - 1:-1]

    coefficients_names = [coef[0] for coef in coefficients]
    coefficients_values = [coef[1] for coef in coefficients]
    # default=0 keeps an empty selection (size == 0 or an empty CSV) from
    # raising.
    max_length = max((len(name) for name in coefficients_names), default=0)
    long_names = max_length > 30

    if long_names:
        labels = [str(i) for i in range(len(coefficients_names))]
    else:
        labels = coefficients_names
    barplot = BarPlot(labels)
    dataset = PlotDataset(coefficients_values, None)
    if exp.conf.featureImportance() == 'weight':
        dataset.setColor(colors_tools.red)
    barplot.addDataset(dataset)

    if long_names:
        tooltips = [
            name.replace(' WHERE', '\nWHERE') for name in coefficients_names
        ]
        return jsonify(barplot.toJson(tooltip_data=tooltips))
    return jsonify(barplot.toJson())
Ejemplo n.º 3
0
def updateCurrentExperiment(experiment_id):
    """Build and return the experiment identified by ``experiment_id``.

    The experiment comes from ``fromJson`` of the factory returned by
    ``ExperimentFactory.getFactory()``, called with the ``secuml_conf`` and
    ``session`` globals.
    """
    factory = ExperimentFactory.getFactory()
    return factory.fromJson(experiment_id, secuml_conf, session)
Ejemplo n.º 4
0
                formatter_class=argparse.RawTextHelpFormatter)
        Experiment.projectDatasetFeturesParser(parser)
        subparsers = parser.add_subparsers(dest='strategy')
        subparsers.required = True
        factory = ActiveLearningConfFactory.getFactory()
        strategies = factory.getStrategies()
        for strategy in strategies:
            strategy_parser = subparsers.add_parser(strategy)
            factory.generateParser(strategy, strategy_parser)
        return parser

    def webTemplate(self):
        """Return the template path for this kind of experiment."""
        template_path = 'ActiveLearning/active_learning.html'
        return template_path

    def getCurrentIteration(self):
        """Return ``experiment_db_tools.getCurrentIteration`` for this experiment.

        The lookup uses ``self.session`` and ``self.experiment_id``.
        """
        session, exp_id = self.session, self.experiment_id
        return experiment_db_tools.getCurrentIteration(session, exp_id)

    def setExperimentFromArgs(self, args):
        """Initialise, configure and export the experiment from ``args``.

        Reads ``project``, ``dataset``, ``exp_name``, ``strategy``,
        ``features_file`` and ``init_annotations_file`` from ``args``. The
        configuration is built by the ``ActiveLearningConfFactory`` factory
        for the selected strategy.
        """
        self.initExperiment(args.project, args.dataset,
                            experiment_name=args.exp_name)
        conf_factory = ActiveLearningConfFactory.getFactory()
        strategy_conf = conf_factory.fromArgs(args.strategy, args,
                                              logger=self.logger)
        self.setConf(strategy_conf, args.features_file,
                     annotations_filename=args.init_annotations_file)
        self.export()


# Module-level statement: registers ActiveLearningExperiment with the factory
# returned by ExperimentFactory.getFactory(), under the name
# 'ActiveLearningExperiment'.
ExperimentFactory.getFactory().registerClass('ActiveLearningExperiment',
                                             ActiveLearningExperiment)
Ejemplo n.º 5
0
            clustering_conf = ClusteringConfFactory.getFactory().fromParam(
                args.clustering_algo, params, logger=logger)
            alerts_conf = AlertsConfiguration(args.top_n_alerts,
                                              args.detection_threshold,
                                              clustering_conf,
                                              logger=logger)
            test_conf = ValidationDatasetConf(args.validation_dataset,
                                              alerts_conf=alerts_conf,
                                              logger=logger)
            conf.test_conf = test_conf
        return conf

    def setExperimentFromArgs(self, args):
        """Initialise, configure and export the experiment from ``args``.

        When ``args.model`` is ``'AlreadyTrained'``, ``args.model_exp_id`` is
        stored in ``self.already_trained`` and the configuration comes from
        ``generateAlreadyTrainedConf``. Otherwise it is built by the
        ``ClassifierConfFactory`` factory for the selected model. Annotations
        are always read from ``'ground_truth.csv'``.
        """
        self.initExperiment(args.project,
                            args.dataset,
                            experiment_name=args.exp_name)
        conf_factory = ClassifierConfFactory.getFactory()
        if args.model != 'AlreadyTrained':
            model_conf = conf_factory.fromArgs(args.model, args,
                                               logger=self.logger)
        else:
            self.already_trained = args.model_exp_id
            model_conf = self.generateAlreadyTrainedConf(
                conf_factory, args, self.logger)
        self.setConf(model_conf,
                     args.features_file,
                     annotations_filename='ground_truth.csv')
        self.export()


# Module-level statement: registers ClassificationExperiment with the factory
# returned by ExperimentFactory.getFactory(), under the name
# 'ClassificationExperiment'.
ExperimentFactory.getFactory().registerClass('ClassificationExperiment',
                                             ClassificationExperiment)
Ejemplo n.º 6
0

class ValidationExperiment(Experiment):
    """Experiment of kind ``'Validation'``."""

    def getKind(self):
        """Return the kind of the experiment: ``'Validation'``."""
        return 'Validation'

    def generateSuffix(self):
        """Return the experiment suffix, which is always the empty string."""
        return ''

    def _checkConf(self):
        """Log a warning when ``db_tables.hasGroundTruth`` is false."""
        if db_tables.hasGroundTruth(self):
            return
        self.conf.logger.warning(
            'The validation dataset does not have ground-truth.')

    @staticmethod
    def fromJson(obj, secuml_conf):
        """Build a ValidationExperiment from its JSON description ``obj``.

        ``obj`` must provide the ``'project'`` and ``'dataset'`` keys. The
        remaining parameters are restored by ``Experiment.expParamFromJson``
        with a fresh ``Configuration``.
        """
        validation_exp = ValidationExperiment(secuml_conf)
        project = obj['project']
        dataset = obj['dataset']
        validation_exp.initExperiment(project, dataset, create=False)
        Experiment.expParamFromJson(validation_exp, obj, Configuration())
        return validation_exp

    def toJson(self):
        """Return the base JSON tagged with the ``ValidationExperiment`` type."""
        serialized = Experiment.toJson(self)
        serialized['__type__'] = 'ValidationExperiment'
        return serialized


# Module-level statement: registers ValidationExperiment with the factory
# returned by ExperimentFactory.getFactory(), under the name
# 'ValidationExperiment' (the same string toJson stores in '__type__').
ExperimentFactory.getFactory().registerClass('ValidationExperiment',
                                             ValidationExperiment)
Ejemplo n.º 7
0
                                                     obj['dataset'],
                                                     session,
                                                     create=False)
        Experiment.expParamFromJson(experiment, obj, None)
        return experiment

    def toJson(self):
        """Return the base JSON tagged as ``DescriptiveStatisticsExperiment``."""
        serialized = Experiment.toJson(self)
        serialized['__type__'] = 'DescriptiveStatisticsExperiment'
        return serialized

    @staticmethod
    def generateParser():
        """Return the command-line parser of the experiment.

        The parser is created here and then passed to
        ``Experiment.projectDatasetFeturesParser``.
        """
        description = 'Descriptive Statistics of the Dataset'
        stats_parser = argparse.ArgumentParser(description=description)
        Experiment.projectDatasetFeturesParser(stats_parser)
        return stats_parser

    def webTemplate(self):
        """Return the template path for this kind of experiment."""
        template_path = 'DescriptiveStatistics/descriptive_statistics.html'
        return template_path

    def setExperimentFromArgs(self, args):
        """Configure the experiment from ``args`` and export it.

        No configuration object is used (``None`` is passed to ``setConf``).
        The features file comes from ``args.features_file`` and annotations
        are read from ``'ground_truth.csv'``.
        """
        features_file = args.features_file
        self.setConf(None, features_file,
                     annotations_filename='ground_truth.csv')
        self.export()


# Module-level statement: registers DescriptiveStatisticsExperiment with the
# factory returned by ExperimentFactory.getFactory(), under the name
# 'DescriptiveStatisticsExperiment'.
ExperimentFactory.getFactory().registerClass('DescriptiveStatisticsExperiment',
                                             DescriptiveStatisticsExperiment)
Ejemplo n.º 8
0
    def createDimensionReductionExperiment(self, num_features):
        """Create, export and return the projection sub-experiment.

        The sub-experiment is named ``<experiment_name>-projection`` and has
        this experiment as parent. ``num_components`` of the projection
        configuration is lowered in place to ``num_features`` when it is set
        and exceeds it.
        """
        exp_name = '-'.join((self.experiment_name, 'projection'))
        conf = self.conf.projection_conf
        requested = conf.num_components
        if requested is not None and requested > num_features:
            # Cap the number of components at the number of features.
            conf.num_components = num_features
        child_exp = DimensionReductionExperiment(
            self.project,
            self.dataset,
            self.session,
            experiment_name=exp_name,
            annotations_id=self.annotations_id,
            parent=self.experiment_id,
            logger=self.logger)
        child_exp.setConf(conf)
        child_exp.setFeaturesFilenames(self.features_filename)
        child_exp.createExperiment()
        child_exp.export()
        return child_exp

    def projectInstances(self, instances):
        """Return ``instances`` projected by a new projection sub-experiment.

        The sub-experiment is created for ``instances.numFeatures()`` features
        and run with ``export=False``.
        """
        num_features = instances.numFeatures()
        sub_experiment = self.createDimensionReductionExperiment(num_features)
        return sub_experiment.run(instances=instances, export=False)


# Module-level statement: registers ClusteringExperiment with the factory
# returned by ExperimentFactory.getFactory(), under the name
# 'ClusteringExperiment'.
ExperimentFactory.getFactory().registerClass('ClusteringExperiment',
                                             ClusteringExperiment)
Ejemplo n.º 9
0
    def toJson(self):
        """Return the JSON description of the experiment.

        Starts from ``DimensionReductionExperiment.toJson`` and sets the
        ``'__type__'`` and ``'conf'`` entries.
        """
        serialized = DimensionReductionExperiment.toJson(self)
        serialized['__type__'] = 'FeatureSelectionExperiment'
        serialized['conf'] = self.conf.toJson()
        return serialized

    @staticmethod
    def generateParser():
        """Return the command-line parser for feature selection.

        A required ``algo`` sub-command is added for each supported
        algorithm. The options of each sub-command are filled in by the
        ``DimensionReductionConfFactory`` factory.
        """
        selection_parser = argparse.ArgumentParser(
            description='Feature Selection.')
        DimensionReductionExperiment.projectDatasetFeturesParser(
            selection_parser)
        DimensionReductionExperiment.generateDimensionReductionParser(
            selection_parser)
        algo_subparsers = selection_parser.add_subparsers(dest='algo')
        algo_subparsers.required = True
        conf_factory = DimensionReductionConfFactory.getFactory()
        for algo_name in ('ChiSquare', 'Fclassif', 'MutualInfoClassif',
                          'VarianceFilter', 'RecursiveFeatureElimination',
                          'NoneFilter'):
            conf_factory.generateParser(
                algo_name, algo_subparsers.add_parser(algo_name))
        return selection_parser

    def webTemplate(self):
        """Return the template path for this kind of experiment."""
        template_path = 'DimensionReduction/feature_selection.html'
        return template_path


# Module-level statement: registers FeatureSelectionExperiment with the
# factory returned by ExperimentFactory.getFactory(), under the name
# 'FeatureSelectionExperiment'.
ExperimentFactory.getFactory().registerClass('FeatureSelectionExperiment',
                                             FeatureSelectionExperiment)
Ejemplo n.º 10
0
    @staticmethod
    def generateParser():
        """Return the command-line parser for rare category detection.

        The ``'RareCategoryDetection'`` options are added by the
        ``ActiveLearningConfFactory`` factory.
        """
        rcd_parser = argparse.ArgumentParser(
            formatter_class=argparse.RawTextHelpFormatter,
            description='Rare Category Detection')
        ActiveLearningExperiment.projectDatasetFeturesParser(rcd_parser)
        conf_factory = ActiveLearningConfFactory.getFactory()
        conf_factory.generateParser('RareCategoryDetection', rcd_parser)
        return rcd_parser

    def webTemplate(self):
        """Return the template path for this kind of experiment."""
        template_path = 'ActiveLearning/active_learning.html'
        return template_path

    def getCurrentIteration(self):
        """Return ``experiment_db_tools.getCurrentIteration`` for this experiment.

        The lookup uses ``self.session`` and ``self.experiment_id``.
        """
        session, exp_id = self.session, self.experiment_id
        return experiment_db_tools.getCurrentIteration(session, exp_id)

    def setExperimentFromArgs(self, args):
        """Configure the experiment from ``args`` and export it.

        The configuration is built by the ``ActiveLearningConfFactory``
        factory for the fixed ``'RareCategoryDetection'`` strategy. The
        features file and the initial annotations file are read from
        ``args``.
        """
        conf_factory = ActiveLearningConfFactory.getFactory()
        rcd_conf = conf_factory.fromArgs(
            'RareCategoryDetection', args, logger=self.logger)
        self.setConf(
            rcd_conf,
            args.features_file,
            annotations_filename=args.init_annotations_file)
        self.export()


# Module-level statement: registers RareCategoryDetectionExperiment with the
# factory returned by ExperimentFactory.getFactory(), under the name
# 'RareCategoryDetectionExperiment'.
ExperimentFactory.getFactory().registerClass('RareCategoryDetectionExperiment',
                                             RareCategoryDetectionExperiment)
Ejemplo n.º 11
0
        DimensionReductionExperiment.expParamFromJson(experiment, obj, conf)
        return experiment

    def toJson(self):
        """Return the JSON description of the experiment.

        Starts from ``DimensionReductionExperiment.toJson`` and sets the
        ``'__type__'`` and ``'conf'`` entries.
        """
        serialized = DimensionReductionExperiment.toJson(self)
        serialized['__type__'] = 'ProjectionExperiment'
        serialized['conf'] = self.conf.toJson()
        return serialized

    @staticmethod
    def generateParser():
        """Return the command-line parser for projection experiments.

        A required ``algo`` sub-command is added for each supported
        algorithm. The options of each sub-command are filled in by the
        ``DimensionReductionConfFactory`` factory.
        """
        projection_parser = argparse.ArgumentParser(
            description='Projection of the data for data visualization.')
        DimensionReductionExperiment.projectDatasetFeturesParser(
            projection_parser)
        DimensionReductionExperiment.generateDimensionReductionParser(
            projection_parser)
        algo_subparsers = projection_parser.add_subparsers(dest='algo')
        algo_subparsers.required = True
        conf_factory = DimensionReductionConfFactory.getFactory()
        for algo_name in ('Pca', 'Rca', 'Lda', 'Lmnn', 'Nca', 'Itml'):
            conf_factory.generateParser(
                algo_name, algo_subparsers.add_parser(algo_name))
        return projection_parser

    def webTemplate(self):
        """Return the template path for this kind of experiment."""
        template_path = 'DimensionReduction/projection.html'
        return template_path


# Module-level statement: registers ProjectionExperiment with the factory
# returned by ExperimentFactory.getFactory(), under the name
# 'ProjectionExperiment'.
ExperimentFactory.getFactory().registerClass('ProjectionExperiment',
                                             ProjectionExperiment)