예제 #1
0
    def _default_parameters(self) -> Dict[str, Any]:
        """Build the parameters that every exploration reports.

        The analysis engine is read from the 'DataAnalysis' section of the
        configuration. When it equals ``ANALYSIS_ENGINES[1]``, the configured
        'modin_engine' value is appended between square brackets.

        Returns:
            A dictionary mapping the default ``ExplorationParameters`` keys to
            their value for this exploration.
        """
        engine_label = params.get('DataAnalysis', 'engine')
        if engine_label == ANALYSIS_ENGINES[1]:
            modin_backend = params.get('DataAnalysis', 'modin_engine')
            engine_label = f"{engine_label}[{modin_backend}]"

        # Filled in the same order as the keys are reported
        defaults = {}
        defaults[ExplorationParameters.METHOD] = self.__class__.__name__
        defaults[ExplorationParameters.SENSITIVITY_MEASURE] = str(
            self._sensitivity)
        defaults[ExplorationParameters.USABILITY_COST_MEASURE] = str(
            self._usability_cost)
        defaults[ExplorationParameters.DATASET] = str(self._dataset)
        defaults[ExplorationParameters.SENSITIVITY_THRESHOLD] = (
            self._sensitivity_threshold)
        defaults[ExplorationParameters.ANALYSIS_ENGINE] = engine_label
        defaults[ExplorationParameters.MULTIPROCESSING] = params.getboolean(
            'Multiprocessing', 'explorations')
        defaults[ExplorationParameters.FREE_CORES] = params.getint(
            'Multiprocessing', 'free_cores')
        return defaults
예제 #2
0
    def setUp(self):
        """Prepare a ConditionalEntropy exploration with multiprocessing on.

        The test is skipped when the configured data analysis engine is
        'modin.pandas', as the multiprocessing test is incompatible with
        modin.
        """
        # TestCase.skipTest requires a reason: calling it without one raises
        # a TypeError, which reports the test as an error and not as skipped
        if params.get('DataAnalysis', 'engine') == 'modin.pandas':
            self.skipTest('The multiprocessing explorations are incompatible '
                          'with the modin engine.')

        self._dataset = DummyCleanDataset()
        self._sensitivity_measure = DummySensitivity()
        self._usability_cost_measure = DummyUsabilityCostMeasure()
        self._sensitivity_threshold = SENSITIVITY_THRESHOLD
        self._trace_path = TRACE_FILENAME
        self._expected_trace_path = EXPECTED_TRACE_PATH
        self._exploration = ConditionalEntropy(
            self._sensitivity_measure, self._usability_cost_measure,
            self._dataset, self._sensitivity_threshold)
        # Turn the multiprocessing on for the explorations of this test case
        params.set('Multiprocessing', 'explorations', 'true')
예제 #3
0
def attribute_set_entropy(df_one_fp_per_browser: pd.DataFrame,
                          attribute_set: AttributeSet) -> float:
    """Compute the entropy of the fingerprints restricted to an attribute set.

    Args:
        df_one_fp_per_browser: The dataframe holding a single fingerprint per
                               browser.
        attribute_set: The non-empty attribute set on which the fingerprints
                       are projected before computing the entropy.

    Returns:
        The entropy, in base ENTROPY_BASE, of the distribution of the distinct
        projected fingerprints.

    Raises:
        ValueError: The attribute set or the fingerprint dataset is empty.
        KeyError: An attribute is not in the fingerprint dataset.

    Note:
        This function always works on a pandas dataframe: a modin dataframe
        is converted back to pandas first.
    """
    # Neither an empty attribute set nor an empty dataset has an entropy
    empty_input_message = ('Cannot compute the entropy considering an empty '
                           'dataset or an empty attribute set.')
    if not attribute_set:
        raise ValueError(empty_input_message)
    if df_one_fp_per_browser.empty:
        raise ValueError(empty_input_message)

    # Fall back to pandas when the configured engine is modin
    configured_engine = params.get('DataAnalysis', 'engine')
    if configured_engine == ANALYSIS_ENGINES[1]:
        logger.warning('The attribute_set_entropy function badly supports the '
                       'modin engine. We switch back to pandas in this '
                       'function.')
        df_one_fp_per_browser = df_one_fp_per_browser._to_pandas()

    # Keep only the columns of the considered attributes
    wanted_columns = [attribute.name for attribute in attribute_set]
    projection = df_one_fp_per_browser[wanted_columns]

    # Cast to strings so that the fingerprints containing NaN values are not
    # ignored when counting
    textual_projection = projection.astype('str')

    # Proportion of the browsers sharing each distinct fingerprint
    fingerprint_shares = textual_projection.value_counts(normalize=True,
                                                         sort=False)

    # Name the resulting column COUNT_FIELD and keep it as a Series
    share_series = fingerprint_shares.reset_index(
        name=COUNT_FIELD)[COUNT_FIELD]

    return entropy(share_series, base=ENTROPY_BASE)
예제 #4
0
    def setUp(self):
        """Prepare a multi-path FPSelect exploration with multiprocessing on.

        The exploration uses MULTI_EXPLR_PATHS explored paths and the pruning
        set to PRUNING_OFF. The test is skipped when the configured data
        analysis engine is 'modin.pandas', as the multiprocessing test is
        incompatible with modin.
        """
        # TestCase.skipTest requires a reason: calling it without one raises
        # a TypeError, which reports the test as an error and not as skipped
        if params.get('DataAnalysis', 'engine') == 'modin.pandas':
            self.skipTest('The multiprocessing explorations are incompatible '
                          'with the modin engine.')

        self._dataset = DummyCleanDataset()
        self._sensitivity_measure = DummySensitivity()
        self._usability_cost_measure = DummyUsabilityCostMeasure()
        self._sensitivity_threshold = SENSITIVITY_THRESHOLD
        self._trace_path = TRACE_FILENAME
        self._expected_trace_path = EXPECTED_TRACE_PATH_MULTIPATH_PRUNING_OFF
        self._pruning = PRUNING_OFF
        self._explored_paths = MULTI_EXPLR_PATHS
        self._exploration = FPSelect(self._sensitivity_measure,
                                     self._usability_cost_measure,
                                     self._dataset,
                                     self._sensitivity_threshold,
                                     explored_paths=self._explored_paths,
                                     pruning=self._pruning)
        # Turn the multiprocessing on for the explorations of this test case
        params.set('Multiprocessing', 'explorations', 'true')
예제 #5
0
def trace_configuration():
    """Configure the trace file and the optional dataset to replay a trace.

    On a POST request, the previously loaded trace, dataset and exploration
    are cleared, then the required 'trace-file' (json) and the optional
    'fingerprint-dataset' (csv) are loaded from the request. On any other
    request method, the configuration page is displayed.

    Returns:
        A redirection to the 'trace_replay' page when the trace is loaded,
        the rendered 'trace-configuration.html' page otherwise.
    """
    global TRACE_DATA
    global FINGERPRINT_DATASET
    global REAL_TIME_EXPLORATION
    global EXPLORATION_PROCESS

    # -------------------------- POST request handle --------------------------
    if request.method == 'POST':
        # ------------------- Manage the required trace file ------------------
        # Clear the previous data if there were some
        TRACE_DATA, FINGERPRINT_DATASET = None, None
        if EXPLORATION_PROCESS:
            EXPLORATION_PROCESS.terminate()
            EXPLORATION_PROCESS = None
        REAL_TIME_EXPLORATION = None

        # Check that the trace file is in the received POST request
        # NOTE(review): no message is flashed here, presumably
        # erroneous_post_file already reports the error - confirm
        trace_file_error_message = erroneous_post_file(
            request, 'trace-file', expected_extension='json')
        if trace_file_error_message:
            return render_template('trace-configuration.html')

        # Load the content of the trace file as a dictionary from the json
        try:
            TRACE_DATA = json.load(request.files['trace-file'])
        except JSONDecodeError:
            error_message = 'The trace file is not correctly formated.'
            flash(error_message, params.get('WebServer', 'flash_error_class'))
            logger.error(error_message)
            return render_template('trace-configuration.html')

        # Check the content of the trace file
        if error_message := trace_file_errors(TRACE_DATA):
            flash(error_message, params.get('WebServer', 'flash_error_class'))
            logger.error(error_message)
            return render_template('trace-configuration.html')

        logger.info('The trace is correct and set.')
        # --------- End of the management of the required trace file ----------

        # ------------ Manage the optional fingerprint dataset file -----------
        # Process the fingerprint dataset file if there is one provided
        dataset_provided = ('fingerprint-dataset' in request.files
                            and request.files['fingerprint-dataset'])
        if dataset_provided:
            # Check that the fingerprint dataset is in the POST request
            fp_dataset_error_message = erroneous_post_file(
                request, 'fingerprint-dataset', expected_extension='csv')
            if not fp_dataset_error_message:
                # Try to load the fingerprint dataset, we ignore the dataset if
                # there is an error and display a warning to the user
                try:
                    FINGERPRINT_DATASET = FingerprintDatasetFromCSVInMemory(
                        request.files['fingerprint-dataset'])
                    logger.debug('The fingerprint dataset is set.')
                except MissingMetadatasFields as mmf_error:
                    error_message = ('Ignored the fingerprint dataset due to '
                                     'the error: ' + str(mmf_error))
                    flash(error_message,
                          params.get('WebServer', 'flash_warning_class'))
                    logger.warning(error_message)
        # -- End of the management of the optional fingerprint dataset file ---

        # At the end, redirect to the trace replay page
        return redirect(url_for('trace_replay'))
    # ----------------------- End of POST request handle ----------------------

    # Any other request method displays the configuration page. Without this
    # return the view would return None, which Flask does not accept as a
    # response.
    return render_template('trace-configuration.html')
예제 #6
0
def attribute_set_information(attribute_set_id: int):
    """Show information about an attribute set.

    The attribute set is read from the real time exploration if there is one,
    from the trace otherwise. The id -1 designates the starting empty node.
    The request is aborted with a NOT_FOUND status when neither a trace nor a
    real time exploration is set, or when the id is not found.

    Args:
        attribute_set_id: The id of the attribute set to show.

    Returns:
        The rendered 'attribute-set-information.html' page.
    """
    logger.info('Getting the information about the attribute set '
                f'{attribute_set_id}.')

    # The attributes and the usability cost of the candidate attributes are
    # read below from the exploration or from the trace, including for the
    # empty node (id -1). Hence refuse the request first if neither is set.
    if not REAL_TIME_EXPLORATION and not TRACE_DATA:
        error_message = ('Accessing the attribute set information page '
                         'requires a trace or a real time exploration to be '
                         'set.')
        logger.error(error_message)
        abort(HTTPStatus.NOT_FOUND, description=error_message)

    # Check that there is an explored attribute set with this id
    attribute_set_infos = None
    if attribute_set_id == -1:
        attribute_set_infos = EMPTY_NODE
    elif REAL_TIME_EXPLORATION:
        attribute_set_infos_list = (
            REAL_TIME_EXPLORATION.get_explored_attribute_sets(
                attribute_set_id, attribute_set_id + 1))
        if attribute_set_infos_list:
            attribute_set_infos = attribute_set_infos_list[0]
            attribute_set_infos['id'] = attribute_set_id
    else:
        # The first explored attribute set of the trace having this id
        attribute_set_infos = next(
            (explored_attr_set
             for explored_attr_set in TRACE_DATA['exploration']
             if explored_attr_set['id'] == attribute_set_id), None)

    if not attribute_set_infos:
        error_message = (f'The attribute set id {attribute_set_id} was not'
                         ' found.')
        logger.error(error_message)
        abort(HTTPStatus.NOT_FOUND, description=error_message)

    # Generate the attribute set object and get the names of these attributes
    if REAL_TIME_EXPLORATION:
        attributes = AttributeSet(
            FINGERPRINT_DATASET.candidate_attributes.get_attribute_by_id(
                attribute_id)
            for attribute_id in attribute_set_infos['attributes'])
    else:
        # The trace maps the attribute ids, as strings, to their name
        attributes = AttributeSet(
            Attribute(attribute_id, TRACE_DATA['attributes'][str(
                attribute_id)])
            for attribute_id in attribute_set_infos['attributes'])
    attribute_names = [attribute.name for attribute in attributes]

    # If there is a fingerprint dataset, compute the additional/optional
    # results from it (the subset for now)
    fingerprint_sample = None
    if attribute_set_id == -1:
        pass  # Avoid trying to get the subset with an empty attribute set
    elif FINGERPRINT_DATASET:
        # Collect a sample of the resulting fingerprints
        attr_subset_sample = AttributeSetSample(
            FINGERPRINT_DATASET, attributes,
            params.getint('WebServer', 'fingerprint_sample_size'))
        attr_subset_sample.execute()
        fingerprint_sample = attr_subset_sample.result
    else:
        flash(
            'Please provide a fingerprint dataset to obtain more insight on '
            'the selected attributes',
            params.get('WebServer', 'flash_info_class'))

    # Compute the textual representation of the state of this attribute set
    attribute_set_state = None
    if attribute_set_infos['state'] == State.EXPLORED:
        attribute_set_state = 'Explored'
    elif attribute_set_infos['state'] == State.PRUNED:
        attribute_set_state = 'Pruned'
    elif attribute_set_infos['state'] == State.SATISFYING:
        attribute_set_state = 'Satisfying the threshold'
    elif attribute_set_infos['state'] == State.EMPTY_NODE:
        attribute_set_state = 'Starting empty node'

    # Prepare a dictionary with the cost percentage of each dimension
    # { cost dimension => (bootstrap progress bar class,  # for pretty display
    #                      percentage of the cost of the candidate attributes)
    # }
    usability_cost_ratio = {}
    # The first explored attribute set is used as the candidate attributes
    if REAL_TIME_EXPLORATION:
        candidate_attributes_infos = (
            REAL_TIME_EXPLORATION.get_explored_attribute_sets(0, 1)[0])
    else:
        candidate_attributes_infos = TRACE_DATA['exploration'][0]
    bootstrap_progess_bars = (params.get(
        'WebServer', 'bootstrap_progess_bars').splitlines())

    # The total usability cost
    cost_percentage = (100 * attribute_set_infos['usability_cost'] /
                       candidate_attributes_infos['usability_cost'])
    usability_cost_ratio['usability'] = (bootstrap_progess_bars[0],
                                         '%.2f' % cost_percentage)

    if attribute_set_id > -1:
        # For each cost dimension except the "weighted" ones
        can_attrs_cost_explanation = candidate_attributes_infos[
            'cost_explanation']
        progress_bar_class_id = 1  # 0 already taken
        for cost_dimension, cost_value in can_attrs_cost_explanation.items():
            if cost_dimension.startswith('weighted'):
                continue
            cost_percentage = (
                100 * attribute_set_infos['cost_explanation'][cost_dimension] /
                cost_value)
            # Cycle over the progress bar classes if there are more cost
            # dimensions than classes
            usability_cost_ratio[cost_dimension] = (
                bootstrap_progess_bars[progress_bar_class_id %
                                       len(bootstrap_progess_bars)],
                '%.2f' % cost_percentage)
            progress_bar_class_id += 1

    # Display the attribute information page
    return render_template('attribute-set-information.html',
                           attribute_set_infos=attribute_set_infos,
                           attribute_names=attribute_names,
                           attribute_set_state=attribute_set_state,
                           usability_cost_ratio=usability_cost_ratio,
                           fingerprint_sample=fingerprint_sample,
                           javascript_parameters=params)
예제 #7
0
def real_time_exploration_configuration():
    """Configure the assets for a real time exploration.

    On a POST request, the previously loaded trace, dataset and exploration
    are cleared, then every field and file of the form is validated. The
    invalid ones are collected in a dictionary mapping the field name to an
    error message. If there are no errors, the exploration is initialized and
    run asynchronously.

    Returns:
        A redirection to the 'real_time_exploration' page when the exploration
        is started, the rendered configuration page (with the errors, if any)
        otherwise.
    """
    global TRACE_DATA
    global FINGERPRINT_DATASET
    global REAL_TIME_EXPLORATION
    global EXPLORATION_PROCESS

    # The exploration methods, sensitivity and usability cost measures
    exploration_methods = list(EXPLORATION_METHODS.keys())
    sensitivity_measures = list(SENSITIVITY_MEASURES.keys())
    usability_cost_measures = list(USABILITY_COST_MEASURES.keys())

    # We store a dictionary mapping each form field to an error message if the
    # field is invalid
    errors = {}

    def is_positive_float(value):
        """Check that a form value is the textual form of a float >= 0."""
        return value and is_str_float(value) and float(value) >= 0.0

    # -------------------------- POST request handle --------------------------
    if request.method == 'POST':
        # Clear the previous data if there were some
        TRACE_DATA, FINGERPRINT_DATASET = None, None
        if EXPLORATION_PROCESS:
            EXPLORATION_PROCESS.terminate()
            EXPLORATION_PROCESS = None
        REAL_TIME_EXPLORATION = None

        # ------------ Manage the required fingerprint dataset file -----------
        # Check that the dataset file is in the received POST request
        fp_dataset_error_message = erroneous_post_file(
            request, 'fingerprint-dataset', expected_extension='csv')
        if fp_dataset_error_message:
            errors['fingerprint-dataset'] = fp_dataset_error_message
        # ----- End of the management of the required fingerprint dataset -----

        # ------------------ Handle the sensitivity threshold -----------------
        sens_thresh_error_message = erroneous_field(
            request, 'sensitivity-threshold', is_positive_float,
            'The sensitivity threshold should be a positive float.')
        if sens_thresh_error_message:
            errors['sensitivity-threshold'] = sens_thresh_error_message
        else:
            sensitivity_threshold = float(
                request.form['sensitivity-threshold'])
        # -------------- End of handle the sensitivity threshold --------------

        # ------------------- Handle the exploration method -------------------
        # Stays None if the field is invalid, so that the comparisons below
        # do not read an unbound variable
        exploration_method = None
        exploration_method_error_message = erroneous_field(
            request, 'exploration-method', lambda v: v in exploration_methods,
            'The exploration method is unknown.')
        if exploration_method_error_message:
            errors['exploration-method'] = exploration_method_error_message
        else:
            exploration_method = request.form['exploration-method']
        # --------------- End of handle the exploration method ----------------

        # ------------------ Handle the FPSelect parameters -------------------
        fpselect_method_name = exploration_methods[0]
        if exploration_method == fpselect_method_name:
            use_pruning_methods = 'use-pruning-methods' in request.form

            # Check that it is a strictly positive integer comprised in the
            # expected range
            minimum_explored_paths = params.getint(
                'WebServer', 'fpselect_minimum_explored_paths')
            maximum_explored_paths = params.getint(
                'WebServer', 'fpselect_maximum_explored_paths')
            explored_paths_error_message = erroneous_field(
                request, 'explored-paths',
                lambda v: v.isdigit() and (0 < minimum_explored_paths <= int(v)
                                           <= maximum_explored_paths),
                'The number of explored paths is required to be a strictly '
                'positive integer comprised in '
                f'[{minimum_explored_paths}; {maximum_explored_paths}].')
            if explored_paths_error_message:
                errors['explored-paths'] = explored_paths_error_message
            else:
                explored_paths = int(request.form['explored-paths'])
        # -------------- End of handle the FPSelect parameters ----------------

        # ------------------ Handle the sensitivity measure -------------------
        # Stays None if the field is invalid (see the exploration method)
        sensitivity_measure = None
        sensitivity_measure_error_message = erroneous_field(
            request, 'sensitivity-measure',
            lambda v: v in sensitivity_measures,
            'Unknown sensitivity measure.')
        if sensitivity_measure_error_message:
            errors['sensitivity-measure'] = sensitivity_measure_error_message
        else:
            sensitivity_measure = request.form['sensitivity-measure']
        # -------------- End of handle the sensitivity measure ----------------

        # ------------- Handle the most common fingerprints (=k) --------------
        top_k_fps_sens_meas = sensitivity_measures[0]
        if sensitivity_measure == top_k_fps_sens_meas:
            minimum_common_fps = params.getint(
                'WebServer', 'top_k_fingerprints_sensitivity_measure_min_k')
            maximum_common_fps = params.getint(
                'WebServer', 'top_k_fingerprints_sensitivity_measure_max_k')

            # Check that it is a strictly positive integer and comprised in the
            # range
            top_k_fps_error_message = erroneous_field(
                request, 'most-common-fingerprints', lambda v: v.isdigit() and
                (0 < minimum_common_fps <= int(v) <= maximum_common_fps),
                'The number of most common fingerprints is required to be a '
                'strictly positive integer and comprised in the range '
                f'[{minimum_common_fps}; {maximum_common_fps}].')
            if top_k_fps_error_message:
                errors['most-common-fingerprints'] = top_k_fps_error_message
            else:
                most_common_fingerprints = int(
                    request.form['most-common-fingerprints'])
        # --------- End of handle the most common fingerprints (=k) -----------

        # --- Initialize the dataset (needed to process the usability costs)
        # Only load the dataset file if it passed the verification above
        candidate_attributes = None
        if not fp_dataset_error_message:
            try:
                FINGERPRINT_DATASET = FingerprintDatasetFromCSVInMemory(
                    request.files['fingerprint-dataset'])

                # We will need the candidate attributes afterwards
                candidate_attributes = FINGERPRINT_DATASET.candidate_attributes
                logger.debug('The fingerprint dataset is set.')
            except MissingMetadatasFields as mmf_error:
                error_message = str(mmf_error)
                flash(error_message,
                      params.get('WebServer', 'flash_error_class'))
                logger.error(error_message)
                errors['fingerprint-dataset'] = error_message

        # ----------------- Handle the usability cost measure -----------------
        # The weights of the cost dimensions
        cost_dim_weights = {}

        # The cost of each attribute for each cost dimension
        memory_costs = {}
        instability_costs = {}
        collection_time_costs = {}

        # Check the chosen usability cost measure
        usab_cost_meas_error_message = erroneous_field(
            request, 'usability-cost-measure',
            lambda v: v in usability_cost_measures,
            'Unknown usability cost measure.')
        if usab_cost_meas_error_message:
            errors['usability-cost-measure'] = usab_cost_meas_error_message
        else:
            usability_cost_measure = request.form['usability-cost-measure']
            # All the usability cost measures for now include the memory cost
            # and the instability cost, check these two

            # The memory cost results
            memory_file_error_message = erroneous_post_file(
                request, 'memory-cost-results', expected_extension='csv')
            if memory_file_error_message:
                errors['memory-cost-results'] = memory_file_error_message

            # The memory cost weight
            memory_weight_error_message = erroneous_field(
                request, 'memory-cost-weight', is_positive_float,
                'The memory cost weight should be a positive float.')
            if memory_weight_error_message:
                errors['memory-cost-weight'] = memory_weight_error_message
            else:
                cost_dim_weights[CostDimension.MEMORY] = float(
                    request.form['memory-cost-weight'])

            # Read the memory cost results
            if candidate_attributes and not memory_file_error_message:
                memory_costs = _read_cost_results(
                    'memory-cost-results', 'memory cost',
                    candidate_attributes,
                    lambda row: float(row['average_size']), errors)

            # The instability cost results
            instab_file_error_message = erroneous_post_file(
                request, 'instability-cost-results', expected_extension='csv')
            if instab_file_error_message:
                errors['instability-cost-results'] = instab_file_error_message

            # The instability cost weight
            instab_weight_error_message = erroneous_field(
                request, 'instability-cost-weight', is_positive_float,
                'The instability cost weight should be a positive float.')
            if instab_weight_error_message:
                errors['instability-cost-weight'] = instab_weight_error_message
            else:
                cost_dim_weights[CostDimension.INSTABILITY] = float(
                    request.form['instability-cost-weight'])

            # Read the instability cost results
            if candidate_attributes and not instab_file_error_message:
                instability_costs = _read_cost_results(
                    'instability-cost-results', 'instability cost',
                    candidate_attributes,
                    lambda row: float(row['proportion_of_changes']), errors)

            # If there is also the collection time to consider
            mem_inst_time_usab_cost = usability_cost_measures[1]
            if usability_cost_measure == mem_inst_time_usab_cost:
                # The collection time cost results
                ct_file_err_mess = erroneous_post_file(
                    request,
                    'collection-time-cost-results',
                    expected_extension='csv')
                if ct_file_err_mess:
                    errors['collection-time-cost-results'] = ct_file_err_mess

                # The collection time cost weight
                col_time_weight_error_message = erroneous_field(
                    request, 'collection-time-cost-weight', is_positive_float,
                    'The weight of the collection time cost should be a '
                    'positive float.')
                if col_time_weight_error_message:
                    errors['collection-time-cost-weight'] = (
                        col_time_weight_error_message)
                else:
                    cost_dim_weights[CostDimension.TIME] = float(
                        request.form['collection-time-cost-weight'])

                # Read the content of the collection time results
                if candidate_attributes and not ct_file_err_mess:
                    collection_time_costs = _read_cost_results(
                        'collection-time-cost-results', 'collection time cost',
                        candidate_attributes,
                        lambda row: (float(row['average_collection_time']),
                                     _csv_boolean(row['is_asynchronous'])),
                        errors)
        # ------------- End of handle the usability cost measure --------------

        # At the end, redirect to the real time exploration page if there are
        # no errors, otherwise redirect to the configuration page.
        if not errors:
            # --- Initialize the sensitivity measure
            sens_meas_class = SENSITIVITY_MEASURES[sensitivity_measure]
            # For now on, there is only the TopKFingerprints
            actual_sens_meas = sens_meas_class(FINGERPRINT_DATASET,
                                               most_common_fingerprints)
            logger.debug('Initialized the sensitivity measure '
                         f'{actual_sens_meas}.')

            # --- Initialize the usability cost measure
            usab_cost_meas_class = USABILITY_COST_MEASURES[
                usability_cost_measure]

            if usability_cost_measure == mem_inst_time_usab_cost:
                # Initialize the memory, instability, and collection time
                actual_usab_cost_meas = usab_cost_meas_class(
                    memory_costs, instability_costs, collection_time_costs,
                    cost_dim_weights)
            else:
                actual_usab_cost_meas = usab_cost_meas_class(
                    memory_costs, instability_costs, cost_dim_weights)
            logger.debug('Initialized the usability cost measure '
                         f'{actual_usab_cost_meas}.')

            # --- Initialize the exploration class
            exploration_class = EXPLORATION_METHODS[exploration_method]

            # If FPSelect
            if exploration_method == fpselect_method_name:
                exploration = exploration_class(actual_sens_meas,
                                                actual_usab_cost_meas,
                                                FINGERPRINT_DATASET,
                                                sensitivity_threshold,
                                                explored_paths,
                                                use_pruning_methods)
            else:
                exploration = exploration_class(actual_sens_meas,
                                                actual_usab_cost_meas,
                                                FINGERPRINT_DATASET,
                                                sensitivity_threshold)
            logger.debug(f'Initialized the exploration {exploration}.')

            # Execute the exploration in an asynchronous manner before
            # redirecting to the real time exploration page
            REAL_TIME_EXPLORATION = exploration
            EXPLORATION_PROCESS = REAL_TIME_EXPLORATION.run_asynchronous()

            logger.debug('Redirecting to the real time exploration page')
            return redirect(url_for('real_time_exploration'))
        # -------------------- End of POST request handle ---------------------

    # Show the real time exploration configuration page
    return render_template('real-time-exploration-configuration.html',
                           params=params,
                           errors=errors,
                           exploration_methods=exploration_methods,
                           sensitivity_measures=sensitivity_measures,
                           usability_cost_measures=usability_cost_measures)


def _read_cost_results(file_field, cost_description, candidate_attributes,
                       parse_cost, errors):
    """Read an uploaded csv file of cost results as a dictionary.

    Args:
        file_field: The name of the file field in the POST request.
        cost_description: The name of the cost used in the error message (e.g.,
                          'memory cost').
        candidate_attributes: The candidate attributes in which the attribute
                              of each row is searched by name.
        parse_cost: A function that takes a row and returns the cost to store
                    for its attribute.
        errors: The dictionary of the form errors. The error message is stored
                at the file_field key if a KeyError occurs.

    Returns:
        A dictionary mapping each attribute to its cost. It only holds the rows
        before the erroneous one if an error occurs.
    """
    costs = {}
    file_content = request.files[file_field].read().decode().splitlines()
    for row in DictReader(file_content):
        try:
            attribute = candidate_attributes.get_attribute_by_name(
                row['attribute'])
            costs[attribute] = parse_cost(row)
        except KeyError as key_error:
            error_message = (
                f'The {key_error.args[0]} field is missing from '
                f'the {cost_description} results file.')
            flash(error_message, params.get('WebServer', 'flash_error_class'))
            logger.error(error_message)
            errors[file_field] = error_message
            break  # Stop at the first erroneous row
    return costs


def _csv_boolean(value):
    """Interpret the textual value of a csv cell as a boolean.

    A plain bool(value) is True for every non-empty string, 'False' and '0'
    included. The empty string is still read as False, together with the usual
    textual forms of a false value.
    """
    return value.strip().lower() not in ('', 'false', '0', 'no')
예제 #8
0
from brfast.measures.distinguishability.unicity import (AttributeSetUnicity,
                                                        UNICITY_RATE_RESULT,
                                                        UNIQUE_FPS_RESULT,
                                                        TOTAL_BROWSERS_RESULT)
from brfast.measures.sensitivity.fpselect import TopKFingerprints
from brfast.measures.usability_cost.fpselect import (CostDimension,
                                                     MemoryInstability,
                                                     MemoryInstabilityTime)
from brfast.utils.conversion import is_str_float
from brfast.webserver.files_verification import trace_file_errors
from brfast.webserver.form_validation import (erroneous_field,
                                              erroneous_post_file)

# The Flask application
app = Flask(__name__)
# The upload folder comes from the 'WebServer' section of the configuration
app.config['UPLOAD_FOLDER'] = params.get('WebServer', 'upload_folder')
# A new random secret key is drawn each time this module is loaded, its size
# in bytes comes from the 'WebServer' section of the configuration
app.secret_key = secrets.token_bytes(
    params.getint('WebServer', 'secret_key_size'))

# Set the exploration methods, the sensitivity measures, and the usability cost
# measures below. Each dictionary maps the name of a choice to the class that
# implements it.
# NOTE(review): the real time exploration configuration selects some entries
# by their position in list(<dictionary>.keys()) (FPSelect at index 0, the
# measure with the collection time at index 1), keep the insertion order
# stable or update these indices accordingly.
EXPLORATION_METHODS = {
    'FPSelect': FPSelect,
    'Entropy': Entropy,
    'Conditional entropy': ConditionalEntropy
}
SENSITIVITY_MEASURES = {'Top-k fingerprints': TopKFingerprints}
USABILITY_COST_MEASURES = {
    'Memory and instability': MemoryInstability,
    'Memory, instability, and collection time': MemoryInstabilityTime
}
예제 #9
0
"""Module containing the sensitivity measures used in the FPSelect paper."""

import importlib
from typing import List

from loguru import logger

from brfast.data.attribute import AttributeSet
from brfast.data.dataset import FingerprintDataset
from brfast.measures import SensitivityMeasure
# from measures.similarity import TODO

# Import the engine of the analysis module (pandas or modin)
from brfast.config import params

# The module named by the 'engine' option of the 'DataAnalysis' section is
# loaded dynamically and bound to the usual pd alias
pd = importlib.import_module(params.get('DataAnalysis', 'engine'))

# Field name 'proportion' (its use is not visible in this part of the file)
PROPORTION_FIELD = 'proportion'


def _get_top_k_fingerprints(dataframe: pd.DataFrame,
                            attribute_names: List[str],
                            k: int) -> pd.DataFrame:
    """Get a DataFrame with the k-most common fingerprints.

    Args:
        dataframe: The fingerprint dataset.
        attribute_names: The name of the attributes to consider.
        k: The parameter to specify the k-most common fingerprints to hold.

    Returns: