예제 #1
0
    def _validate_config_argument(section, option, default, config_parser):
        """
        Reads a configuration option using the typed getter that matches the type of its default value
        Args:
            section: Name of the configuration section holding the option
            option: Name of the option to read
            default: Value used as fallback; its type (bool, int, float or str) selects the getter
            config_parser: Parser object exposing getboolean / getint / getfloat / get

        Returns: The value read from the parser, or the default value when the getter raises ValueError or
            when the default is of none of the supported types

        """
        # Order matters: bool is probed before int because every bool is also an int instance
        typed_getters = (
            (bool, 'getboolean'),
            (int, 'getint'),
            (float, 'getfloat'),
            (str, 'get'),
        )
        getter_name = next(
            (name for kind, name in typed_getters if isinstance(default, kind)),
            None)

        # Stays untouched when no getter applies or when the getter fails
        value = default
        if getter_name is not None:
            try:
                value = getattr(config_parser, getter_name)(section,
                                                            option,
                                                            fallback=default)
            except ValueError:
                LOG.warning(
                    f'[{section}][{option}] configuration parameter wrongly set. '
                    f'Falling back to its default value: {default}')

        LOG.debug(f'[{section}][{option}] {value}')
        return value
예제 #2
0
    def __setattr__(self, key, value) -> None:
        """
        Attribute assignment hook (overridden to be used with slots) that guards updates of existing attributes
        Args:
            key: Name of the attribute being assigned
            value: Value intended for that attribute

        Returns: None

        """
        # Attribute not present yet: there is nothing to compare against, store it as is
        if not hasattr(self, key):
            super(Config, self).__setattr__(key, value)
            return

        # Attribute already present: the update is skipped when the type check rejects the new value
        if not self._preserve_property_type(getattr(self, key), value):
            LOG.warning(
                f'Invalid data type {type(value)} for property {key}. Skipping update'
            )
            return

        super(Config, self).__setattr__(key, value)
        LOG.info(
            f'Updating configuration parameter {key.upper()} with value {value}'
        )

        # Updating either of these two options triggers the compatibility check between them
        restricted_keys = (USE_EXTENDED_PATTERN_SYNTAX.lower(),
                           USE_GRAMMAR_OPERATORS.lower())
        if key in restricted_keys:
            self._check_xps_op_restriction()
예제 #3
0
    def _is_solution(self) -> None:
        """
        Updates the AES bookkeeping of the current RUN: while no solution has been flagged, one more evaluation is
        accumulated and the run is flagged as solved when the fitness value reaches the success threshold

        Returns: None

        """
        run_stats = self.stats

        # Once a solution has been flagged there is nothing left to account for
        if run_stats.solution_found is not False:
            return

        run_stats.sum_aes(1)
        if self.fitness_value >= self.config.success_threshold:
            LOG.debug('Solution found for this run!')
            run_stats.solution_found = True
예제 #4
0
파일: bnf.py 프로젝트: revuel/PatternOmatic
def dynamic_generator(samples: [Doc]) -> dict:
    """
    Builds, from the given list of Spacy Doc instances, a Backus Naur Form (BNF) grammar describing the Spacy NLP
    Linguistic Feature values observed in those samples
    Args:
        samples: List of Spacy Doc objects

    Returns: Backus Naur Form grammar notation encoded in a dictionary

    """
    config = Config()

    LOG.info(f'Generating BNF based on the following samples: {str(samples)}')

    # Survey the samples: token count bounds, seen features and extended features
    max_length_token, min_length_token, features_dict, extended_features = _features_seen(samples)

    # Grammar skeleton: root, tokens per pattern and features per token
    pattern_grammar = {
        S: [P],
        P: _symbol_stacker(T, max_length_token, min_length_token),
        T: _symbol_stacker(F, _get_features_per_token(features_dict)),
    }

    if config.use_token_wildcard is True:
        pattern_grammar[T].append(TOKEN_WILDCARD)

    # Available features (names only); operators and extended syntax are mutually exclusive ways to expand them
    feature_names = list(features_dict)
    operators = config.use_grammar_operators
    extended_syntax = config.use_extended_pattern_syntax
    if operators is True and extended_syntax is False:
        pattern_grammar = _add_grammar_operators(pattern_grammar, feature_names)
    elif extended_syntax is True and operators is False:
        pattern_grammar = _add_extended_pattern_syntax(pattern_grammar, feature_names, features_dict)
    else:
        pattern_grammar[F] = feature_names

    # Possible values of each feature (plus the XPS symbol when the extended syntax is enabled)
    append_xps = config.use_extended_pattern_syntax is True
    for feature, values in features_dict.items():
        if append_xps:
            values.append(XPS)
        pattern_grammar[feature] = values

    if config.use_custom_attributes is True:
        pattern_grammar = _add_custom_attributes(pattern_grammar, extended_features)

    LOG.info(f'Dynamically generated BNF: {str(pattern_grammar)}')

    return pattern_grammar
예제 #5
0
    def _check_xps_op_restriction(self) -> None:
        """
        Spacy's Grammar Operators and Quantifiers and Spacy's Extended Pattern Syntax can not be used together in a
        pattern for the Spacy's Rule Based Matcher.

        When both options exist on this object and both are enabled, a warning is logged and the Extended Pattern
        Syntax option is switched off.

        Returns: None

        """
        xps_attribute = USE_EXTENDED_PATTERN_SYNTAX.lower()
        operators_attribute = USE_GRAMMAR_OPERATORS.lower()

        # Nothing to check until both options have been set on this object
        if not (hasattr(self, xps_attribute) and hasattr(self, operators_attribute)):
            return

        both_enabled = self.use_extended_pattern_syntax is True and self.use_grammar_operators is True
        if both_enabled:
            LOG.warning(
                f'Extended Pattern Syntax is not compatible with the usage of Grammar Operators. '
                f'Extended Pattern Syntax has been disabled!')
            self.use_extended_pattern_syntax = False
예제 #6
0
    def _wildcard_penalty(self, contact: float) -> float:
        """
        Lowers the fitness value once per token wildcard (empty dict) found in the fenotype, when the usage of
        token wildcard is enabled
        Args:
            contact: Temporary fitness value for the current individual

        Returns: Final fitness value for the current individual

        """
        if not self.config.use_token_wildcard:
            return contact

        tokens = self.fenotype
        token_count = len(tokens)
        for token in tokens:
            if token == {}:
                LOG.debug('Applying token wildcard penalty!')
                # Computed inside the loop on purpose: an empty fenotype never reaches this division
                contact -= 1 / token_count

        return contact
예제 #7
0
파일: api.py 프로젝트: revuel/PatternOmatic
def find_patterns(
    samples: List[str],
    configuration: Union[str, None] = None,
    spacy_language_model_name: Union[str,
                                     None] = None) -> List[Tuple[Any, ...]]:
    """
    Searches, for the given samples, optimized patterns to be used by the Spacy's Rule Based Matcher.
    Args:
        samples: List of strings from where to find common linguistic patterns
        configuration: (str) Optional configuration file path to be loaded (Fallbacks to default configuration)
        spacy_language_model_name: (str) Optional valid Spacy Language Model (Fallbacks to Spacy's en_core_web_sm)

    Returns: List of patterns found and list of each pattern matching score against the samples

    """
    default_model = 'en_core_web_sm'

    LOG.info(f'Loading language model {spacy_language_model_name}...')

    # The default model is installed up front so that it is available as fallback below
    installed_projects = {
        distribution.project_name
        for distribution in pkg_resources.working_set
    }
    if 'en-core-web-sm' not in installed_projects:
        LOG.info("PatternOmatic's default spaCy's Language Model not installed,"
                 " proceeding to install en_core_web_sm, please wait...")
        spacy_download(default_model)

    try:
        nlp = spacy_load(spacy_language_model_name)
    except OSError:
        LOG.warning(
            f'Model {spacy_language_model_name} not found, '
            "falling back to patternOmatic's default language model: en_core_web_sm"
        )
        nlp = spacy_load(default_model)

    LOG.info('Building Doc instances...')
    docs = [nlp(text) for text in samples]

    # A string is taken as a configuration file path; anything else requests the Config without a path
    if isinstance(configuration, str):
        LOG.info(
            f'Setting up configuration from the following path: {configuration}...'
        )
        config = Config(config_file_path=configuration)
    else:
        config = Config()
        LOG.info(f'Existing Config instance found: {config}')

    stats = Stats()
    bnf_g = dgg(docs)

    LOG.info('Starting Execution...')
    for _ in range(config.max_runs):
        # Each run evolves a fresh population; only the evolution itself is timed
        started = time.monotonic()
        population = Population(docs, bnf_g, stats)
        population.evolve()
        elapsed = time.monotonic() - started
        stats.add_time(elapsed)
        stats.calculate_metrics()

    LOG.info(f'Execution report {stats}')
    stats.persist()

    LOG.info('Best individuals for this execution:')
    stats.most_fitted_accumulator.sort(
        key=lambda candidate: candidate.fitness_value, reverse=True)
    for individual in stats.most_fitted_accumulator:
        LOG.info(f'{individual}')

    # Transpose the (fenotype, fitness) pairs into one tuple of fenotypes and one tuple of fitness values
    pairs = ((individual.fenotype, individual.fitness_value)
             for individual in stats.most_fitted_accumulator)
    return list(zip(*pairs))
예제 #8
0
def main(args: List) -> None:
    """
    Wrapper of the PatternOmatic's script main function: parses the command line, finds the patterns for the
    given samples and logs them
    Args:
        args: Command Line Input Arguments

    Returns: None

    Raises: Any Exception raised while parsing or finding patterns, after logging it as critical

    """
    LOG.info('Parsing command line arguments...')
    try:
        cli = ArgumentParser(
            description="Finds the Spacy's Matcher pattern for the given samples",
            epilog='...using actual Artificial Intelligence')

        # Samples: every -s/--sample occurrence collects one or more words
        cli.add_argument('-s', '--sample',
                         action='append', required=True, nargs='+', type=str,
                         help='A sample phrase')

        # Spacy Language Model
        cli.add_argument('-l', '--language',
                         nargs='?', type=str, default='en_core_web_sm',
                         help='Spacy language model to be used')

        # Configuration file to be used
        cli.add_argument('-c', '--config',
                         nargs='?', type=str, default=None,
                         help='Configuration file path to be used')

        # Parse command line input arguments/options
        parsed_args = cli.parse_args(args)

        # Each sample arrives as a list of words: join them back into a single phrase, in place
        parsed_args.sample[:] = [
            ' '.join(words) for words in parsed_args.sample
        ]

        #
        # Find patterns
        #
        patterns_found, _ = find_patterns(
            parsed_args.sample,
            configuration=parsed_args.config,
            spacy_language_model_name=parsed_args.language)

        LOG.info(f'Patterns found: {patterns_found}')

    except Exception as ex:
        LOG.critical(f'Fatal error: {repr(ex)}')
        raise ex
예제 #9
0
    def __init__(self, config_file_path: str = None):
        """
        Config object constructor: every parameter is read from the given configuration file, falling back to
        its default value when the file or the parameter is not available
        Args:
            config_file_path: Path for a configuration file
        """
        config_parser = configparser.ConfigParser()

        def read_option(section, option, default):
            # Shorthand binding the parser shared by every parameter below
            return self._validate_config_argument(section, option, default,
                                                  config_parser)

        # The path is only kept when a file was provided and could actually be read
        loaded_path = None
        if config_file_path is None:
            LOG.warning(
                'Configuration file not provided. Falling back to default values'
            )
        elif not config_parser.read(config_file_path):
            LOG.warning(
                f'File {config_file_path} not found. Falling back to default values'
            )
        else:
            loaded_path = config_file_path
        self.file_path = loaded_path

        #
        # GE configuration parameters
        #
        self.max_runs = read_option(GE, MAX_RUNS, 4)
        self.success_threshold = read_option(GE, SUCCESS_THRESHOLD, 0.8)
        self.population_size = read_option(GE, POPULATION_SIZE, 10)
        self.max_generations = read_option(GE, MAX_GENERATIONS, 3)
        self.codon_length = read_option(GE, CODON_LENGTH, 8)
        self.num_codons_per_individual = read_option(GE, CODONS_X_INDIVIDUAL, 4)
        # Derived parameter, not read from the file
        self.dna_length = self.codon_length * self.num_codons_per_individual
        self.mutation_probability = read_option(GE, MUTATION_PROBABILITY, 0.5)
        self.offspring_max_size_factor = read_option(GE, OFFSPRING_FACTOR, 3.5)
        self.mating_probability = read_option(GE, MATING_PROBABILITY, 0.9)
        self.k_value = read_option(GE, K_VALUE, 3)

        #
        # GE configuration methods
        #
        self.selection_type = SelectionType(read_option(GE, SELECTION_TYPE, 0))
        self.recombination_type = RecombinationType(
            read_option(GE, RECOMBINATION_TYPE, 0))
        self.replacement_type = ReplacementType(
            read_option(GE, REPLACEMENT_TYPE, 0))
        self.fitness_function_type = FitnessType(
            read_option(GE, FITNESS_FUNCTION_TYPE, 1))

        #
        # BNF Grammar Generation configuration options
        #
        self.features_per_token = read_option(DGG, FEATURES_X_TOKEN, 1)
        self.use_boolean_features = read_option(DGG, USE_BOOLEAN_FEATURES, False)
        self.use_custom_attributes = read_option(DGG, USE_CUSTOM_ATTRIBUTES, False)
        self.use_uniques = read_option(DGG, USE_UNIQUES, True)
        self.use_grammar_operators = read_option(DGG, USE_GRAMMAR_OPERATORS, False)
        self.use_token_wildcard = read_option(DGG, USE_TOKEN_WILDCARD, False)
        self.use_extended_pattern_syntax = read_option(
            DGG, USE_EXTENDED_PATTERN_SYNTAX, False)

        #
        # Configuration validation
        #
        self._check_xps_op_restriction()

        #
        # IO
        #
        self.report_path = read_option(IO, REPORT_PATH,
                                       '/tmp/patternomatic_report.txt')
        self.report_format = ReportFormat(read_option(IO, REPORT_FORMAT, 0))

        LOG.info(f'Configuration instance: {self}')
예제 #10
0
 def clear_instance(self) -> None:
     """
     For testing purposes, destroy Singleton instance

     Returns: None

     """
     LOG.debug('Removing config object!')
     # After this assignment the attribute exists on this object, so the deletion below
     # cannot fail with AttributeError even when no instance had been stored before
     self._instance = None
     # Removes the attribute set on this object.
     # NOTE(review): presumably later lookups then fall back to a class-level `_instance`
     # default -- confirm against the enclosing class, which is not visible here
     del self._instance
예제 #11
0
 def __call__(cls, config_file_path: str = None) -> Config:
     """
     Returns the instance stored in `_instance`, creating and storing it on the first call
     Args:
         config_file_path: Path for a configuration file, only used when the instance is created

     Returns: The stored instance

     """
     # An instance is already stored: hand it back, the given path is not used
     if cls._instance is not None:
         return cls._instance

     LOG.debug('Creating config object!')
     cls._instance = super().__call__(config_file_path)
     return cls._instance