Exemple #1
0
    def _create_vectorizers(self):
        """Build one vectorizer per entry in the `features` section of the mead config.

        Each feature may carry a `vectorizer` sub-section (``{'type': 'token1d'}`` is used when it is
        absent).  That sub-section is passed as keyword arguments to ``baseline.create_vectorizer``.
        If `mxlen` or `mxwlen` is missing from the sub-section, it is taken from the `preproc` section
        of the config and, failing that, set to ``-1``.  A `transform` entry is evaluated with ``eval``
        and handed to the vectorizer as `transform_fn`.

        Side effects: ``self.vectorizers`` is replaced with the new dictionary, and ``self.primary_key``
        is set to the name of the last feature whose `primary` flag is ``True`` (the first feature's
        name when no feature is flagged).

        The config itself is not modified: the derived settings are written to a copy of each
        `vectorizer` sub-section.

        :raises ValueError: If a feature name contains "-"
        :raises IndexError: If the `features` section is empty
        :return: (``dict``) - A dictionary of the vectorizers keyed by feature name
        """
        self.vectorizers = {}

        features = self.config_params['features']
        # The `preproc` section is the same for every feature, so look it up once.
        preproc = self.config_params.get('preproc', {})
        self.primary_key = features[0]['name']
        for feature in features:
            key = feature['name']
            if '-' in key:
                raise ValueError(
                    'Feature names cannot contain "-". Found feature named "{}"'
                    .format(key))
            if feature.get('primary', False) is True:
                self.primary_key = key
            # Copy the section so that the fallback lengths and the evaluated
            # transform callable do not leak back into `self.config_params`.
            vectorizer_section = dict(
                feature.get('vectorizer', {'type': 'token1d'}))
            vectorizer_section.setdefault('mxlen', preproc.get('mxlen', -1))
            vectorizer_section.setdefault('mxwlen', preproc.get('mxwlen', -1))
            if 'transform' in vectorizer_section:
                # NOTE(security): `eval` executes arbitrary code taken from the
                # config; only load configs from trusted sources.
                vectorizer_section['transform_fn'] = eval(
                    vectorizer_section['transform'])
            self.vectorizers[key] = baseline.create_vectorizer(
                **vectorizer_section)
        return self.vectorizers
Exemple #2
0
    def _create_vectorizers(self, vecs_set=None):
        """Build one vectorizer per entry in the `features` section of the mead config.

        For each feature, the keyword arguments passed to ``baseline.create_vectorizer`` are assembled
        from, in increasing priority:

        1. a base config: ``{'type': 'token1d'}``, or, when the feature's `vectorizer` sub-section has a
           `label`, the entry stored under that label in `vecs_set`;
        2. the feature's own `vectorizer` sub-section;
        3. derived values: `data_download_cache`, `mxlen`/`mxwlen` fallbacks taken from the `preproc`
           section (``-1`` when absent there too), and `file`, `model_file` and `vocab_file` replaced
           by whatever ``SingleFileDownloader(...).download()`` returns for them (presumably a local
           path -- confirm against ``SingleFileDownloader``).

        A `transform` entry is copied over `transform_fn`; a `transform_fn` that is a string is
        evaluated with ``eval``.

        Side effects: ``self.vectorizers`` is replaced with the new dictionary, and ``self.primary_key``
        is set to the name of the last feature whose `primary` flag is ``True`` (the first feature's
        name when no feature is flagged).

        :param vecs_set: Mapping from vectorizer label to a shared vectorizer config.  Only consulted
            (via ``.get(label)``) for features whose `vectorizer` sub-section has a `label`
        :raises ValueError: If a feature name contains "-", or if a feature references a vectorizer
            label that cannot be resolved through `vecs_set`
        :return: (``dict``) - A dictionary of the vectorizers keyed by feature name
        """
        self.vectorizers = {}

        features = self.config_params['features']
        assert_unique_feature_names([f['name'] for f in features])
        # The `preproc` section is the same for every feature, so look it up once.
        preproc = self.config_params.get('preproc', {})
        self.primary_key = features[0]['name']
        for feature in features:
            key = feature['name']
            if '-' in key:
                raise ValueError('Feature names cannot contain "-". Found feature named "{}"'.format(key))
            if feature.get('primary', False) is True:
                self.primary_key = key

            vectorizer_section = feature.get('vectorizer', {})
            vecs_global_config = {'type': 'token1d'}
            if 'label' in vectorizer_section:
                label = vectorizer_section['label']
                # `vecs_set` defaults to None and the label may be unknown; fail with a
                # message naming the feature rather than an AttributeError/TypeError.
                vecs_global_config = None if vecs_set is None else vecs_set.get(label)
                if vecs_global_config is None:
                    raise ValueError(
                        'Feature "{}" references vectorizer label "{}", which was not found in the '
                        'vectorizer index'.format(key, label))

            # Merging builds a new dict, so neither the shared config nor the feature's
            # own section is modified by the assignments below.
            vectorizer_section = {**vecs_global_config, **vectorizer_section}
            vectorizer_section['data_download_cache'] = self.data_download_cache
            vec_file = vectorizer_section.get('file')
            if vec_file:
                vectorizer_section['file'] = SingleFileDownloader(vec_file, self.data_download_cache).download()
            vectorizer_section.setdefault('mxlen', preproc.get('mxlen', -1))
            vectorizer_section.setdefault('mxwlen', preproc.get('mxwlen', -1))
            for file_key in ('model_file', 'vocab_file'):
                if file_key in vectorizer_section:
                    vectorizer_section[file_key] = SingleFileDownloader(
                        vectorizer_section[file_key], self.data_download_cache).download()
            if 'transform' in vectorizer_section:
                vectorizer_section['transform_fn'] = vectorizer_section['transform']

            transform_fn = vectorizer_section.get('transform_fn')
            if isinstance(transform_fn, str):
                # NOTE(security): `eval` executes arbitrary code taken from the config;
                # only load configs from trusted sources.
                vectorizer_section['transform_fn'] = eval(transform_fn)

            self.vectorizers[key] = baseline.create_vectorizer(**vectorizer_section)
        return self.vectorizers
Exemple #3
0
    def _create_vectorizers(self):
        """Instantiate the vectorizers described by the `features` section of the mead config.

        Every feature contributes one vectorizer, created by calling ``baseline.create_vectorizer`` with
        the feature's `vectorizer` sub-section as keyword arguments (``{'type': 'token1d'}`` when the
        feature has no such sub-section).  Before the call:

        * `mxlen` and `mxwlen`, when not given in the sub-section, default to the values in the
          `preproc` section of the config, or ``-1`` if they are missing there as well;
        * a `transform` entry is evaluated with ``eval`` and supplied as `transform_fn`.

        These derived values are applied to a copy of the sub-section, so the config is left untouched.

        Side effects: ``self.vectorizers`` is replaced with the new dictionary, and ``self.primary_key``
        is set to the name of the last feature whose `primary` flag is ``True`` (the first feature's
        name when no feature is flagged).

        :raises ValueError: If a feature name contains "-"
        :raises IndexError: If the `features` section is empty
        :return: (``dict``) - A dictionary of the vectorizers keyed by feature name
        """
        self.vectorizers = {}

        features = self.config_params['features']
        # Loop-invariant: the `preproc` defaults are shared by all features.
        preproc = self.config_params.get('preproc', {})
        self.primary_key = features[0]['name']
        for feature in features:
            key = feature['name']
            if '-' in key:
                raise ValueError('Feature names cannot contain "-". Found feature named "{}"'.format(key))
            if feature.get('primary', False) is True:
                self.primary_key = key
            # Take a copy: writing the fallbacks and the evaluated callable into the
            # original section would alter `self.config_params` as a side effect.
            vectorizer_section = dict(feature.get('vectorizer', {'type': 'token1d'}))
            vectorizer_section.setdefault('mxlen', preproc.get('mxlen', -1))
            vectorizer_section.setdefault('mxwlen', preproc.get('mxwlen', -1))
            if 'transform' in vectorizer_section:
                # NOTE(security): `eval` executes arbitrary code taken from the config;
                # only load configs from trusted sources.
                vectorizer_section['transform_fn'] = eval(vectorizer_section['transform'])
            self.vectorizers[key] = baseline.create_vectorizer(**vectorizer_section)
        return self.vectorizers