def _create_vectorizers(self):
    """Instantiate one vectorizer per entry of the mead config `features` section.

    For each feature, its `vectorizer` sub-section (``{'type': 'token1d'}`` when the
    feature has none) is completed with `mxlen` and `mxwlen` -- falling back to the
    `preproc` section of the config and then to ``-1`` -- and handed as keyword
    arguments to ``baseline.create_vectorizer``.  A `transform` entry is evaluated
    with ``eval`` and passed along as `transform_fn`.

    The settings are assembled in a per-feature copy, so ``self.config_params`` is
    left unmodified.

    Sets ``self.vectorizers`` and ``self.primary_key``.  The primary key is the name
    of the first feature unless a feature has ``primary`` set to ``True``, in which
    case the last such feature wins.

    :raises ValueError: If a feature name contains ``-``
    :return: (``dict``) - A dictionary of the vectorizers keyed by feature name
    """
    self.vectorizers = {}
    features = self.config_params['features']
    # Loop-invariant: the global fallbacks for `mxlen` / `mxwlen` live here.
    preproc = self.config_params.get('preproc', {})
    self.primary_key = features[0]['name']
    for feature in features:
        key = feature['name']
        if '-' in key:
            raise ValueError('Feature names cannot contain "-". Found feature named "{}"'.format(key))
        if feature.get('primary', False) is True:
            self.primary_key = key

        # Copy so the defaults (and the callable produced by `eval` below) are not
        # written back into the shared config dictionary.
        vectorizer_section = dict(feature.get('vectorizer', {'type': 'token1d'}))
        vectorizer_section.setdefault('mxlen', preproc.get('mxlen', -1))
        vectorizer_section.setdefault('mxwlen', preproc.get('mxwlen', -1))
        if 'transform' in vectorizer_section:
            # NOTE(security): `eval` executes arbitrary code taken from the config;
            # only load configs from trusted sources.
            vectorizer_section['transform_fn'] = eval(vectorizer_section['transform'])

        self.vectorizers[key] = baseline.create_vectorizer(**vectorizer_section)
    return self.vectorizers
def _create_vectorizers(self, vecs_set=None):
    """Instantiate one vectorizer per entry of the mead config `features` section.

    For each feature the keyword arguments for ``baseline.create_vectorizer`` are
    assembled as follows:

    * Start from ``{'type': 'token1d'}``, or, when the feature's `vectorizer`
      sub-section has a `label`, from the entry of `vecs_set` stored under that label.
    * Overlay the feature's own `vectorizer` sub-section on top of those defaults.
    * Add ``data_download_cache`` and fill in `mxlen` / `mxwlen`, falling back to the
      `preproc` section of the config and then to ``-1``.
    * Replace `file`, `model_file` and `vocab_file` with the result of
      ``SingleFileDownloader(...).download()``.
    * Copy `transform` to `transform_fn`; a `transform_fn` given as a string is
      evaluated with ``eval``.

    Sets ``self.vectorizers`` and ``self.primary_key``.  The primary key is the name
    of the first feature unless a feature has ``primary`` set to ``True``, in which
    case the last such feature wins.

    :param vecs_set: Mapping from vectorizer label to a dictionary of shared
        vectorizer settings.  Only consulted for features whose `vectorizer`
        sub-section contains a `label`.
    :raises ValueError: If a feature name contains ``-``, or if a feature references
        a vectorizer `label` that cannot be resolved through `vecs_set`
    :return: (``dict``) - A dictionary of the vectorizers keyed by feature name
    """
    self.vectorizers = {}
    features = self.config_params['features']
    assert_unique_feature_names([f['name'] for f in features])
    # Loop-invariant: the global fallbacks for `mxlen` / `mxwlen` live here.
    preproc = self.config_params.get('preproc', {})
    self.primary_key = features[0]['name']
    for feature in features:
        key = feature['name']
        if '-' in key:
            raise ValueError('Feature names cannot contain "-". Found feature named "{}"'.format(key))
        if feature.get('primary', False) is True:
            self.primary_key = key

        vectorizer_section = feature.get('vectorizer', {})
        vecs_global_config = {'type': 'token1d'}
        if 'label' in vectorizer_section:
            label = vectorizer_section['label']
            # Fail with a clear message instead of an AttributeError (no `vecs_set`)
            # or a TypeError when unpacking ``None`` (unknown label).
            vecs_global_config = vecs_set.get(label) if vecs_set is not None else None
            if vecs_global_config is None:
                raise ValueError(
                    'Feature "{}" references vectorizer label "{}" which was not found in the vectorizer set'.format(
                        key, label
                    )
                )
        # Feature-level settings override the shared ones; the merge also yields a
        # fresh dict, so the config itself is not mutated below.
        vectorizer_section = {**vecs_global_config, **vectorizer_section}
        vectorizer_section['data_download_cache'] = self.data_download_cache

        vec_file = vectorizer_section.get('file')
        if vec_file:
            vectorizer_section['file'] = SingleFileDownloader(vec_file, self.data_download_cache).download()

        vectorizer_section.setdefault('mxlen', preproc.get('mxlen', -1))
        vectorizer_section.setdefault('mxwlen', preproc.get('mxwlen', -1))

        for file_key in ('model_file', 'vocab_file'):
            if file_key in vectorizer_section:
                vectorizer_section[file_key] = SingleFileDownloader(
                    vectorizer_section[file_key], self.data_download_cache
                ).download()

        if 'transform' in vectorizer_section:
            vectorizer_section['transform_fn'] = vectorizer_section['transform']
        if isinstance(vectorizer_section.get('transform_fn'), str):
            # NOTE(security): `eval` executes arbitrary code taken from the config;
            # only load configs from trusted sources.
            vectorizer_section['transform_fn'] = eval(vectorizer_section['transform_fn'])

        self.vectorizers[key] = baseline.create_vectorizer(**vectorizer_section)
    return self.vectorizers
def _create_vectorizers(self):
    """Read the `features` section of the mead config and build its vectorizers.

    The `features` section contains both embedding info and vectorizers.  The
    `vectorizer` sub-section of every feature (``{'type': 'token1d'}`` if it is
    missing) is used to instantiate a vectorizer through
    ``baseline.create_vectorizer``.  `mxlen` and `mxwlen` that are not given on the
    feature are taken from the `preproc` section of the config, or set to ``-1`` when
    absent there too.  A `transform` entry is evaluated with ``eval`` and supplied as
    `transform_fn`.

    Works on a copy of each `vectorizer` sub-section, so ``self.config_params`` is
    not changed by this call.

    Sets ``self.vectorizers`` and ``self.primary_key``.  The primary key defaults to
    the first feature's name and is overridden by any feature whose ``primary`` entry
    is ``True`` (the last one wins).

    :raises ValueError: If a feature name contains ``-``
    :return: (``dict``) - A dictionary of the vectorizers keyed by feature name
    """
    features = self.config_params['features']
    # Global length limits, looked up once instead of on every iteration.
    preproc_defaults = self.config_params.get('preproc', {})
    default_mxlen = preproc_defaults.get('mxlen', -1)
    default_mxwlen = preproc_defaults.get('mxwlen', -1)

    self.vectorizers = {}
    self.primary_key = features[0]['name']
    for feature in features:
        name = feature['name']
        if '-' in name:
            raise ValueError('Feature names cannot contain "-". Found feature named "{}"'.format(name))
        if feature.get('primary', False) is True:
            self.primary_key = name

        # Shallow copy: filling in defaults must not leak into the config, and the
        # callable created below should not end up stored inside it.
        settings = dict(feature.get('vectorizer', {'type': 'token1d'}))
        if 'mxlen' not in settings:
            settings['mxlen'] = default_mxlen
        if 'mxwlen' not in settings:
            settings['mxwlen'] = default_mxwlen
        if 'transform' in settings:
            # NOTE(security): `eval` runs arbitrary code from the config file; the
            # config must come from a trusted source.
            settings['transform_fn'] = eval(settings['transform'])

        self.vectorizers[name] = baseline.create_vectorizer(**settings)
    return self.vectorizers