def _configure(self):
    """Copy the 'BagOfWordsFromXML' task settings onto this instance.

    The configuration is obtained by passing ``parse_yaml(self.yaml_file)``
    through ``extract_task_config`` with the key 'BagOfWordsFromXML'.

    Sets ``output_path`` (default ''), ``tasks`` (default {}), and refreshes
    ``minimum_wordcount`` and ``include_structure``; the latter two keep
    their current value when the configuration has no matching key.
    """
    settings = extract_task_config(
        parse_yaml(self.yaml_file), 'BagOfWordsFromXML'
    )

    self.output_path = settings.get('output_directory', '')
    self.tasks = settings.get('tasks', {})

    # These attributes fall back to whatever is already set on the instance.
    for option in ('minimum_wordcount', 'include_structure'):
        setattr(self, option, settings.get(option, getattr(self, option)))
def _configure(self):
    """Copy the 'BagOfWordsFromParsed' task settings onto this instance.

    The configuration is obtained by passing ``parse_yaml(self.yaml_file)``
    through ``extract_task_config`` with the key 'BagOfWordsFromParsed'.

    Sets ``output_path`` (default ''), ``tasks`` (default {}), and
    ``minimum_wordcount``, which keeps its current value when the
    configuration does not provide one.
    """
    parsed = parse_yaml(self.yaml_file)
    task_settings = extract_task_config(parsed, 'BagOfWordsFromParsed')

    self.output_path = task_settings.get('output_directory', '')
    self.tasks = task_settings.get('tasks', {})

    # Fall back to the value already present on the instance.
    current_minimum = self.minimum_wordcount
    self.minimum_wordcount = task_settings.get(
        'minimum_wordcount', current_minimum
    )
def _configure(self):
    """Copy the 'Identify' task settings onto this instance.

    The configuration is obtained by passing ``parse_yaml(self.yaml_file)``
    through ``extract_task_config`` with the key 'Identify'.

    Sets ``output_path`` (default '') and ``params`` (default {}). When
    ``params`` contains an 'identifiers' entry, it must be a list; each item
    is resolved with ``self._locate_in_configs`` and the results are stored
    in ``self.identifiers``.

    Raises:
        Exception: if 'identifiers' is present in ``params`` but is not a
            list.
    """
    task_settings = extract_task_config(parse_yaml(self.yaml_file), 'Identify')

    self.output_path = task_settings.get('output_directory', '')
    self.params = task_settings.get('params', {})

    # Nothing more to do when no identifiers were configured.
    if 'identifiers' not in self.params:
        return

    configured = self.params['identifiers']
    if not isinstance(configured, list):
        # Only an explicit list is accepted; a glob.glob(...) based fallback
        # was sketched for this case but is disabled.
        raise Exception('identifier file list not found')

    self.identifiers = [self._locate_in_configs(entry) for entry in configured]
def _configure(self):
    """Load the 'Identify' task configuration into instance attributes.

    Passes ``parse_yaml(self.yaml_file)`` through ``extract_task_config``
    with the key 'Identify', then stores ``output_path`` (default '') and
    ``params`` (default {}).

    If ``params`` has an 'identifiers' key, its value has to be a list; every
    element is mapped through ``self._locate_in_configs`` and the resulting
    list is assigned to ``self.identifiers``.

    Raises:
        Exception: if ``params['identifiers']`` exists but is not a list.
    """
    parsed = parse_yaml(self.yaml_file)
    identify_settings = extract_task_config(parsed, 'Identify')

    self.output_path = identify_settings.get('output_directory', '')
    self.params = identify_settings.get('params', {})

    has_identifiers = 'identifiers' in self.params
    if has_identifiers and not isinstance(self.params['identifiers'], list):
        # A glob.glob(...) fallback for non-list values was left disabled,
        # so anything other than a list is rejected.
        raise Exception('identifier file list not found')

    if has_identifiers:
        located = []
        for name in self.params['identifiers']:
            located.append(self._locate_in_configs(name))
        self.identifiers = located
def _configure(self):
    """Copy the 'IdentityEDA' task settings onto this instance.

    The configuration is obtained by passing ``parse_yaml(self.yaml_file)``
    through ``extract_task_config`` with the key 'IdentityEDA'.

    Sets ``input_path`` (default ''), ``output_path`` (default ''),
    ``delimiter`` (default ',') and ``delimiter_replace`` (default ';;').

    The identifier files are chosen as follows:

    * if 'identifiers' is configured, that value is used as-is;
    * otherwise, if 'identifier_pattern' is configured, the paths matched by
      ``glob.glob`` on that pattern are used;
    * otherwise an empty list is used.

    The chosen files are handed to ``load_yamls`` and the stripped 'name' of
    each loaded entry, wrapped in single quotes, is stored in
    ``self.aggregation_terms``.
    """
    config = parse_yaml(self.yaml_file)
    config = extract_task_config(config, 'IdentityEDA')

    self.input_path = config.get('input_directory', '')
    self.output_path = config.get('output_directory', '')
    self.delimiter = config.get('delimiter', ',')
    self.delimiter_replace = config.get('delimiter_replace', ';;')

    # Fallback list: every file matching the pattern, or nothing when no
    # pattern is configured. Binding it unconditionally matters because it is
    # evaluated as the default argument below; previously it was left
    # unbound without a pattern, raising UnboundLocalError even when an
    # explicit 'identifiers' list was supplied.
    identifier_pattern = config.get('identifier_pattern', '')
    matched_files = glob.glob(identifier_pattern) if identifier_pattern else []

    # An explicit 'identifiers' list takes precedence over the pattern.
    identifiers = config.get('identifiers', matched_files)

    # NOTE(review): assumes load_yamls accepts an empty list and yields no
    # entries in that case - confirm against its implementation.
    identifier_yaml = load_yamls(identifiers)
    self.aggregation_terms = [
        "'%s'" % entry['name'].strip() for entry in identifier_yaml
    ]
def _configure(self):
    """Copy the 'SimpleParliament' task settings onto this instance.

    The configuration is obtained by passing ``parse_yaml(self.yaml_file)``
    through ``extract_task_config`` with the key 'SimpleParliament'.

    Sets ``output_path`` (default '') and ``params`` (default {}).
    """
    settings = extract_task_config(
        parse_yaml(self.yaml_file), 'SimpleParliament'
    )
    self.output_path = settings.get('output_directory', '')
    self.params = settings.get('params', {})
def _configure(self):
    """Copy the 'Clean' task settings onto this instance.

    The configuration is obtained by passing ``parse_yaml(self.yaml_file)``
    through ``extract_task_config`` with the key 'Clean'.

    Sets ``output_path`` from 'output_directory' (default '').
    """
    task_settings = extract_task_config(parse_yaml(self.yaml_file), 'Clean')
    self.output_path = task_settings.get('output_directory', '')
def _configure(self):
    """Copy the 'Parliament' task settings onto this instance.

    The configuration is obtained by passing ``parse_yaml(self.yaml_file)``
    through ``extract_task_config`` with the key 'Parliament'.

    Sets ``output_path`` (default '') and ``params`` (default {}).
    """
    parsed = parse_yaml(self.yaml_file)
    parliament_settings = extract_task_config(parsed, 'Parliament')

    self.output_path = parliament_settings.get('output_directory', '')
    self.params = parliament_settings.get('params', {})
def _configure(self):
    """Copy the 'TextPreprocessing' task settings onto this instance.

    The configuration is obtained by passing ``parse_yaml(self.yaml_file)``
    through ``extract_task_config`` with the key 'TextPreprocessing'.

    Sets ``output_path`` (default '') and ``tasks`` (default {}).
    """
    settings = extract_task_config(
        parse_yaml(self.yaml_file), 'TextPreprocessing'
    )
    self.output_path = settings.get('output_directory', '')
    self.tasks = settings.get('tasks', {})