def load_dataset(self, input_file: Optional[str] = None, input_dataset: Optional[Dataset] = None) -> None:
    """Load a dataset onto ``self.dataset``.

    Resolution order:
      * If a ``Dataset`` object is given, it wins (a simultaneous file path
        is ignored with a warning).
      * Else if a file path is given, that file is loaded.
      * Else the most recent file under ``self.config.PATHS.data`` is loaded.

    Args:
        input_file: Optional path to a serialized dataset file.
        input_dataset: Optional already-constructed Dataset object.

    Side effects:
        Sets ``self.dataset`` and, when loading from disk, ``self.input_file``.
    """
    logging.info('Loading dataset...')
    if input_dataset is not None:
        if input_file is not None:
            # Bug fix: the original logged this warning but never assigned the
            # object, leaving self.dataset unset despite the promise below.
            logging.warning('Both an input dataset and an input Dataset object were provided. Using the object.')
        self.dataset = input_dataset
    else:
        if input_file is None:
            # Neither argument given: fall back to the newest file on disk.
            self.input_file = helpers.find_latest_file(self.config.PATHS.data)
        else:
            self.input_file = click.format_filename(input_file)
        self.dataset = helpers.load_input_file(self.input_file)  # type: ignore
def load_model(self, input_file: Optional[str] = None, input_model: Optional[Model] = None) -> None:
    """Load a model onto ``self.model``.

    Resolution order:
      * If a ``Model`` object is given, it wins (a simultaneous file path
        is ignored with a warning).
      * Else if a file path is given, that file is loaded.
      * Else the most recent file under ``self.config.PATHS.models`` is loaded.

    Args:
        input_file: Optional path to a serialized model file.
        input_model: Optional already-constructed Model object.

    Side effects:
        Sets ``self.model`` and, when loading from disk, ``self.input_file``.
    """
    logging.info('Loading model...')
    if input_model is not None:
        if input_file is not None:
            # Bug fixes: (1) the original never assigned the object here,
            # leaving self.model unset; (2) the message was copy-pasted from
            # load_dataset and wrongly said "dataset"/"Dataset".
            logging.warning('Both an input file and an input Model object were provided. Using the object.')
        self.model = input_model
    else:
        if input_file is None:
            # Neither argument given: fall back to the newest file on disk.
            self.input_file = helpers.find_latest_file(self.config.PATHS.models)
        else:
            self.input_file = click.format_filename(input_file)
        self.model = helpers.load_input_file(self.input_file)  # type: ignore
def clean_data() -> List[Tuple]:
    """Convert Doccano-exported annotations into spaCy training tuples.

    Reads every file listed in ``config.ALL_DATA`` and converts each row to
    the spaCy NER training format::

        (text, {'entities': [(start, end, LABEL), ...]})

    Labels are upper-cased. Assumes each entry in ``row['labels']`` is a
    Doccano triple ``[start, end, label]`` — TODO confirm against the export.

    Returns:
        Training tuples aggregated across ALL countries. (Bug fix: the
        original rebound the list each iteration, so only the last
        country's data was returned.)
    """
    spacy_training_data: List[Tuple] = []
    for country in config.ALL_DATA:
        # Read country data; Doccano format.
        data = helpers.load_input_file(country)
        # Accumulate (extend) instead of overwriting per country.
        spacy_training_data.extend(
            (row['text'],
             {'entities': [(span[0], span[1], span[2].upper())
                           for span in row['labels']]})
            for row in data
        )
    return spacy_training_data