def train(self, checkpoint=None):
    """Run the full training procedure for this task.

    Loads the datasets, persists the bound vectorizers under the basedir,
    builds the model, and delegates to ``baseline.train.fit`` (forwarding
    the CoNLL output path and raw texts).  When fitting completes, the
    basedir is zipped and the reporting hooks are closed.

    :param checkpoint: Optional checkpoint identifier forwarded to the trainer
    :return: A ``(model, metrics)`` tuple
    """
    self._load_dataset()
    baseline.save_vectorizers(self.get_basedir(), self.vectorizers)
    model = self._create_model()
    conll_file = self.config_params.get("conll_output", None)
    fit_config = self.config_params['train']
    fit_config['checkpoint'] = checkpoint
    metrics = baseline.train.fit(
        model,
        self.train_data,
        self.valid_data,
        self.test_data,
        conll_output=conll_file,
        txts=self.txts,
        **fit_config
    )
    baseline.zip_files(self.get_basedir())
    self._close_reporting_hooks()
    return model, metrics
def train(self, checkpoint=None):
    """Train the model for this task and return it with its metrics.

    The flow is: load the data feeds, save the vectorizers to the basedir,
    construct the model, then hand everything to ``baseline.train.fit``.
    Finally the basedir is zipped and reporting hooks are shut down.

    :param checkpoint: Optional checkpoint identifier forwarded to the trainer
    :return: A ``(model, metrics)`` tuple
    """
    self._load_dataset()
    basedir = self.get_basedir()
    baseline.save_vectorizers(basedir, self.vectorizers)
    model = self._create_model()
    trainer_kwargs = self.config_params['train']
    trainer_kwargs['checkpoint'] = checkpoint
    metrics = baseline.train.fit(
        model, self.train_data, self.valid_data, self.test_data,
        conll_output=self.config_params.get("conll_output", None),
        txts=self.txts,
        **trainer_kwargs
    )
    baseline.zip_files(basedir)
    self._close_reporting_hooks()
    return model, metrics
def train(self, checkpoint=None):
    """Prepare model/train parameters and run ``baseline.train.fit``.

    Unlike the sibling implementations, this one passes the raw model
    parameter dict (not a constructed model) to the fit function, after
    stamping it with the task name and feature descriptors.

    :param checkpoint: Optional checkpoint identifier forwarded to the trainer
    :return: None
    """
    self._reorganize_params()
    self._load_dataset()
    # NOTE(review): Dont do this here! We need to move train_data elsewhere
    calc_lr_params(self.config_params['train'], self.train_data.steps)
    baseline.save_vectorizers(self.get_basedir(), self.vectorizers)
    model_config = self.config_params['model']
    model_config['task'] = self.task_name()
    model_config['features'] = self._get_features()
    fit_config = self.config_params['train']
    fit_config['checkpoint'] = checkpoint
    baseline.train.fit(
        model_config, self.train_data, self.valid_data, self.test_data, **fit_config
    )
    baseline.zip_files(self.get_basedir())
    self._close_reporting_hooks()
def train(self, checkpoint=None):
    """Train the model, handling the `zaremba` LR schedule specially.

    When ``lr_scheduler_type`` is ``zaremba``, the epoch-based decay points
    are converted into global step counts (using ``self.train_data.steps``)
    and stored in ``bounds`` before fitting.  The siblings of this method
    accept a ``checkpoint`` argument; it is added here with a ``None``
    default so existing no-argument callers are unaffected, and forwarded
    to ``baseline.train.fit`` for consistency with those implementations.

    :param checkpoint: Optional checkpoint identifier forwarded to the trainer
        (defaults to ``None`` for backward compatibility)
    :return: None
    """
    self._load_dataset()
    # Alias the train config; this is the same dict object, so writes below
    # are visible through self.config_params as before.
    train_params = self.config_params['train']
    if train_params.get('lr_scheduler_type', None) == 'zaremba':
        # Zaremba decays at epoch boundaries: translate epoch indices into
        # step counts so the scheduler sees global-step bounds.
        first_range = int(train_params['start_decay_epoch'] * self.train_data.steps)
        train_params['bounds'] = [first_range] + list(
            np.arange(
                train_params['start_decay_epoch'] + 1,
                train_params['epochs'] + 1,
                dtype=np.int32
            ) * self.train_data.steps
        )
    baseline.save_vectorizers(self.get_basedir(), self.vectorizers)
    model = self._create_model()
    train_params['checkpoint'] = checkpoint
    baseline.train.fit(model, self.train_data, self.valid_data, self.test_data, **train_params)
    baseline.zip_files(self.get_basedir())
    self._close_reporting_hooks()
def train(self, checkpoint=None):
    """This method delegates to several sub-hooks in order to complete training.

    1. call `_load_dataset()` which initializes the `DataFeed` fields of this class
    2. call `baseline.save_vectorizers()` which write out the bound `vectorizers`
       fields to a file in the `basedir`
    3. call `baseline.train.fit()` which executes the training procedure and
       yields a saved model
    4. call `baseline.zip_files()` which zips all files in the `basedir` with the
       same `PID` as this process
    5. call `_close_reporting_hooks()` which lets the reporting hooks know that
       the job is finished

    The ``checkpoint`` parameter is added (default ``None``, so existing
    no-argument callers are unaffected) to match the sibling ``train``
    implementations, which forward it to the trainer and return the fit
    results.

    :param checkpoint: Optional checkpoint identifier forwarded to the trainer
    :return: A ``(model, metrics)`` tuple
    """
    self._load_dataset()
    baseline.save_vectorizers(self.get_basedir(), self.vectorizers)
    model = self._create_model()
    train_params = self.config_params['train']
    train_params['checkpoint'] = checkpoint
    metrics = baseline.train.fit(model, self.train_data, self.valid_data, self.test_data, **train_params)
    baseline.zip_files(self.get_basedir())
    self._close_reporting_hooks()
    return model, metrics
def train(self, checkpoint=None):
    """Train the model, translating `zaremba` epoch decays into step bounds.

    If the configured LR scheduler is ``zaremba``, the decay epochs are
    multiplied out by the number of steps per epoch and stored under
    ``bounds`` in the train config before fitting.

    :param checkpoint: Optional checkpoint identifier forwarded to the trainer
    :return: A ``(model, metrics)`` tuple
    """
    self._load_dataset()
    # Single alias for the train config dict; mutations below are visible
    # through self.config_params exactly as with repeated lookups.
    cfg = self.config_params['train']
    if cfg.get('lr_scheduler_type', None) == 'zaremba':
        steps_per_epoch = self.train_data.steps
        decay_start = cfg['start_decay_epoch']
        first_range = int(decay_start * steps_per_epoch)
        later_epochs = np.arange(decay_start + 1, cfg['epochs'] + 1, dtype=np.int32)
        cfg['bounds'] = [first_range] + list(later_epochs * steps_per_epoch)
    baseline.save_vectorizers(self.get_basedir(), self.vectorizers)
    model = self._create_model()
    cfg['checkpoint'] = checkpoint
    metrics = baseline.train.fit(model, self.train_data, self.valid_data, self.test_data, **cfg)
    baseline.zip_files(self.get_basedir())
    self._close_reporting_hooks()
    return model, metrics
def train(self, checkpoint=None):
    """This method delegates to several sub-hooks in order to complete training.

    1. call `_load_dataset()` which initializes the `DataFeed` fields of this class
    2. call `baseline.save_vectorizers()` which write out the bound `vectorizers`
       fields to a file in the `basedir`
    3. call `baseline.train.fit()` which executes the training procedure and
       yields a saved model
    4. call `baseline.zip_files()` which zips all files in the `basedir` with the
       same `PID` as this process
    5. call `_close_reporting_hooks()` which lets the reporting hooks know that
       the job is finished

    :param checkpoint: Optional checkpoint identifier forwarded to the trainer
    :return: A ``(model, metrics)`` tuple
    """
    self._load_dataset()
    basedir = self.get_basedir()
    baseline.save_vectorizers(basedir, self.vectorizers)
    model = self._create_model()
    fit_config = self.config_params['train']
    fit_config['checkpoint'] = checkpoint
    metrics = baseline.train.fit(
        model, self.train_data, self.valid_data, self.test_data, **fit_config
    )
    baseline.zip_files(basedir)
    self._close_reporting_hooks()
    return model, metrics
def train(self, checkpoint=None):
    """Train from a model-parameter dict, optionally emitting CoNLL output.

    The model parameters are stamped with features, labels, and the task
    name, and the trainer receives the CoNLL output path and raw texts via
    the train config.  If a CoNLL output path is set, its parent directory
    is created up front so the trainer can write to it.

    :param checkpoint: Optional checkpoint identifier forwarded to the trainer
    :return: None
    """
    self._load_dataset()
    baseline.save_vectorizers(self.get_basedir(), self.vectorizers)
    self._reorganize_params()
    conll_path = self.config_params.get("conll_output", None)
    model_config = self.config_params['model']
    model_config['features'] = self._get_features()
    model_config['labels'] = self._get_labels()
    model_config['task'] = self.task_name()
    fit_config = self.config_params['train']
    fit_config['checkpoint'] = checkpoint
    fit_config['conll_output'] = conll_path
    fit_config['txts'] = self.txts
    if conll_path is not None:
        # Make sure the output directory exists before the trainer writes to it.
        parent = os.path.dirname(conll_path)
        if parent:
            os.makedirs(parent, exist_ok=True)
    baseline.train.fit(
        model_config, self.train_data, self.valid_data, self.test_data, **fit_config
    )
    baseline.zip_files(self.get_basedir())
    self._close_reporting_hooks()
def train(self, checkpoint=None):
    """This method delegates to several sub-hooks in order to complete training.

    1. call `_load_dataset()` which initializes the `DataFeed` fields of this class
    2. call `baseline.save_vectorizers()` which write out the bound `vectorizers`
       fields to a file in the `basedir`
    3. call `baseline.train.fit()` which executes the training procedure and
       yields a saved model
    4. call `baseline.zip_files()` which zips all files in the `basedir` with the
       same `PID` as this process
    5. call `_close_reporting_hooks()` which lets the reporting hooks know that
       the job is finished

    :param checkpoint: Optional checkpoint identifier forwarded to the trainer
    :return: Nothing
    """
    self._reorganize_params()
    baseline.save_vectorizers(self.get_basedir(), self.vectorizers)
    self._load_dataset()
    model_config = self.config_params['model']
    model_config['features'] = self._get_features()
    model_config['labels'] = self._get_labels()
    model_config['task'] = self.task_name()
    fit_config = self.config_params['train']
    fit_config['checkpoint'] = checkpoint
    baseline.train.fit(
        model_config, self.train_data, self.valid_data, self.test_data, **fit_config
    )
    baseline.zip_files(self.get_basedir())
    self._close_reporting_hooks()