Exemple #1
0
 def train(self, checkpoint=None):
     """Load the data, build the model, fit it and return the model with the fit result.

     The steps run in this order: `_load_dataset()`, `baseline.save_vectorizers()`,
     `_create_model()`, `baseline.train.fit()`, `baseline.zip_files()` and finally
     `_close_reporting_hooks()`.

     :param checkpoint: Value stored under the `checkpoint` key of the `train` config
         section (which is modified in place) and thereby passed to `baseline.train.fit()`
     :return: A `(model, metrics)` tuple, `metrics` being whatever `baseline.train.fit()` returned
     """
     self._load_dataset()
     baseline.save_vectorizers(self.get_basedir(), self.vectorizers)
     net = self._create_model()

     conll_path = self.config_params.get("conll_output", None)
     fit_kwargs = self.config_params['train']
     fit_kwargs['checkpoint'] = checkpoint

     # The top-level `conll_output` setting and `self.txts` travel as explicit keywords
     # alongside everything found in the `train` section
     datasets = (self.train_data, self.valid_data, self.test_data)
     fit_result = baseline.train.fit(net, *datasets, conll_output=conll_path, txts=self.txts, **fit_kwargs)

     baseline.zip_files(self.get_basedir())
     self._close_reporting_hooks()
     return net, fit_result
Exemple #2
0
 def train(self, checkpoint=None):
     """Drive one complete training run for this task.

     Sequence of calls: `_load_dataset()`, `baseline.save_vectorizers()` on the base
     directory, `_create_model()`, `baseline.train.fit()`, `baseline.zip_files()` on the
     base directory, then `_close_reporting_hooks()`.

     :param checkpoint: Written into the `train` section of `config_params` under the key
         `checkpoint` before that section is expanded into keyword arguments for the fit call
     :return: The created model together with the value returned by `baseline.train.fit()`
     """
     self._load_dataset()
     baseline.save_vectorizers(self.get_basedir(), self.vectorizers)
     created_model = self._create_model()

     settings = self.config_params
     conll_target = settings.get("conll_output", None)
     train_section = settings['train']
     # In-place update: the checkpoint remains visible on `config_params['train']` afterwards
     train_section['checkpoint'] = checkpoint

     fit = baseline.train.fit
     outcome = fit(
         created_model,
         self.train_data,
         self.valid_data,
         self.test_data,
         conll_output=conll_target,
         txts=self.txts,
         **train_section
     )

     baseline.zip_files(self.get_basedir())
     self._close_reporting_hooks()
     return created_model, outcome
Exemple #3
0
    def train(self, checkpoint=None):
        """Prepare the configuration and data, then fit a model described by `config_params['model']`.

        The `model` config section is passed to `baseline.train.fit()` in place of a
        constructed model; before that it receives the task name under `task` and the
        result of `_get_features()` under `features`.

        :param checkpoint: Stored under the `checkpoint` key of the `train` config section,
            which is then expanded into keyword arguments for `baseline.train.fit()`
        :return: Nothing
        """
        self._reorganize_params()
        self._load_dataset()

        # TODO: the original author marked this call as misplaced ("Dont do this here!") --
        # `train_data` is meant to move elsewhere so the LR parameters need not be derived here
        calc_lr_params(self.config_params['train'], self.train_data.steps)
        baseline.save_vectorizers(self.get_basedir(), self.vectorizers)

        # Both config sections are edited in place
        model_spec = self.config_params['model']
        model_spec['task'] = self.task_name()
        model_spec['features'] = self._get_features()

        fit_kwargs = self.config_params['train']
        fit_kwargs['checkpoint'] = checkpoint

        datasets = (self.train_data, self.valid_data, self.test_data)
        baseline.train.fit(model_spec, *datasets, **fit_kwargs)

        baseline.zip_files(self.get_basedir())
        self._close_reporting_hooks()
Exemple #4
0
 def train(self):
     """Load the data, optionally derive LR decay bounds, then fit the model.

     When the `train` config section has `lr_scheduler_type` equal to `'zaremba'`, a
     `bounds` list is written into that section: its first entry is
     `int(start_decay_epoch * steps)` and the rest are `epoch * steps` for every epoch
     from `start_decay_epoch + 1` through `epochs`, where `steps` is `self.train_data.steps`.

     Afterwards the vectorizers are saved, the model is created and fitted with the
     `train` section as keyword arguments, the base directory is passed to
     `baseline.zip_files()` and the reporting hooks are closed.

     :return: Nothing
     """
     self._load_dataset()

     schedule_cfg = self.config_params['train']
     if schedule_cfg.get('lr_scheduler_type', None) == 'zaremba':
         decay_start = schedule_cfg['start_decay_epoch']
         steps_per_epoch = self.train_data.steps
         first_bound = int(decay_start * steps_per_epoch)
         # `np.arange` excludes its stop value, hence `epochs + 1` to include the last epoch
         later_epochs = np.arange(decay_start + 1, schedule_cfg['epochs'] + 1, dtype=np.int32)
         schedule_cfg['bounds'] = [first_bound] + list(later_epochs * steps_per_epoch)

     baseline.save_vectorizers(self.get_basedir(), self.vectorizers)
     net = self._create_model()

     datasets = (self.train_data, self.valid_data, self.test_data)
     baseline.train.fit(net, *datasets, **self.config_params['train'])

     baseline.zip_files(self.get_basedir())
     self._close_reporting_hooks()
Exemple #5
0
    def train(self):
        """Run the training pipeline by chaining the task's sub-hooks.

        In order:

        1. `_load_dataset()` initializes the `DataFeed` fields of this class
        2. `baseline.save_vectorizers()` writes the bound `vectorizers` to a file in the `basedir`
        3. `_create_model()` produces the model to be trained
        4. `baseline.train.fit()` executes the training procedure, using the `train` config
           section as keyword arguments, and yields a saved model
        5. `baseline.zip_files()` zips all files in the `basedir` with the same `PID` as this process
        6. `_close_reporting_hooks()` tells the reporting hooks that the job is finished

        :return: Nothing
        """
        self._load_dataset()
        baseline.save_vectorizers(self.get_basedir(), self.vectorizers)

        net = self._create_model()
        datasets = (self.train_data, self.valid_data, self.test_data)
        fit_kwargs = self.config_params['train']
        baseline.train.fit(net, *datasets, **fit_kwargs)

        baseline.zip_files(self.get_basedir())
        self._close_reporting_hooks()
Exemple #6
0
 def train(self, checkpoint=None):
     """Load the data, optionally derive LR decay bounds, fit the model and return the result.

     When the `train` config section has `lr_scheduler_type` equal to `'zaremba'`, a
     `bounds` list is written into that section: its first entry is
     `int(start_decay_epoch * steps)` and the rest are `epoch * steps` for every epoch
     from `start_decay_epoch + 1` through `epochs`, where `steps` is `self.train_data.steps`.

     :param checkpoint: Stored under the `checkpoint` key of the `train` config section
         (modified in place) before that section is expanded into the fit call
     :return: A `(model, metrics)` tuple, `metrics` being whatever `baseline.train.fit()` returned
     """
     self._load_dataset()

     schedule_cfg = self.config_params['train']
     if schedule_cfg.get('lr_scheduler_type', None) == 'zaremba':
         decay_start = schedule_cfg['start_decay_epoch']
         steps_per_epoch = self.train_data.steps
         first_bound = int(decay_start * steps_per_epoch)
         # `np.arange` excludes its stop value, hence `epochs + 1` to include the last epoch
         later_epochs = np.arange(decay_start + 1, schedule_cfg['epochs'] + 1, dtype=np.int32)
         schedule_cfg['bounds'] = [first_bound] + list(later_epochs * steps_per_epoch)

     baseline.save_vectorizers(self.get_basedir(), self.vectorizers)
     net = self._create_model()

     fit_kwargs = self.config_params['train']
     fit_kwargs['checkpoint'] = checkpoint
     datasets = (self.train_data, self.valid_data, self.test_data)
     fit_result = baseline.train.fit(net, *datasets, **fit_kwargs)

     baseline.zip_files(self.get_basedir())
     self._close_reporting_hooks()
     return net, fit_result
Exemple #7
0
    def train(self, checkpoint=None):
        """Run the training pipeline by chaining the task's sub-hooks and return its results.

        In order:

        1. `_load_dataset()` initializes the `DataFeed` fields of this class
        2. `baseline.save_vectorizers()` writes the bound `vectorizers` to a file in the `basedir`
        3. `_create_model()` produces the model to be trained
        4. `baseline.train.fit()` executes the training procedure, using the `train` config
           section as keyword arguments, and yields a saved model
        5. `baseline.zip_files()` zips all files in the `basedir` with the same `PID` as this process
        6. `_close_reporting_hooks()` tells the reporting hooks that the job is finished

        :param checkpoint: Stored under the `checkpoint` key of the `train` config section
            (modified in place) before that section is expanded into the fit call
        :return: A `(model, metrics)` tuple, `metrics` being whatever `baseline.train.fit()` returned
        """
        self._load_dataset()
        baseline.save_vectorizers(self.get_basedir(), self.vectorizers)
        net = self._create_model()

        fit_kwargs = self.config_params['train']
        fit_kwargs['checkpoint'] = checkpoint
        datasets = (self.train_data, self.valid_data, self.test_data)
        fit_result = baseline.train.fit(net, *datasets, **fit_kwargs)

        baseline.zip_files(self.get_basedir())
        self._close_reporting_hooks()
        return net, fit_result
Exemple #8
0
    def train(self, checkpoint=None):
        """Load the data, complete the model/train config sections and fit.

        The `model` config section is passed to `baseline.train.fit()` in place of a
        constructed model; it first receives `features`, `labels` and `task` entries. The
        `train` section receives `checkpoint`, `conll_output` (read from the top level of
        `config_params`, `None` when absent) and `txts`, and is expanded into keyword
        arguments. If `conll_output` has a directory component, that directory is created
        before fitting.

        :param checkpoint: Stored under the `checkpoint` key of the `train` config section
        :return: Nothing
        """
        self._load_dataset()
        baseline.save_vectorizers(self.get_basedir(), self.vectorizers)
        self._reorganize_params()

        conll_path = self.config_params.get("conll_output", None)

        # Both config sections are edited in place, one key at a time
        model_spec = self.config_params['model']
        model_spec['features'] = self._get_features()
        model_spec['labels'] = self._get_labels()
        model_spec['task'] = self.task_name()

        fit_kwargs = self.config_params['train']
        fit_kwargs['checkpoint'] = checkpoint
        fit_kwargs['conll_output'] = conll_path
        fit_kwargs['txts'] = self.txts

        # A bare file name has an empty dirname, in which case nothing needs creating
        conll_dir = None if conll_path is None else os.path.dirname(conll_path)
        if conll_dir:
            os.makedirs(conll_dir, exist_ok=True)

        datasets = (self.train_data, self.valid_data, self.test_data)
        baseline.train.fit(model_spec, *datasets, **fit_kwargs)

        baseline.zip_files(self.get_basedir())
        self._close_reporting_hooks()
Exemple #9
0
    def train(self, checkpoint=None):
        """Run the training pipeline by chaining the task's sub-hooks.

        In order:

        1. `_reorganize_params()` is called first
        2. `baseline.save_vectorizers()` writes the bound `vectorizers` to a file in the `basedir`
        3. `_load_dataset()` initializes the `DataFeed` fields of this class
        4. the `model` config section receives `features`, `labels` and `task` entries, and the
           `train` section receives `checkpoint`
        5. `baseline.train.fit()` executes the training procedure and yields a saved model; it
           is given the `model` config section rather than a constructed model
        6. `baseline.zip_files()` zips all files in the `basedir` with the same `PID` as this process
        7. `_close_reporting_hooks()` tells the reporting hooks that the job is finished

        :param checkpoint: Stored under the `checkpoint` key of the `train` config section
            (modified in place) before that section is expanded into the fit call
        :return: Nothing
        """
        self._reorganize_params()
        # Vectorizers are written out before the dataset is loaded
        baseline.save_vectorizers(self.get_basedir(), self.vectorizers)
        self._load_dataset()

        model_spec = self.config_params['model']
        model_spec['features'] = self._get_features()
        model_spec['labels'] = self._get_labels()
        model_spec['task'] = self.task_name()

        fit_kwargs = self.config_params['train']
        fit_kwargs['checkpoint'] = checkpoint

        datasets = (self.train_data, self.valid_data, self.test_data)
        baseline.train.fit(model_spec, *datasets, **fit_kwargs)

        baseline.zip_files(self.get_basedir())
        self._close_reporting_hooks()