Example #1
0
    def initialize(self, resource: Resources, configs: HParams):
        """Set up the trainer from shared pipeline resources.

        Pulls the alphabets and the embedding table out of ``resource``,
        selects a device, seeds the RNGs, then builds the CRF tagger and
        its SGD optimizer.

        Args:
            resource: Shared pipeline resources holding the alphabets and
                the pre-built word embedding table.
            configs: Configuration object carrying ``config_model`` and
                ``config_data``.
        """
        self.resource = resource

        # Vocabularies prepared by an earlier pipeline stage.
        for attr in ("word_alphabet", "char_alphabet", "ner_alphabet"):
            setattr(self, attr, resource.get(attr))
        embedding_table = resource.get("word_embedding_table")

        self.config_model = configs.config_model
        self.config_data = configs.config_data

        self.normalize_func = utils.normalize_digit_word

        # Prefer the GPU when one is present.
        use_cuda = torch.cuda.is_available()
        self.device = torch.device("cuda" if use_cuda else "cpu")

        # Seed before constructing the model so weight init is reproducible.
        utils.set_random_seed(self.config_model.random_seed)

        self.model = BiRecurrentConvCRF(
            embedding_table,
            self.char_alphabet.size(),
            self.ner_alphabet.size(),
            self.config_model,
        ).to(device=self.device)

        self.optim = SGD(
            self.model.parameters(),
            lr=self.config_model.learning_rate,
            momentum=self.config_model.momentum,
            nesterov=True,
        )

        self.trained_epochs = 0

        # Publish the model so downstream pipeline components can reach it.
        self.resource.update(model=self.model)
Example #2
0
    def initialize(self, resources: Resources, configs: Config):
        """Set this component up for inference.

        Loads any required alphabets/embeddings that are missing from
        ``resources`` off disk, picks a device, restores (or constructs)
        the NER model, and switches it to eval mode.

        Args:
            resources: Shared pipeline resources; may already hold the
                alphabets, embedding table, a ``device``, and a ``model``.
            configs: Configuration object carrying ``config_model`` and
                ``config_data``.
        """
        super().initialize(resources, configs)

        self.resource = resources
        self.config_model = configs.config_model
        self.config_data = configs.config_data

        # Directory that on-disk resources (alphabets, embeddings, model
        # weights) are loaded from.
        resource_path = configs.config_model.resource_dir

        keys = {
            "word_alphabet",
            "char_alphabet",
            "ner_alphabet",
            "word_embedding_table",
        }

        # Only load from disk the entries the pipeline did not provide.
        missing_keys = list(keys.difference(self.resource.keys()))

        self.resource.load(keys=missing_keys, path=resource_path)

        self.word_alphabet = resources.get("word_alphabet")
        self.char_alphabet = resources.get("char_alphabet")
        self.ner_alphabet = resources.get("ner_alphabet")
        word_embedding_table = resources.get("word_embedding_table")

        # Honor a device chosen upstream; otherwise prefer GPU when present.
        if resources.get("device"):
            self.device = resources.get("device")
        else:
            self.device = (torch.device("cuda") if torch.cuda.is_available()
                           else torch.device("cpu"))

        self.normalize_func = utils.normalize_digit_word

        if "model" not in self.resource.keys():

            def load_model(path):
                # Build the architecture, then fill in checkpoint weights
                # when a file exists at ``path``.
                model = BiRecurrentConvCRF(
                    word_embedding_table,
                    self.char_alphabet.size(),
                    self.ner_alphabet.size(),
                    self.config_model,
                )

                if os.path.exists(path):
                    with open(path, "rb") as f:
                        # map_location lets GPU-saved checkpoints load on
                        # CPU-only machines.
                        weights = torch.load(f, map_location=self.device)
                        model.load_state_dict(weights)
                return model

            self.resource.load(keys={"model": load_model}, path=resource_path)

        self.model = resources.get("model")
        self.model.to(self.device)
        # Inference only: disable dropout / use running batch-norm stats.
        self.model.eval()

        utils.set_random_seed(self.config_model.random_seed)
Example #3
0
    def initialize(self, resource: Resources, configs: HParams):
        """Prepare this predictor for inference.

        Loads any alphabets/embeddings missing from ``resource`` off disk,
        selects a device, restores (or builds) the NER model, and puts it
        in eval mode.

        Args:
            resource: Shared pipeline resources; may already carry the
                alphabets, embedding table, a ``device``, and a ``model``.
            configs: Configuration object carrying ``config_model`` and
                ``config_data``.
        """
        self.define_batcher()

        self.resource = resource
        self.config_model = configs.config_model
        self.config_data = configs.config_data

        # Directory that on-disk resources are loaded from.
        resource_path = configs.config_model.resource_dir

        keys = {
            "word_alphabet", "char_alphabet", "ner_alphabet",
            "word_embedding_table"
        }

        # Only hit the disk for entries the pipeline has not provided.
        missing_keys = list(keys.difference(self.resource.keys()))

        self.resource.load(keys=missing_keys, path=resource_path)

        self.word_alphabet = resource.get("word_alphabet")
        self.char_alphabet = resource.get("char_alphabet")
        self.ner_alphabet = resource.get("ner_alphabet")
        word_embedding_table = resource.get("word_embedding_table")

        # Honor a device chosen upstream; otherwise prefer GPU when present.
        if resource.get("device"):
            self.device = resource.get("device")
        else:
            self.device = torch.device('cuda') if torch.cuda.is_available() \
                else torch.device('cpu')

        self.normalize_func = utils.normalize_digit_word

        if "model" not in self.resource.keys():

            def load_model(path):
                # Build the architecture first; weights are filled in from
                # the checkpoint when one exists at ``path``.
                model = BiRecurrentConvCRF(word_embedding_table,
                                           self.char_alphabet.size(),
                                           self.ner_alphabet.size(),
                                           self.config_model)

                if os.path.exists(path):
                    with open(path, "rb") as f:
                        # Fix: use torch.load with map_location rather than
                        # pickle.load, so checkpoints saved on GPU restore
                        # correctly on CPU-only machines (matches the other
                        # predictor variant in this file).
                        weights = torch.load(f, map_location=self.device)
                        model.load_state_dict(weights)
                return model

            # Fix: pass the resource directory explicitly, consistent with
            # the resource.load call above.
            self.resource.load(keys={"model": load_model},
                               path=resource_path)

        self.model = resource.get("model")
        self.model.to(self.device)
        # Inference only: disable dropout / use running batch-norm stats.
        self.model.eval()

        utils.set_random_seed(self.config_model.random_seed)
    def initialize(self, resources: Resources, configs: Config):
        """Initialize the trainer from the pipeline's shared resources.

        The training pipeline invokes this during its initialization phase,
        supplying the shared resources and the trainer configuration.

        Args:
            resources: The resources shared in the pipeline (alphabets and
                the word embedding table).
            configs: Configuration object for this trainer.
        """
        self.resource = resources

        # Vocabularies prepared by the preprocessing stage.
        self.word_alphabet = resources.get("word_alphabet")
        self.char_alphabet = resources.get("char_alphabet")
        self.ner_alphabet = resources.get("ner_alphabet")
        embedding_table = resources.get("word_embedding_table")

        self.config_model = configs.config_model
        self.config_data = configs.config_data

        self.normalize_func = utils.normalize_digit_word

        # Train on GPU when available, otherwise fall back to CPU.
        if torch.cuda.is_available():
            self.device = torch.device("cuda")
        else:
            self.device = torch.device("cpu")

        # Seed before model construction so initialization is reproducible.
        utils.set_random_seed(self.config_model.random_seed)

        model = BiRecurrentConvCRF(
            embedding_table,
            self.char_alphabet.size(),
            self.ner_alphabet.size(),
            self.config_model,
        )
        self.model = model.to(device=self.device)

        self.optim = SGD(
            self.model.parameters(),
            lr=self.config_model.learning_rate,
            momentum=self.config_model.momentum,
            nesterov=True,
        )

        self.trained_epochs = 0

        # Expose the freshly built model through the shared resources.
        self.resource.update(model=self.model)