Example #1
0
def main():
    """Entry point: build a SMILES vocabulary from a file and save a fresh model.

    Reads SMILES from the path given on the command line, derives a
    vocabulary with the SMILES tokenizer, instantiates a CPU-only model
    with the network hyper-parameters from the CLI args, and writes the
    model to the requested output path.
    """
    args = parse_args()

    smiles_list = uc.read_smi_file(args.input_smiles_path)

    LOG.info("Building vocabulary")
    tokenizer = mv.SMILESTokenizer()
    vocabulary = mv.create_vocabulary(smiles_list, tokenizer=tokenizer)

    tokens = vocabulary.tokens()
    LOG.info("Vocabulary contains %d tokens: %s", len(tokens), tokens)

    # Network hyper-parameters mirror the CLI argument names one-to-one,
    # so pull them straight off the parsed args.
    param_names = ('num_layers', 'layer_size', 'cell_type',
                   'embedding_layer_size', 'dropout')
    network_params = {name: getattr(args, name) for name in param_names}

    model = mm.Model(
        no_cuda=True,
        vocabulary=vocabulary,
        tokenizer=tokenizer,
        network_params=network_params,
        max_sequence_length=args.max_sequence_length,
    )
    LOG.info("Saving model at %s", args.output_model_path)
    model.save(args.output_model_path)
Example #2
0
    def run(self):
        """
        Performs the creation of the model.

        Builds a vocabulary from ``self._smiles_list``, constructs the model
        with the configured network parameters, and saves it into a
        uniquely-named folder under ``self._output_model_path``.
        Subsequent calls are no-ops once a model has been saved.
        """
        if self._already_run:
            return

        LOG.info("Building vocabulary")
        tokenizer = mv.SMILESTokenizer()
        vocabulary = mv.create_vocabulary(self._smiles_list,
                                          tokenizer=tokenizer)

        tokens = vocabulary.tokens()
        LOG.info("Vocabulary contains %d tokens: %s", len(tokens), tokens)
        LOG.info("Saving model at %s", self._output_model_path)
        network_params = {
            'num_layers': self._num_layers,
            'layer_size': self._layer_size,
            'embedding_layer_size': self._embedding_layer_size,
            'dropout': self._dropout,
            'memory_cells': self._memory_cells,
            'cell_size': self._cell_size,
            'read_heads': self._read_heads,
            'num_controller_layers': self._num_controller_layers,
            'controller_type': self._controller_type,
            'model_type': self._model_type
        }
        model = mm.Model(vocabulary=vocabulary,
                         tokenizer=tokenizer,
                         network_params=network_params,
                         model_type=self._model_type,
                         max_sequence_length=self._max_sequence_length)

        # Pick a storage folder that does not exist yet: "<base>", then
        # "<base>(0)", "<base>(1)", ... Building each candidate from the
        # untouched base path fixes the original's slicing bug, which cut a
        # fixed 3 characters off the previous candidate and therefore
        # corrupted the name once the counter reached two digits (i >= 10).
        model_folder = model.model_name.split('.')[0]
        base_path = os.path.join(self._output_model_path, model_folder)
        storage_folder_path = base_path
        i = 0
        while os.path.exists(storage_folder_path):
            storage_folder_path = '%s(%s)' % (base_path, i)
            i += 1

        os.makedirs(storage_folder_path)
        self._output_model_path = os.path.join(storage_folder_path,
                                               model.model_name)
        model.model_dir = storage_folder_path

        model.save(self._output_model_path)
        # Mark completion so the guard at the top actually short-circuits
        # repeated calls (the flag was checked but never set before).
        self._already_run = True
        LOG.info('Model saved!')
        LOG.info(model.__dict__)
Example #3
0
    def run(self):
        """
        Carries out the creation of the model.

        Derives a vocabulary from the stored SMILES list, builds a CPU-only
        model with the configured network parameters, saves it to
        ``self._output_model_path`` and returns the model instance.
        """
        tokenizer = voc.SMILESTokenizer()
        vocabulary = voc.create_vocabulary(self._smiles_list, tokenizer=tokenizer)

        # Keyword form yields the same string keys as the literal-dict version.
        network_params = dict(
            num_layers=self._num_layers,
            layer_size=self._layer_size,
            cell_type=self._cell_type,
            embedding_layer_size=self._embedding_layer_size,
            dropout=self._dropout,
            layer_normalization=self._layer_normalization,
        )
        model = reinvent.Model(
            no_cuda=True,
            vocabulary=vocabulary,
            tokenizer=tokenizer,
            network_params=network_params,
            max_sequence_length=self._max_sequence_length,
        )
        model.save(self._output_model_path)
        return model
Example #4
0
 def test_create(self):
     """A vocabulary built from the fixture SMILES equals the expected fixture vocabulary."""
     tokenizer = mv.SMILESTokenizer()
     vocabulary = mv.create_vocabulary(smiles_list=tfv.SMILES_LIST, tokenizer=tokenizer)
     self.assertEqual(vocabulary, tfv.simple())