def main():
    """Main function."""
    args = parse_args()

    smiles_list = uc.read_smi_file(args.input_smiles_path)

    LOG.info("Building vocabulary")
    tokenizer = mv.SMILESTokenizer()
    vocabulary = mv.create_vocabulary(smiles_list, tokenizer=tokenizer)

    tokens = vocabulary.tokens()
    LOG.info("Vocabulary contains %d tokens: %s", len(tokens), tokens)

    network_params = {
        'num_layers': args.num_layers,
        'layer_size': args.layer_size,
        'cell_type': args.cell_type,
        'embedding_layer_size': args.embedding_layer_size,
        'dropout': args.dropout
    }
    model = mm.Model(no_cuda=True, vocabulary=vocabulary, tokenizer=tokenizer,
                     network_params=network_params, max_sequence_length=args.max_sequence_length)

    LOG.info("Saving model at %s", args.output_model_path)
    model.save(args.output_model_path)
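# A minimal sketch of the argument parsing that main() above relies on; the flag
# spellings and default values are assumptions for illustration, not taken from
# the source. Only the resulting attribute names match what main() reads.
import argparse


def parse_args():
    """Parse the command-line arguments consumed by main() (hypothetical sketch)."""
    parser = argparse.ArgumentParser(description="Create an untrained model from a SMILES file.")
    parser.add_argument("--input-smiles-path", required=True,
                        help="Path to a file with one SMILES string per line.")
    parser.add_argument("--output-model-path", required=True,
                        help="Path where the newly created model will be saved.")
    parser.add_argument("--num-layers", type=int, default=3)
    parser.add_argument("--layer-size", type=int, default=512)
    parser.add_argument("--cell-type", default="lstm")
    parser.add_argument("--embedding-layer-size", type=int, default=256)
    parser.add_argument("--dropout", type=float, default=0.0)
    parser.add_argument("--max-sequence-length", type=int, default=256)
    return parser.parse_args()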
def run(self):
    """
    Performs the creation of the model.
    """
    if self._already_run:
        return

    LOG.info("Building vocabulary")
    tokenizer = mv.SMILESTokenizer()
    vocabulary = mv.create_vocabulary(self._smiles_list, tokenizer=tokenizer)

    tokens = vocabulary.tokens()
    LOG.info("Vocabulary contains %d tokens: %s", len(tokens), tokens)
    LOG.info("Saving model at %s", self._output_model_path)

    network_params = {
        'num_layers': self._num_layers,
        'layer_size': self._layer_size,
        'embedding_layer_size': self._embedding_layer_size,
        'dropout': self._dropout,
        'memory_cells': self._memory_cells,
        'cell_size': self._cell_size,
        'read_heads': self._read_heads,
        'num_controller_layers': self._num_controller_layers,
        'controller_type': self._controller_type,
        'model_type': self._model_type
    }
    model = mm.Model(vocabulary=vocabulary, tokenizer=tokenizer, network_params=network_params,
                     model_type=self._model_type, max_sequence_length=self._max_sequence_length)

    # Store each model in its own folder; append "(0)", "(1)", ... until a free name is found.
    model_folder = model.model_name.split('.')[0]
    base_folder_path = os.path.join(self._output_model_path, model_folder)
    storage_folder_path = base_folder_path
    i = 0
    while os.path.exists(storage_folder_path):
        storage_folder_path = '%s(%d)' % (base_folder_path, i)
        i += 1
    os.makedirs(storage_folder_path)

    self._output_model_path = os.path.join(storage_folder_path, model.model_name)
    model.model_dir = storage_folder_path
    model.save(self._output_model_path)
    LOG.info('Model saved!')
    LOG.info(model.__dict__)
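# A self-contained sketch of the collision-avoiding folder naming used in run()
# above; `next_free_path` is a hypothetical helper name introduced here for
# illustration and is not part of the source.
import os


def next_free_path(base_path):
    """Return `base_path` if unused, otherwise the first free base_path(0), base_path(1), ..."""
    candidate = base_path
    i = 0
    while os.path.exists(candidate):
        candidate = '%s(%d)' % (base_path, i)
        i += 1
    return candidate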
def run(self):
    """
    Carries out the creation of the model.
    """
    tokenizer = voc.SMILESTokenizer()
    vocabulary = voc.create_vocabulary(self._smiles_list, tokenizer=tokenizer)

    network_params = {
        'num_layers': self._num_layers,
        'layer_size': self._layer_size,
        'cell_type': self._cell_type,
        'embedding_layer_size': self._embedding_layer_size,
        'dropout': self._dropout,
        'layer_normalization': self._layer_normalization
    }
    model = reinvent.Model(no_cuda=True, vocabulary=vocabulary, tokenizer=tokenizer,
                           network_params=network_params, max_sequence_length=self._max_sequence_length)
    model.save(self._output_model_path)
    return model
def test_create(self):
    voc = mv.create_vocabulary(smiles_list=tfv.SMILES_LIST, tokenizer=mv.SMILESTokenizer())
    self.assertEqual(voc, tfv.simple())