def _load_model_args(self, input_dir):
    """Create a ``ModelArgs`` instance and load its settings from *input_dir*.

    Args:
        input_dir: Location handed straight to ``ModelArgs.load``
            (presumably a saved-model directory -- confirm against callers).

    Returns:
        The ``ModelArgs`` object after ``load(input_dir)`` has been called on it.
    """
    loaded_args = ModelArgs()
    # ``load`` is called for its effect on the instance; its return value is
    # not used.
    loaded_args.load(input_dir)
    return loaded_args
import os
import sys
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset, Subset
import torch.nn.functional as F
from src import deepSVDD
from src.deepSVDD import *
from src.utils.config import Config
from src.base.torchvision_dataset import TorchvisionDataset
from src.utils.config import Config
from src.base.base_net import BaseNet
from simpletransformers.language_representation import RepresentationModel
from simpletransformers.config.model_args import ModelArgs

# NOTE(review): ``model_args`` is not passed to ``RepresentationModel`` in the
# statement below, so ``max_seq_length=156`` may not reach the model unless it
# is wired in elsewhere -- confirm whether ``args=model_args`` was intended.
model_args = ModelArgs(max_seq_length=156)

# Module-level RoBERTa representation model, constructed at import time with
# CUDA disabled.
model = RepresentationModel(
    model_type="roberta",
    model_name="seyonec/PubChem10M_SMILES_BPE_396_250",
    use_cuda=False,
)


def mean_pool(model, sentences):
    """Encode *sentences* with *model* and return the resulting vectors.

    Args:
        model: Object exposing ``encode_sentences(sentences, combine_strategy=...)``.
            Note that this parameter shadows the module-level ``model``.
        sentences: Input passed unchanged to ``model.encode_sentences``
            (presumably a list of SMILES strings -- confirm against callers).

    Returns:
        Whatever ``model.encode_sentences(sentences, combine_strategy=0)``
        returns.

    NOTE(review): despite the function name, the encoding is requested with
    ``combine_strategy=0`` rather than ``"mean"``; per the simpletransformers
    documentation an integer strategy selects a single token embedding.
    Verify this is the intended pooling for the installed library version.
    """
    # The attention mask was previously fetched through an extra
    # ``model._tokenize`` call but never used; that redundant tokenization
    # pass has been dropped.
    return model.encode_sentences(sentences, combine_strategy=0)


def smiles2txt(dataset):
    ''' reading the smiles from the csv file and saves them in txt file in order to get the graph embendings from each smile'''