"""
:type: OpenAttack.utils.BertClassifier
:Size: 1.23GB
:Package Requirements:
    * transformers
    * pytorch

Pretrained BERT model on MNLI dataset. See :py:data:`Dataset.MNLI` for detail.
"""
from OpenAttack.utils import make_zip_downloader, BertClassifier

NAME = "Victim.BERT.MNLI"

URL = "https://cdn.data.thunlp.org/TAADToolbox/victim/bert_mnli.zip"
DOWNLOAD = make_zip_downloader(URL)


def LOAD(path):
    """Load the pretrained BERT victim classifier from ``path``.

    MNLI is a three-way classification task (entailment / neutral /
    contradiction), so the classifier head needs 3 output labels — the
    original code wrongly constructed it with 2.
    """
    return BertClassifier(path, 3)
"""
:type: function
:Size: 2.41MB

Model files for pos tagger in nltk.
`[code] <https://github.com/sloria/textblob-aptagger>`__
"""
import os

from OpenAttack.utils import make_zip_downloader

NAME = "TProcess.NLTKPerceptronPosTagger"

URL = "/TAADToolbox/averaged_perceptron_tagger.pickle.zip"
DOWNLOAD = make_zip_downloader(URL, "averaged_perceptron_tagger.pickle")


def LOAD(path):
    """Construct an nltk averaged-perceptron POS tagger from the
    downloaded pickle and return its ``tag`` method.
    """
    nltk = __import__("nltk")
    tagger = nltk.tag.PerceptronTagger(load=False)
    pickle_path = os.path.join(path, "averaged_perceptron_tagger.pickle")
    tagger.load("file:" + pickle_path)
    return tagger.tag
"""
:type: OpenAttack.utils.WordVector
:Size: 3GB
"""
import numpy as np
import os
from OpenAttack.utils import make_zip_downloader

NAME = "AttackAssist.ChineseWord2Vec"

URL = "/TAADToolbox/chinese-merge-word-embedding.txt.zip"
DOWNLOAD = make_zip_downloader(URL, "chinese-merge-word-embedding.txt")


def LOAD(path):
    """Parse the downloaded embedding text file into a WordEmbedding.

    Each line has the form ``word v1 ... v300``; any line whose vector is
    not exactly 300-dimensional (e.g. a count/dimension header) is skipped.
    """
    from OpenAttack.attack_assist import WordEmbedding

    word2id = {}
    id2vec = []
    file_name = os.path.join(path, "chinese-merge-word-embedding.txt")
    with open(file_name, "r", encoding="utf-8") as f:
        # Stream line by line: the file is ~3GB, so f.readlines() would
        # hold the entire raw text in memory on top of the parsed vectors.
        for line in f:
            tmp = line.strip().split(' ')
            word = tmp[0]
            embed = np.array([float(x) for x in tmp[1:]])
            if len(embed) != 300:
                continue
            word2id[word] = len(word2id)
            id2vec.append(embed)
    id2vec = np.stack(id2vec)
    return WordEmbedding(word2id, id2vec)
"""
:type: function
:Size: 158.351KB

Model files for nltk punkt sentence tokenizer.
"""
import os

from OpenAttack.utils import make_zip_downloader

NAME = "TProcess.NLTKSentTokenizer"

URL = "https://thunlp.oss-cn-qingdao.aliyuncs.com/TAADToolbox/punkt.english.pickle.zip"
DOWNLOAD = make_zip_downloader(URL, "english.pickle")


def LOAD(path):
    """Load the punkt English sentence tokenizer pickle and return its
    ``tokenize`` method.
    """
    nltk = __import__("nltk")
    pickle_path = os.path.join(path, "english.pickle")
    tokenizer = nltk.data.load("file:" + pickle_path)
    return tokenizer.tokenize
"""
:type: OpenAttack.utils.WordVector
:Size: 61.998MB

Counter-fitting Word Vectors to Linguistic Constraints.
`[pdf] <https://www.aclweb.org/anthology/N16-1018.pdf>`__
"""
import numpy as np
import os
from OpenAttack.utils import make_zip_downloader

NAME = "AttackAssist.CounterFit"

URL = "/TAADToolbox/counter-fitted-vectors.txt.zip"
DOWNLOAD = make_zip_downloader(URL, "counter-fitted-vectors.txt")


def LOAD(path):
    """Parse the counter-fitted vectors file into a WordEmbedding.

    Each line has the form ``word v1 ... v300``; lines whose vector is
    not exactly 300-dimensional are skipped.

    Bug fix: the original function built ``word2id``/``id2vec`` but never
    returned, so LOAD yielded ``None`` (the sibling ChineseWord2Vec loader
    returns ``WordEmbedding(word2id, id2vec)``).
    """
    from OpenAttack.attack_assist import WordEmbedding

    word2id = {}
    id2vec = []
    file_name = os.path.join(path, "counter-fitted-vectors.txt")
    with open(file_name, "r", encoding='utf-8') as f:
        # Iterate the file object directly instead of readlines() to
        # avoid holding the whole raw text in memory.
        for line in f:
            tmp = line.strip().split(" ")
            word = tmp[0]
            embed = np.array([float(x) for x in tmp[1:]])
            if len(embed) != 300:
                continue
            word2id[word] = len(word2id)
            id2vec.append(embed)
    id2vec = np.stack(id2vec)
    return WordEmbedding(word2id, id2vec)