def __init__(self, classifier_dims, num_classes, gaussian_noise, dropout, feature_dropout,
             n_layers, device, n_tokens_in=512, n_tokens_out=8, **kwargs):
    super(FasttextCNN, self).__init__()
    fasttext_file = kwargs.pop("fasttext_file", "crawl-300d-2M-subword.bin")  # "wiki-news-300d-1M-subword.bin"
    fasttext_file_2 = kwargs.pop("fasttext_file_2", "wiki-news-300d-1M-subword.bin")
    self.num_classes = num_classes
    self.binary = num_classes == 2
    self.n_tokens_in = n_tokens_in
    self.n_tokens_out = n_tokens_out
    self.device = device
    self.word_masking_proba = kwargs.pop("word_masking_proba", 0.0)
    embedding_dims = classifier_dims // 2
    internal_dims = classifier_dims
    self.text_model = fasttext.load_model(fasttext_file)
    self.text_model_2 = fasttext.load_model(fasttext_file_2)
    if fasttext_file_2 == "wiki-news-300d-1M-subword.bin":
        ft2_dim = 300
    else:
        ft2_dim = 512
    self.ft1_nn = ExpandContractV2(300, embedding_dims, dropout, feature_dropout)
    self.ft2_nn = ExpandContractV2(ft2_dim, embedding_dims, dropout, feature_dropout)
    self.bpe_nn = ExpandContractV2(300, embedding_dims, dropout, feature_dropout)
    self.cngram_nn = ExpandContractV2(100, embedding_dims, dropout, feature_dropout)
    self.input_nn = ExpandContractV2(4 * embedding_dims, internal_dims, dropout, feature_dropout)
    self.bpe = BPEmb(dim=300)
    self.cngram = CharNGram()
    self.featurizer = GRUFeaturizer(n_tokens_in, internal_dims, n_tokens_out, classifier_dims,
                                    internal_dims, n_layers, gaussian_noise, dropout)
    self.final_layer = fb_1d_loss_builder(classifier_dims, n_tokens_out, num_classes, dropout, **kwargs)
    if "stored_model" in kwargs:
        load_stored_params(self, kwargs["stored_model"])
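# A minimal instantiation sketch for the constructor above (hypothetical argument values;
# assumes the FasttextCNN class is importable and that "crawl-300d-2M-subword.bin" and
# "wiki-news-300d-1M-subword.bin" are available in the working directory).
model = FasttextCNN(classifier_dims=256, num_classes=2,
                    gaussian_noise=0.1, dropout=0.1, feature_dropout=0.05,
                    n_layers=2, device="cpu",
                    n_tokens_in=512, n_tokens_out=8,
                    word_masking_proba=0.15)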
def test_unsupported_language():
    error_class = None
    error_message = ''
    try:
        BPEmb(language='python')
    except Exception as e:
        error_class = e.__class__
        error_message = str(e)
    assert error_class is ValueError
    assert error_message.startswith("Language 'python' not supported.")
def test_unsupported_merge_ops():
    error_class = None
    error_message = ''
    try:
        BPEmb(merge_ops=42)
    except Exception as e:
        error_class = e.__class__
        error_message = str(e)
    assert error_class is ValueError
    assert error_message.startswith("Number of '42' merge operations not supported.")
def test_unsupported_dim():
    error_class = None
    error_message = ''
    try:
        BPEmb(dim=42)
    except Exception as e:
        error_class = e.__class__
        error_message = str(e)
    assert error_class is ValueError
    assert error_message.startswith("Embedding dimensionality of '42' not supported.")
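# The three error-path tests above share the same try/except pattern. If pytest is the
# test runner for this suite (an assumption, not confirmed here), the same checks can be
# written more compactly with pytest.raises, which captures the exception and exposes it
# for message assertions. Hypothetical compact variants:
import pytest


def test_unsupported_language_compact():
    with pytest.raises(ValueError) as excinfo:
        BPEmb(language='python')
    assert str(excinfo.value).startswith("Language 'python' not supported.")


def test_unsupported_dim_compact():
    with pytest.raises(ValueError) as excinfo:
        BPEmb(dim=42)
    assert str(excinfo.value).startswith("Embedding dimensionality of '42' not supported.")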
def test_bpemb(mock_urlretrieve):
    directory = 'tests/_test_data/bpemb/'

    # Make sure the URL returns a 200 status
    mock_urlretrieve.side_effect = urlretrieve_side_effect

    # Attempt to parse a subset of BPEmb vectors
    vectors = BPEmb(cache=directory)

    # Our test data only contains a subset of 5 tokens
    assert len(vectors) == 5

    # Embedding dimensionality should be 300 by default
    assert len(vectors['▁the']) == 300

    # Test the implementation of __contains__()
    assert '▁the' in vectors

    # Test with unknown characters
    assert len(vectors['漢字']) == 300

    # Clean up
    os.remove(os.path.join(directory, 'en.wiki.bpe.op50000.d300.w2v.txt.pt'))
def __init__(self, classifier_dims, num_classes, gaussian_noise, dropout,
             internal_dims, n_layers,
             featurizer, final_layer_builder,
             n_tokens_in=64, n_tokens_out=16, use_as_super=False, **kwargs):
    embedding_dims = 768
    super(AlbertClassifer, self).__init__(classifier_dims, num_classes, embedding_dims, gaussian_noise, dropout,
                                          internal_dims, n_layers, featurizer, final_layer_builder,
                                          n_tokens_in, n_tokens_out, True, **kwargs)
    self.word_masking_proba = kwargs["word_masking_proba"] if "word_masking_proba" in kwargs else 0.0
    self.need_fasttext = "fasttext_vector_config" in kwargs
    if "fasttext_vector_config" in kwargs:
        import fasttext
        ftvc = kwargs["fasttext_vector_config"]
        gru_layers = ftvc.pop("gru_layers", 0)
        fasttext_crawl = fasttext.load_model("crawl-300d-2M-subword.bin")
        fasttext_wiki = fasttext.load_model("wiki-news-300d-1M-subword.bin")
        bpe = BPEmb(dim=200)
        cngram = CharNGram()
        self.word_vectorizers = dict(fasttext_crawl=fasttext_crawl, fasttext_wiki=fasttext_wiki,
                                     bpe=bpe, cngram=cngram)
        # 900 = 300 (crawl) + 300 (wiki) + 200 (BPEmb) + 100 (CharNGram)
        crawl_nn = ExpandContract(900, embedding_dims, dropout,
                                  use_layer_norm=True, unit_norm=False, groups=(4, 4))
        self.crawl_nn = crawl_nn
        n_tokens_in = n_tokens_in + (8 * int(self.n_tokens_in / (8 * 1.375) + 1))
        if gru_layers > 0:
            lstm = nn.Sequential(GaussianNoise(gaussian_noise),
                                 nn.GRU(embedding_dims, int(embedding_dims / 2), gru_layers,
                                        batch_first=True, bidirectional=True, dropout=dropout))
            pre_query_layer = nn.Sequential(lstm, LambdaLayer(lambda x: x[0]), nn.LayerNorm(embedding_dims))
        else:
            pre_query_layer = nn.LayerNorm(embedding_dims)
        self.pre_query_layer = pre_query_layer

    if not use_as_super:
        model = kwargs["model"] if "model" in kwargs else 'albert-base-v2'
        global_dir = get_global("models_dir")
        model = os.path.join(global_dir, model) if model in os.listdir(global_dir) else model
        self.tokenizer = AutoTokenizer.from_pretrained(model)
        self.model = AutoModel.from_pretrained(model)
        print("Pick stored Model", model, "Model Class = ", type(self.model), "Tokenizer Class = ", type(self.tokenizer))
        if featurizer == "cnn":
            self.featurizer = CNN1DFeaturizer(n_tokens_in, embedding_dims, n_tokens_out, classifier_dims,
                                              internal_dims, n_layers, gaussian_noise, dropout)
        elif featurizer == "gru":
            self.featurizer = GRUFeaturizer(n_tokens_in, embedding_dims, n_tokens_out, classifier_dims,
                                            internal_dims, n_layers, gaussian_noise, dropout)
        elif featurizer == "basic":
            self.featurizer = BasicFeaturizer(n_tokens_in, embedding_dims, n_tokens_out, classifier_dims,
                                              internal_dims, n_layers, gaussian_noise, dropout)
        elif featurizer == "transformer":
            self.attention_drop_proba = kwargs["attention_drop_proba"] if "attention_drop_proba" in kwargs else 0.0
            n_encoders = kwargs.pop("n_encoders", n_layers)
            n_decoders = kwargs.pop("n_decoders", n_layers)
            self.featurizer = TransformerFeaturizer(n_tokens_in, embedding_dims, n_tokens_out,
                                                    classifier_dims, internal_dims,
                                                    n_encoders, n_decoders,
                                                    gaussian_noise, dropout, self.attention_drop_proba)
        else:
            raise NotImplementedError()

        self.final_layer = final_layer_builder(classifier_dims, n_tokens_out, num_classes, dropout, **kwargs)

    if "stored_model" in kwargs:
        load_stored_params(self, kwargs["stored_model"])
    self.reg_layers = [(c, c.p if hasattr(c, "p") else c.sigma) for c in self.children()
                       if c.__class__ == GaussianNoise or c.__class__ == nn.Dropout]
def __init__(self, classifier_dims, num_classes, embedding_dims, gaussian_noise, dropout,
             internal_dims, n_layers,
             featurizer, final_layer_builder,
             n_tokens_in=64, n_tokens_out=16,
             capabilities2dims=dict(), use_as_super=False, **kwargs):
    super(LangFeaturesModel, self).__init__(classifier_dims, num_classes, embedding_dims, gaussian_noise, dropout,
                                            internal_dims, n_layers, featurizer, final_layer_builder,
                                            n_tokens_in, n_tokens_out, use_as_super=True, **kwargs)
    assert "capabilities" in kwargs
    capabilities = kwargs["capabilities"]
    kwargs["rake_dims"] = kwargs["rake_dims"] if "rake_dims" in kwargs else 32
    kwargs["yake_dims"] = kwargs["yake_dims"] if "yake_dims" in kwargs else 32
    assert "key_phrases" not in capabilities or ("key_phrases" in capabilities and "spacy" in capabilities)
    use_layer_norm = kwargs["use_layer_norm"] if "use_layer_norm" in kwargs else False
    self.capabilities = capabilities
    embedding_dim = 8
    # Output dims contributed by each enabled capability
    cap_to_dim_map = {"spacy": 128, "snlp": 32, "key_phrases": 64, "nltk": 192, "full_view": 64,
                      "tmoji": 32, "ibm_max": 16, "gensim": 256, "fasttext_crawl": 256}
    cap_to_dim_map.update(capabilities2dims)
    all_dims = sum([cap_to_dim_map[c] for c in capabilities])
    self.cap_to_dim_map = cap_to_dim_map
    self.all_dims = all_dims

    if "spacy" in capabilities:
        tr = pytextrank.TextRank(token_lookback=7)
        self.nlp = spacy.load("en_core_web_lg", disable=[])
        self.nlp.add_pipe(tr.PipelineComponent, name="textrank", last=True)
        spacy_in_dims = (96 * 2) + (11 * embedding_dim) + 2
        self.spacy_nn = ExpandContract(spacy_in_dims, cap_to_dim_map["spacy"], dropout,
                                       use_layer_norm=use_layer_norm, groups=(2, 4))

    if "fasttext_crawl" in capabilities:
        self.bpe = BPEmb(dim=200)
        self.cngram = CharNGram()
        fasttext_crawl_file = kwargs["fasttext_crawl_file"] if "fasttext_crawl_file" in kwargs else "crawl-300d-2M-subword.bin"
        self.crawl = fasttext.load_model(fasttext_crawl_file)
        # 200 (BPEmb) + 300 (fastText crawl) + 100 (CharNGram)
        self.crawl_nn = ExpandContract(200 + 300 + 100, cap_to_dim_map["fasttext_crawl"], dropout,
                                       use_layer_norm=use_layer_norm, groups=(4, 4))

    if "gensim" in capabilities:
        gensim = [api.load("glove-twitter-50"),
                  api.load("glove-wiki-gigaword-50"),
                  api.load("word2vec-google-news-300"),
                  api.load("conceptnet-numberbatch-17-06-300")]
        self.gensim = gensim
        self.gensim_nn = ExpandContract(400, cap_to_dim_map["gensim"], dropout,
                                        use_layer_norm=use_layer_norm, groups=(4, 4))

    if "full_view" in capabilities:
        full_sent_in_dims = 300
        self.full_sent_nn = ExpandContract(full_sent_in_dims, cap_to_dim_map["full_view"], dropout,
                                           use_layer_norm=use_layer_norm, groups=(4, 4))

    if "snlp" in capabilities:
        import stanza
        self.snlp = stanza.Pipeline('en', processors='tokenize,pos,lemma,depparse,ner',
                                    use_gpu=False, pos_batch_size=2048)
        self.snlp_nn = ExpandContract(embedding_dim * 5, cap_to_dim_map["snlp"], dropout,
                                      use_layer_norm=use_layer_norm)

    if "key_phrases" in capabilities:
        import yake
        self.kw_extractor = yake.KeywordExtractor(lan="en", n=3, dedupLim=0.9, dedupFunc='seqm',
                                                  windowsSize=3, top=10, features=None)
        self.key_occ_cnt_pytextrank = nn.Embedding(8, embedding_dim)
        nn.init.normal_(self.key_occ_cnt_pytextrank.weight, std=1 / embedding_dim)
        self.key_wc_pytextrank = nn.Embedding(4, embedding_dim)
        nn.init.normal_(self.key_wc_pytextrank.weight, std=1 / embedding_dim)

        yake_dims = kwargs["yake_dims"] if "yake_dims" in kwargs else 32
        self.yake_dims = yake_dims
        self.yake_nn = ExpandContract(300, yake_dims, dropout, use_layer_norm=use_layer_norm, groups=(2, 2))

        try:
            from multi_rake import Rake
            rake_dims = kwargs["rake_dims"] if "rake_dims" in kwargs else 32
            self.rake_dims = rake_dims
            self.rake_nn = ExpandContract(300, rake_dims, dropout, use_layer_norm=use_layer_norm, groups=(2, 2))
            self.rake = Rake(language_code="en")
            keyphrases_dim = 2 * embedding_dim + rake_dims + yake_dims
        except:
            self.rake = None
            keyphrases_dim = 2 * embedding_dim + yake_dims
        self.keyphrase_nn = ExpandContract(keyphrases_dim, cap_to_dim_map["key_phrases"], dropout,
                                           use_layer_norm=use_layer_norm, groups=(4, 4))

    fasttext_file = kwargs["fasttext_file"] if "fasttext_file" in kwargs else "wiki-news-300d-1M-subword.bin"
    if not set(capabilities).isdisjoint({"key_phrases", "full_view", "nltk"}):
        self.text_model = fasttext.load_model(fasttext_file)

    self.pdict = get_all_tags()
    self.tag_em = nn.Embedding(len(self.pdict) + 1, embedding_dim)
    nn.init.normal_(self.tag_em.weight, std=1 / embedding_dim)
    self.sw_em = nn.Embedding(2, embedding_dim)
    nn.init.normal_(self.sw_em.weight, std=1 / embedding_dim)
    self.sent_start_em = nn.Embedding(2, embedding_dim)
    nn.init.normal_(self.sent_start_em.weight, std=1 / embedding_dim)
    self.is_oov_em = nn.Embedding(2, embedding_dim)
    nn.init.normal_(self.is_oov_em.weight, std=1 / embedding_dim)
    self.has_digit_em = nn.Embedding(2, embedding_dim)
    nn.init.normal_(self.has_digit_em.weight, std=1 / embedding_dim)
    self.is_mask_em = nn.Embedding(2, embedding_dim)
    nn.init.normal_(self.is_mask_em.weight, std=1 / embedding_dim)
    self.w_len = nn.Embedding(16, embedding_dim)
    nn.init.normal_(self.w_len.weight, std=1 / embedding_dim)
    self.wc_emb = nn.Embedding(16, embedding_dim)
    nn.init.normal_(self.wc_emb.weight, std=1 / embedding_dim)

    if "nltk" in capabilities:
        import rake_nltk
        from textblob import TextBlob
        from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer as VaderSentimentIntensityAnalyzer
        self.stop_words = set(stopwords.words('english'))
        self.rake_nltk = rake_nltk.Rake()
        self.key_wc_rake_nltk = nn.Embedding(4, embedding_dim)
        nn.init.normal_(self.key_wc_rake_nltk.weight, std=1 / embedding_dim)
        self.nltk_sid = SentimentIntensityAnalyzer()
        self.vader_sid = VaderSentimentIntensityAnalyzer()
        in_dims = 310 + 5 * embedding_dim
        self.nltk_nn = ExpandContract(in_dims, cap_to_dim_map["nltk"], dropout,
                                      use_layer_norm=use_layer_norm, groups=(2, 4))

    if "ibm_max" in capabilities:
        from ..external import ModelWrapper
        self.ibm_max = ModelWrapper()
        for p in self.ibm_max.model.parameters():
            p.requires_grad = False
        self.ibm_nn = ExpandContract(6, cap_to_dim_map["ibm_max"], dropout,
                                     use_layer_norm=use_layer_norm, groups=(1, 1))

    if "tmoji" in capabilities:
        from torchmoji.sentence_tokenizer import SentenceTokenizer
        from torchmoji.model_def import torchmoji_emojis
        from torchmoji.global_variables import PRETRAINED_PATH, VOCAB_PATH
        with open(VOCAB_PATH, 'r') as f:
            maxlen = self.n_tokens_in
            self.vocabulary = json.load(f)
            self.st = SentenceTokenizer(self.vocabulary, maxlen)
        self.tmoji = torchmoji_emojis(PRETRAINED_PATH)
        for p in self.tmoji.parameters():
            p.requires_grad = False
        self.tm_nn = ExpandContract(64, cap_to_dim_map["tmoji"], dropout,
                                    use_layer_norm=use_layer_norm, groups=(1, 1))

    self.contract_nn = ExpandContract(self.all_dims, embedding_dims, dropout,
                                      use_layer_norm=True, unit_norm=False, groups=(4, 4))
    if not use_as_super:
        if featurizer == "cnn":
            self.featurizer = CNN1DFeaturizer(n_tokens_in, embedding_dims, n_tokens_out, classifier_dims,
                                              internal_dims, n_layers, gaussian_noise, dropout)
        elif featurizer == "gru":
            self.featurizer = GRUFeaturizer(n_tokens_in, embedding_dims, n_tokens_out, classifier_dims,
                                            internal_dims, n_layers, gaussian_noise, dropout)
        elif featurizer == "basic":
            self.featurizer = BasicFeaturizer(n_tokens_in, embedding_dims, n_tokens_out, classifier_dims,
                                              internal_dims, n_layers, gaussian_noise, dropout)
        elif featurizer == "transformer":
            self.attention_drop_proba = kwargs["attention_drop_proba"] if "attention_drop_proba" in kwargs else 0.0
            n_encoders = kwargs.pop("n_encoders", n_layers)
            n_decoders = kwargs.pop("n_decoders", n_layers)
            self.featurizer = TransformerFeaturizer(n_tokens_in, embedding_dims, n_tokens_out,
                                                    classifier_dims, internal_dims,
                                                    n_encoders, n_decoders,
                                                    gaussian_noise, dropout, self.attention_drop_proba)
        else:
            raise NotImplementedError()

        self.final_layer = final_layer_builder(classifier_dims, n_tokens_out, num_classes, dropout, **kwargs)

    if "stored_model" in kwargs:
        load_stored_params(self, kwargs["stored_model"])
    self.reg_layers = get_regularization_layers(self)
def _build_dataloader(self):
    self.val_loader = self.corpus = None
    if self.dataset_kind == "mnist":
        transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,)),
        ])
        self.dataset = MNISTBufferedDataset(self.data_dir, download=True, train=True, transform=transform)
        self.val_dataset = MNISTBufferedDataset(self.data_dir, download=True, transform=transform)
        self.train_sampler = MNISTSequenceSampler(
            self.dataset,
            sequences=self.sequences,
            batch_size=self.batch_size,
            random_mnist_images=not self.static_digit,
            randomize_sequence_cursors=self.randomize_sequence_cursors,
            noise_buffer=self.noise_buffer,
            use_mnist_pct=self.use_mnist_pct,
            max_batches=self.batches_in_epoch,
        )
        if self.static_digit:
            # For the static digit paradigm, the val & train samplers must
            # match to ensure the same digit prototype is used for each sequence item.
            self.val_sampler = self.train_sampler
        else:
            self.val_sampler = MNISTSequenceSampler(
                self.val_dataset,
                sequences=self.sequences,
                batch_size=self.batch_size,
                random_mnist_images=not self.static_digit,
                randomize_sequence_cursors=self.randomize_sequence_cursors,
                noise_buffer=self.noise_buffer,
                use_mnist_pct=self.use_mnist_pct,
                max_batches=self.eval_batches_in_epoch,
            )
        self.train_loader = DataLoader(
            self.dataset,
            batch_sampler=self.train_sampler,
            collate_fn=pred_sequence_collate,
        )
        self.val_loader = DataLoader(
            self.val_dataset,
            batch_sampler=self.val_sampler,
            collate_fn=pred_sequence_collate,
        )
    elif self.dataset_kind == "ptb":
        # Download the Penn Treebank dataset if not already present
        from torchnlp.datasets import penn_treebank_dataset
        print("Maybe download PTB...")
        penn_treebank_dataset(self.data_dir + "/PTB", train=True, test=True)
        corpus = lang_util.Corpus(self.data_dir + "/PTB")
        train_sampler = PTBSequenceSampler(
            corpus.train,
            batch_size=self.batch_size,
            max_batches=self.batches_in_epoch,
        )
        if self.embedding_kind == "rsm_bitwise":
            embedding = lang_util.BitwiseWordEmbedding().embedding_dict
        elif self.embedding_kind in ["bpe", "glove"]:
            from torchnlp.word_to_vector import BPEmb, GloVe
            cache_dir = self.data_dir + "/torchnlp/.word_vectors_cache"
            if self.embedding_kind == "bpe":
                vectors = BPEmb(dim=self.embed_dim, cache=cache_dir)
            else:
                vectors = GloVe(name="6B", dim=self.embed_dim, cache=cache_dir)
            embedding = {}
            for word_id, word in enumerate(corpus.dictionary.idx2word):
                embedding[word_id] = vectors[word]
        elif "ptb_fasttext" in self.embedding_kind:
            import fasttext
            # Generated via notebooks/ptb_embeddings.ipynb
            embedding = {}
            ft_model = fasttext.load_model(self.data_dir + "/embeddings/%s.bin" % self.embedding_kind)
            for word_id, word in enumerate(corpus.dictionary.idx2word):
                embedding[word_id] = torch.tensor(ft_model[word])
        if self.embedding_kind:
            print("Loaded embedding dict (%s) with %d entries" % (self.embedding_kind, len(embedding)))
        collate_fn = partial(ptb_pred_sequence_collate, vector_dict=embedding)
        self.train_loader = DataLoader(corpus.train, batch_sampler=train_sampler, collate_fn=collate_fn)
        val_sampler = PTBSequenceSampler(
            corpus.test,
            batch_size=self.eval_batch_size,
            max_batches=self.eval_batches_in_epoch,
            uniform_offsets=True,
        )
        self.val_loader = DataLoader(corpus.test, batch_sampler=val_sampler, collate_fn=collate_fn)
        self.corpus = corpus
    print("Built dataloaders...")
def _build_dataloader(self):
    # Extra element for sequential prediction labels
    self.val_loader = None
    if self.dataset_kind == "mnist":
        transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,)),
        ])
        self.dataset = MNISTBufferedDataset(self.data_dir, download=True, train=True, transform=transform)
        self.val_dataset = MNISTBufferedDataset(self.data_dir, download=True, transform=transform)
        self.train_sampler = MNISTSequenceSampler(
            self.dataset,
            sequences=self.sequences,
            batch_size=self.batch_size,
            random_mnist_images=not self.static_digit,
            noise_buffer=self.noise_buffer,
            use_mnist_pct=self.use_mnist_pct,
            max_batches=self.batches_in_epoch,
        )
        if self.static_digit:
            # For the static digit paradigm, the val & train samplers must
            # match to ensure the same digit prototype is used for each sequence item.
            self.val_sampler = self.train_sampler
        else:
            self.val_sampler = MNISTSequenceSampler(
                self.val_dataset,
                sequences=self.sequences,
                batch_size=self.batch_size,
                random_mnist_images=not self.static_digit,
                noise_buffer=self.noise_buffer,
                use_mnist_pct=self.use_mnist_pct,
                max_batches=self.batches_in_epoch,
            )
        self.train_loader = DataLoader(
            self.dataset,
            batch_sampler=self.train_sampler,
            collate_fn=pred_sequence_collate,
        )
        self.val_loader = DataLoader(
            self.val_dataset,
            batch_sampler=self.val_sampler,
            collate_fn=pred_sequence_collate,
        )
    elif self.dataset_kind == "ptb":
        # Download the Penn Treebank dataset if not already present
        from torchnlp.datasets import penn_treebank_dataset
        penn_treebank_dataset(self.data_dir + "/PTB", train=True)
        corpus = lang_util.Corpus(self.data_dir + "/PTB")
        train_sampler = PTBSequenceSampler(
            corpus.train,
            batch_size=self.batch_size,
            max_batches=self.batches_in_epoch,
        )
        if self.embedding_kind == "rsm_bitwise":
            embedding = lang_util.BitwiseWordEmbedding().embedding_dict
        elif self.embedding_kind == "bpe":
            from torchnlp.word_to_vector import BPEmb
            cache_dir = self.data_dir + "/torchnlp/.word_vectors_cache"
            vectors = BPEmb(dim=self.embed_dim, cache=cache_dir)
            embedding = {}
            for word_id, word in enumerate(corpus.dictionary.idx2word):
                embedding[word_id] = vectors[word]
        collate_fn = partial(ptb_pred_sequence_collate, vector_dict=embedding)
        self.train_loader = DataLoader(corpus.train, batch_sampler=train_sampler, collate_fn=collate_fn)
        val_sampler = PTBSequenceSampler(
            corpus.test,
            batch_size=self.batch_size,
            max_batches=self.batches_in_epoch,
        )
        self.val_loader = DataLoader(corpus.test, batch_sampler=val_sampler, collate_fn=collate_fn)
def __init__(self, classifier_dims, num_classes, embedding_dims, gaussian_noise, dropout,
             internal_dims, n_layers,
             featurizer, final_layer_builder,
             n_tokens_in=64, n_tokens_out=16, use_as_super=False, **kwargs):
    super(Fasttext1DCNNModel, self).__init__()
    fasttext_file = kwargs.pop("fasttext_file", "crawl-300d-2M-subword.bin")  # "wiki-news-300d-1M-subword.bin"
    fasttext_model = kwargs.pop("fasttext_model", None)
    assert fasttext_file is not None or fasttext_model is not None or use_as_super
    self.num_classes = num_classes
    self.binary = num_classes == 2
    self.auc_loss = True
    self.dice = True
    self.n_tokens_in = n_tokens_in
    self.n_tokens_out = n_tokens_out
    if not use_as_super:
        import fasttext
        if fasttext_file is not None:
            self.text_model = fasttext.load_model(fasttext_file)
        else:
            self.text_model = fasttext_model
        # 200 (BPEmb) + 300 (fastText) + 100 (CharNGram)
        self.crawl_nn = ExpandContract(200 + 300 + 100, embedding_dims, dropout,
                                       use_layer_norm=True, unit_norm=False, groups=(8, 4))
        self.bpe = BPEmb(dim=200)
        self.cngram = CharNGram()
        if featurizer == "cnn":
            self.featurizer = CNN1DFeaturizer(n_tokens_in, embedding_dims, n_tokens_out, classifier_dims,
                                              internal_dims, n_layers, gaussian_noise, dropout)
        elif featurizer == "transformer":
            self.attention_drop_proba = kwargs["attention_drop_proba"] if "attention_drop_proba" in kwargs else 0.0
            n_encoders = kwargs["n_encoders"] if "n_encoders" in kwargs else n_layers
            n_decoders = kwargs["n_decoders"] if "n_decoders" in kwargs else n_layers
            self.featurizer = TransformerFeaturizer(n_tokens_in, embedding_dims, n_tokens_out,
                                                    classifier_dims, internal_dims,
                                                    n_encoders, n_decoders,
                                                    gaussian_noise, dropout, self.attention_drop_proba)
        elif featurizer == "basic":
            self.featurizer = BasicFeaturizer(n_tokens_in, embedding_dims, n_tokens_out, classifier_dims,
                                              internal_dims, n_layers, gaussian_noise, dropout)
        elif featurizer == "gru":
            self.featurizer = GRUFeaturizer(n_tokens_in, embedding_dims, n_tokens_out, classifier_dims,
                                            internal_dims, n_layers, gaussian_noise, dropout)
        else:
            raise NotImplementedError()

        loss = kwargs["loss"] if "loss" in kwargs else None
        self.final_layer = final_layer_builder(classifier_dims, n_tokens_out, num_classes, dropout, **kwargs)

    if "stored_model" in kwargs:
        load_stored_params(self, kwargs["stored_model"])
    self.reg_layers = [(c, c.p if hasattr(c, "p") else c.sigma) for c in self.children()
                       if c.__class__ == GaussianNoise or c.__class__ == nn.Dropout]
    self.auc_loss_coef = kwargs.pop("auc_loss_coef", 0.0)
    self.dice_loss_coef = kwargs.pop("dice_loss_coef", 0.0)
    self.auc_method = kwargs.pop("auc_method", 1)
    self.auc_dice_loss = get_auc_dice_loss(num_classes, self.dice_loss_coef, self.auc_loss_coef,
                                           auc_method=self.auc_method)