def __init__(
    self,
    embed: nn.Embedding = None,
    hidden_size: int = 200,
    dropout: float = 0.1,
    layer: str = "lstm",
    z_rnn_size: int = 30,
):
    super(DependentLatentModel, self).__init__()

    self.layer = layer
    emb_size = embed.weight.shape[1]
    enc_size = hidden_size * 2  # bidirectional encoder output size

    self.embed_layer = nn.Sequential(embed, nn.Dropout(p=dropout))
    self.enc_layer = get_encoder(layer, emb_size, hidden_size)

    # The z-cell consumes the encoder state concatenated with the previous
    # scalar sample z_{t-1}, hence the "+ 1" on its input size.
    if layer == "rcnn":
        self.z_cell = RCNNCell(enc_size + 1, z_rnn_size)
    else:
        self.z_cell = LSTMCell(enc_size + 1, z_rnn_size)

    # The Kuma gate conditions on the encoder state and the z-cell state.
    self.z_layer = KumaGate(enc_size + z_rnn_size)

    self.z = None      # z samples
    self.z_dists = []  # z distribution(s)

    self.report_params()

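
# Illustrative sketch, not part of the repo: constructing a DependentLatentModel
# from a toy embedding table. The 1000-word vocabulary and 300-d vectors are
# placeholder assumptions. With hidden_size=200 the bidirectional encoder gives
# enc_size=400, so the z-cell input is 401 (encoder state + previous sample) and
# the Kuma gate input is 430 (encoder state + z_rnn_size-dimensional cell state).
def _example_build_dependent_latent_model():
    embed = nn.Embedding(1000, 300, padding_idx=1)  # placeholder embedding table
    return DependentLatentModel(embed=embed, hidden_size=200,
                                dropout=0.1, layer="lstm", z_rnn_size=30)
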
def __init__(self,
             embed: nn.Embedding = None,
             hidden_size: int = 200,
             dropout: float = 0.1,
             layer: str = "rcnn",
             distribution: str = "kuma"):
    super(IndependentLatentModel, self).__init__()

    self.layer = layer
    emb_size = embed.weight.shape[1]
    enc_size = hidden_size * 2

    self.embed_layer = nn.Sequential(embed, nn.Dropout(p=dropout))
    self.enc_layer = get_encoder(layer, emb_size, hidden_size)

    if distribution == "kuma":
        self.z_layer = KumaGate(enc_size)
    else:
        raise ValueError("unknown distribution")

    self.z = None      # z samples
    self.z_dists = []  # z distribution(s)

    self.report_params()

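
# Illustrative sketch, not part of the repo: unlike the dependent variant, no
# recurrent z-cell is created here, so the Kuma gate sees only the enc_size
# (= 2 * hidden_size) encoder features at each position and positions can be
# gated independently. Vocabulary and embedding sizes below are assumptions.
def _example_build_independent_latent_model():
    embed = nn.Embedding(1000, 300, padding_idx=1)  # placeholder embedding table
    return IndependentLatentModel(embed=embed, hidden_size=200,
                                  dropout=0.1, layer="rcnn",
                                  distribution="kuma")
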
def __init__(self,
             embed: nn.Embedding = None,
             hidden_size: int = 200,
             output_size: int = 1,
             dropout: float = 0.1,
             layer: str = "lstm",
             nonlinearity: str = "sigmoid"):
    super(Classifier, self).__init__()

    emb_size = embed.weight.shape[1]

    self.embed_layer = nn.Sequential(
        embed,
        nn.Dropout(p=dropout)
    )

    self.enc_layer = get_encoder(layer, emb_size, hidden_size)

    # CNN encoders expose their own output width; recurrent encoders are
    # bidirectional, so their output size is 2 * hidden_size.
    if hasattr(self.enc_layer, "cnn"):
        enc_size = self.enc_layer.cnn.out_channels
    else:
        enc_size = hidden_size * 2

    self.output_layer = nn.Sequential(
        nn.Dropout(p=dropout),
        nn.Linear(enc_size, output_size),
        nn.Sigmoid() if nonlinearity == "sigmoid" else nn.LogSoftmax(dim=-1)
    )

    self.report_params()

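
# Illustrative sketch, not part of the repo: which loss typically matches which
# output nonlinearity. With "sigmoid" the classifier emits values in (0, 1),
# which suit regression-style targets (e.g. nn.MSELoss); any other setting emits
# log-probabilities over `output_size` classes, which pair with nn.NLLLoss.
# The embedding argument and sizes are assumptions.
def _example_classifier_losses(embed):
    regressor = Classifier(embed=embed, output_size=1, nonlinearity="sigmoid")
    categorical = Classifier(embed=embed, output_size=5, nonlinearity="softmax")
    return (regressor, nn.MSELoss()), (categorical, nn.NLLLoss())
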
def __init__(
    self,
    embed: nn.Embedding = None,
    hidden_size: int = 200,
    dropout: float = 0.1,
    layer: str = "rcnn",
):
    super(Encoder, self).__init__()

    self.embed_layer = nn.Sequential(embed, nn.Dropout(p=dropout))

    emb_size = embed.weight.shape[1]
    self.enc_size = hidden_size * 2
    self.enc_layer = get_encoder(layer, emb_size, hidden_size)

def __init__(self,
             embed: nn.Embedding = None,
             hidden_size: int = 200,
             dropout: float = 0.1,
             layer: str = "rcnn"):
    super(IndependentGenerator, self).__init__()

    emb_size = embed.weight.shape[1]
    enc_size = hidden_size * 2

    self.embed_layer = nn.Sequential(embed, nn.Dropout(p=dropout))
    self.enc_layer = get_encoder(layer, emb_size, hidden_size)

    self.z_layer = BernoulliGate(enc_size)

    self.z = None      # z samples
    self.z_dists = []  # z distribution(s)

    self.report_params()

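
# Illustrative sketch, not part of the repo: the independent generator places a
# Bernoulli gate on each position, so selections are hard binary samples with no
# reparameterisation, which typically require score-function (REINFORCE-style)
# gradients during training. Vocabulary and embedding sizes are assumptions.
def _example_build_independent_generator():
    embed = nn.Embedding(1000, 300, padding_idx=1)  # placeholder embedding table
    return IndependentGenerator(embed=embed, hidden_size=200,
                                dropout=0.1, layer="rcnn")
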
def __init__(
    self,
    embed: nn.Embedding = None,
    ntoken: int = 150000,
    hidden_size: int = 200,
    # ninp: int = 2048,
    nhead: int = 2,
    nhid: int = 200,
    nlayers: int = 2,
    dropout: float = 0.1,
    layer: str = "lstm",
):
    super(TransformerModel, self).__init__()
    from torch.nn import TransformerEncoder, TransformerEncoderLayer

    self.model_type = 'Transformer'
    self.src_mask = None

    emb_size = embed.weight.shape[1]
    self.embed_layer = nn.Sequential(embed, nn.Dropout(p=dropout))
    self.enc_layer = get_encoder(layer, emb_size, hidden_size)

    if hasattr(self.enc_layer, "cnn"):
        enc_size = self.enc_layer.cnn.out_channels
    else:
        enc_size = hidden_size * 2

    self.pos_encoder = PositionalEncoding(enc_size, dropout)
    encoder_layers = TransformerEncoderLayer(enc_size, nhead, nhid, dropout)
    self.transformer_encoder = TransformerEncoder(encoder_layers, nlayers)
    # self.encoder = nn.Embedding(ntoken, ninp)
    self.ninp = enc_size
    # self.decoder = nn.Linear(ninp, ntoken)

    self.init_weights()

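
# Illustrative sketch, not part of the repo: torch.nn.TransformerEncoder, as
# instantiated above (without batch_first), expects inputs of shape
# [seq_len, batch, d_model], where d_model corresponds to enc_size. The dummy
# tensor below only demonstrates the shape convention; it bypasses the
# embedding layer and the RNN/CNN encoder entirely, and all sizes are assumptions.
def _example_transformer_shapes(enc_size=400, nhead=2, nhid=200, nlayers=2):
    import torch
    from torch.nn import TransformerEncoder, TransformerEncoderLayer
    layer = TransformerEncoderLayer(enc_size, nhead, nhid, dropout=0.1)
    encoder = TransformerEncoder(layer, nlayers)
    src = torch.zeros(35, 8, enc_size)  # [seq_len, batch, d_model]
    return encoder(src)                 # output has the same shape as src
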
def __init__(
    self,
    aspects,
    cfg=None,
    vocab=None,
):
    super(MultiAspectsLatentRationaleModel, self).__init__()

    self.aspects = aspects
    # NB: plain Python lists do not register submodules with the parent
    # nn.Module; wrap them in nn.ModuleList if the parent's parameters()/to()
    # should also cover the per-aspect models.
    self.latent_models = []
    self.transformers = []
    self.aspect_rating_predictors = []
    self.aspect_rating_classifiers = []
    self.aspect_polarity_predictors = []
    self.aspect_polarity_classifiers = []
    self.vocab = vocab

    vectors = load_embeddings(cfg["embeddings"], vocab)

    output_size = 1
    emb_size = cfg["emb_size"]
    hidden_size = cfg["hidden_size"]
    dropout = cfg["dropout"]
    layer = cfg["layer"]
    vocab_size = len(vocab.w2i)
    dependent_z = cfg["dependent_z"]
    selection = cfg["selection"]
    lasso = cfg["lasso"]

    lagrange_alpha = cfg["lagrange_alpha"]
    lagrange_lr = cfg["lagrange_lr"]
    lambda_init = cfg["lambda_init"]
    lambda_min = cfg["lambda_min"]
    lambda_max = cfg["lambda_max"]

    self.hidden_size = hidden_size
    self.output_size = output_size
    self.dropout = dropout
    self.L2_regularize = cfg["L2_regularize"]

    self.embed = embed = nn.Embedding(vocab_size, emb_size, padding_idx=1)
    self.embed_layer = nn.Sequential(embed, nn.Dropout(p=dropout))
    self.enc_layer = get_encoder(layer, emb_size, hidden_size)

    # One latent rationale model, rating classifier, polarity classifier and
    # transformer per aspect.
    for i in range(self.aspects):
        latent_model = LatentRationaleModel(
            vocab_size=vocab_size, emb_size=emb_size,
            hidden_size=hidden_size, output_size=output_size,
            dropout=dropout, dependent_z=dependent_z, layer=layer,
            selection=selection, lasso=lasso,
            lagrange_alpha=lagrange_alpha, lagrange_lr=lagrange_lr,
            lambda_init=lambda_init,
            lambda_min=lambda_min, lambda_max=lambda_max)
        initialize_model_(latent_model)

        # load pre-trained word embeddings
        with torch.no_grad():
            latent_model.embed.weight.data.copy_(torch.from_numpy(vectors))
            print("Embeddings fixed: {}".format(cfg["fix_emb"]))
            latent_model.embed.weight.requires_grad = not cfg["fix_emb"]

        latent_model = latent_model.to(device)
        self.latent_models.append(latent_model)

        # Try to use embedding
        aspect_rating_classifier = Rating_Classifier(
            hidden_size=hidden_size, output_size=output_size,
            dropout=dropout, layer=layer)
        aspect_rating_classifier = aspect_rating_classifier.to(device)
        initialize_model_(aspect_rating_classifier)
        self.aspect_rating_classifiers.append(aspect_rating_classifier)

        aspect_polarity_classifier = Polarity_Classifier(
            hidden_size=hidden_size, output_size=output_size,
            dropout=dropout, layer=layer)
        aspect_polarity_classifier = aspect_polarity_classifier.to(device)
        initialize_model_(aspect_polarity_classifier)
        self.aspect_polarity_classifiers.append(aspect_polarity_classifier)

        transformer = TransformerModel(embed=embed, hidden_size=hidden_size,
                                       nhead=2)
        transformer = transformer.to(device)
        initialize_model_(transformer)
        # # load pre-trained word embeddings
        # with torch.no_grad():
        #     transformer.embed.weight.data.copy_(torch.from_numpy(vectors))
        #     print("Embeddings fixed: {}".format(cfg["fix_emb"]))
        #     transformer.embed.weight.requires_grad = not cfg["fix_emb"]
        self.transformers.append(transformer)

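
# Illustrative sketch, not part of the repo: a minimal cfg dict covering the
# keys this constructor reads. All values (and the embeddings path) are
# placeholder assumptions; the real configuration is supplied elsewhere.
_example_cfg = {
    "embeddings": "path/to/embeddings.txt",  # passed to load_embeddings
    "emb_size": 200,
    "hidden_size": 200,
    "dropout": 0.2,
    "layer": "rcnn",
    "dependent_z": True,
    "selection": 0.13,
    "lasso": 0.02,
    "lagrange_alpha": 0.5,
    "lagrange_lr": 0.01,
    "lambda_init": 1e-4,
    "lambda_min": 1e-12,
    "lambda_max": 5.0,
    "L2_regularize": 1e-6,
    "fix_emb": True,
}
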