def __init__(self, char_vocab_size, word_vocab_size, num_labels):
    super().__init__()

    # architecture
    self.embed = embed(char_vocab_size, word_vocab_size)
    self.rnn1 = getattr(nn, RNN_TYPE)(
        input_size = sum(EMBED.values()),
        hidden_size = HIDDEN_SIZE // NUM_DIRS,
        num_layers = NUM_LAYERS,
        batch_first = True,
        bidirectional = NUM_DIRS == 2
    )
    self.rnn2 = getattr(nn, RNN_TYPE)(
        input_size = HIDDEN_SIZE,
        hidden_size = HIDDEN_SIZE // NUM_DIRS,
        num_layers = NUM_LAYERS,
        batch_first = True,
        bidirectional = NUM_DIRS == 2
    )
    self.attn = None
    if ATTN == "attn": # global attention
        self.attn = attn(HIDDEN_SIZE)
    if ATTN == "attn-rc": # global attention with residual connection
        self.attn = attn(sum(EMBED.values()) + HIDDEN_SIZE * 2)
    if ATTN == "mh-attn": # multi-head attention
        self.attn = attn_mh()
    self.fc = nn.Linear(HIDDEN_SIZE, num_labels)
    self.softmax = nn.LogSoftmax(1)

    if CUDA:
        self = self.cuda()
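# A minimal, self-contained sketch of the pattern the constructor above sets up
# (stacked bidirectional RNN -> global attention pooling -> linear -> log-softmax).
# The class name, sizes, and the dot-free attention scorer below are illustrative
# assumptions, not this repo's embed/attn/attn_mh helpers or hyperparameters.
import torch
import torch.nn as nn

class RNNAttnClassifierSketch(nn.Module): # hypothetical name, for illustration only
    def __init__(self, vocab_size = 10000, embed_size = 300, hidden_size = 300, num_labels = 2):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, embed_size, padding_idx = 0)
        self.rnn1 = nn.LSTM(embed_size, hidden_size // 2, batch_first = True, bidirectional = True)
        self.rnn2 = nn.LSTM(hidden_size, hidden_size // 2, batch_first = True, bidirectional = True)
        self.attn = nn.Linear(hidden_size, 1) # global attention scorer over RNN outputs
        self.fc = nn.Linear(hidden_size, num_labels)
        self.softmax = nn.LogSoftmax(1)

    def forward(self, x): # x: [batch_size, seq_len] token indices
        h, _ = self.rnn1(self.embed(x))
        h, _ = self.rnn2(h) # [batch_size, seq_len, hidden_size]
        a = torch.softmax(self.attn(h).squeeze(2), 1) # attention weights [batch_size, seq_len]
        v = (a.unsqueeze(2) * h).sum(1) # attention-weighted sentence vector
        return self.softmax(self.fc(v))

# usage (illustrative): scores = RNNAttnClassifierSketch()(torch.randint(1, 10000, (8, 20)))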
def __init__(self, cti, wti):
    super().__init__()
    self.H = None # encoder hidden states

    # architecture
    self.embed = embed(ENC_EMBED, len(cti), len(wti))
    self.rnn = getattr(nn, RNN_TYPE)(
        input_size = self.embed.dim,
        hidden_size = HIDDEN_SIZE // NUM_DIRS,
        num_layers = NUM_LAYERS,
        bias = True,
        batch_first = True,
        dropout = DROPOUT,
        bidirectional = (NUM_DIRS == 2)
    )
def __init__(self, cti_size, wti_size, num_tags):
    super().__init__()
    self.batch_size = 0

    # architecture
    self.embed = embed(EMBED, cti_size, wti_size, HRE)
    self.rnn = getattr(nn, RNN_TYPE)(
        input_size = EMBED_SIZE,
        hidden_size = HIDDEN_SIZE // NUM_DIRS,
        num_layers = NUM_LAYERS,
        bias = True,
        dropout = DROPOUT,
        bidirectional = (NUM_DIRS == 2)
    )
    self.out = nn.Linear(HIDDEN_SIZE, num_tags) # RNN output to tag
def __init__(self, char_vocab_size, word_vocab_size, num_tags):
    super().__init__()

    # architecture
    self.embed = embed(char_vocab_size, word_vocab_size)
    self.rnn = getattr(nn, RNN_TYPE)(
        input_size = sum(EMBED.values()),
        hidden_size = HIDDEN_SIZE // NUM_DIRS,
        num_layers = NUM_LAYERS,
        bias = True,
        batch_first = True,
        dropout = DROPOUT,
        bidirectional = (NUM_DIRS == 2)
    )
    self.out = nn.Linear(HIDDEN_SIZE, num_tags) # RNN output to tag
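# A minimal sketch of how the tagger layers above are typically applied:
# embeddings -> bidirectional RNN over the sentence -> per-token projection to tag scores.
# The function name and sizes below are illustrative assumptions, not the repo's actual API.
import torch
import torch.nn as nn

def tag_scores_sketch(x, embed, rnn, out): # x: [batch_size, seq_len] word indices
    h, _ = rnn(embed(x)) # [batch_size, seq_len, hidden_size]
    return torch.log_softmax(out(h), dim = 2) # [batch_size, seq_len, num_tags]

# usage (illustrative sizes):
# embed = nn.Embedding(10000, 300)
# rnn = nn.LSTM(300, 150, batch_first = True, bidirectional = True)
# out = nn.Linear(300, 10)
# scores = tag_scores_sketch(torch.randint(1, 10000, (8, 20)), embed, rnn, out)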
def __init__(self, cti_size, wti_size):
    super().__init__()
    self.hs = None # source hidden state
    self.hidden = None # decoder hidden state

    # architecture
    self.embed = embed(EMBED, cti_size, wti_size)
    self.rnn = getattr(nn, RNN_TYPE)(
        input_size = self.embed.dim,
        hidden_size = HIDDEN_SIZE // NUM_DIRS,
        num_layers = NUM_LAYERS,
        bias = True,
        batch_first = True,
        dropout = DROPOUT,
        bidirectional = (NUM_DIRS == 2)
    )
    self.attn = attn()
def __init__(self, char_vocab_size, word_vocab_size, num_labels):
    super().__init__()

    # architecture
    self.embed = embed(char_vocab_size, word_vocab_size, EMBED_SIZE)
    self.conv = nn.ModuleList([nn.Conv2d(
        in_channels = 1,
        out_channels = NUM_FEATURE_MAPS,
        kernel_size = (i, EMBED_SIZE)
    ) for i in KERNEL_SIZES])
    self.dropout = nn.Dropout(DROPOUT)
    self.fc = nn.Linear(len(KERNEL_SIZES) * NUM_FEATURE_MAPS, num_labels)
    self.softmax = nn.LogSoftmax(1)

    if CUDA:
        self = self.cuda()
def __init__(self, char_vocab_size, word_vocab_size, num_labels):
    super().__init__()

    # architecture
    self.embed = embed(EMBED, char_vocab_size, word_vocab_size)
    self.conv = nn.ModuleList([nn.Conv2d(
        in_channels = 1, # Ci
        out_channels = NUM_FEATMAPS, # Co
        kernel_size = (i, self.embed.dim) # height, width
    ) for i in KERNEL_SIZES]) # num_kernels (K)
    self.dropout = nn.Dropout(DROPOUT)
    self.fc = nn.Linear(len(KERNEL_SIZES) * NUM_FEATMAPS, num_labels)
    self.softmax = nn.LogSoftmax(1)

    if CUDA:
        self = self.cuda()
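# A minimal sketch of the convolution-and-pooling pass these layers support
# (Kim 2014-style text CNN): convolve each kernel size over the embedded sentence,
# max-pool over time, concatenate, dropout, classify. Names and sizes below are
# illustrative assumptions, not this repo's actual modules or hyperparameters.
import torch
import torch.nn as nn
import torch.nn.functional as F

def cnn_sketch(x, embed, convs, dropout, fc): # x: [batch_size, seq_len] token indices
    h = embed(x).unsqueeze(1) # [batch_size, 1, seq_len, embed_size] (add channel dim)
    h = [F.relu(conv(h)).squeeze(3) for conv in convs] # each: [batch_size, num_featmaps, seq_len - k + 1]
    h = [F.max_pool1d(m, m.size(2)).squeeze(2) for m in h] # max over time: [batch_size, num_featmaps]
    h = dropout(torch.cat(h, 1)) # [batch_size, len(kernel_sizes) * num_featmaps]
    return torch.log_softmax(fc(h), dim = 1) # [batch_size, num_labels]

# usage (illustrative sizes):
# embed = nn.Embedding(10000, 300)
# convs = nn.ModuleList([nn.Conv2d(1, 100, (k, 300)) for k in (2, 3, 4)])
# scores = cnn_sketch(torch.randint(1, 10000, (8, 20)), embed, convs, nn.Dropout(0.5), nn.Linear(300, 2))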
def __init__(self, wti_size):
    super().__init__()
    self.M = None # source hidden states
    self.hidden = None # decoder hidden states

    # architecture
    self.embed = embed(DEC_EMBED, 0, wti_size)
    self.rnn = getattr(nn, RNN_TYPE)(
        input_size = self.embed.dim + HIDDEN_SIZE, # input feeding
        hidden_size = HIDDEN_SIZE // NUM_DIRS,
        num_layers = NUM_LAYERS,
        bias = True,
        batch_first = True,
        dropout = DROPOUT,
        bidirectional = (NUM_DIRS == 2)
    )
    self.attn = attn()
    self.out = nn.Linear(HIDDEN_SIZE, wti_size)
    self.softmax = nn.LogSoftmax(1)
def __init__(self, x_wti, y_wti):
    super().__init__()
    self.M = None # encoder hidden states
    self.H = None # decoder hidden states
    self.h = None # decoder output

    # architecture
    self.embed = embed(DEC_EMBED, 0, len(y_wti))
    self.rnn = getattr(nn, RNN_TYPE)(
        input_size = self.embed.dim + HIDDEN_SIZE, # input feeding
        hidden_size = HIDDEN_SIZE // NUM_DIRS,
        num_layers = NUM_LAYERS,
        bias = True,
        batch_first = True,
        dropout = DROPOUT,
        bidirectional = (NUM_DIRS == 2)
    )
    self.attn = attn()
    self.Wc = nn.Linear(HIDDEN_SIZE * 2, HIDDEN_SIZE)
    if COPY:
        self.copy = copy(x_wti, y_wti)
    self.Wo = nn.Linear(HIDDEN_SIZE, len(y_wti))
    self.softmax = nn.LogSoftmax(1)
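# A minimal sketch of the Luong-style attention step that Wc and Wo above imply:
# the decoder output attends over the encoder states, the context vector is
# concatenated with the decoder output, squashed through Wc with tanh, and
# projected to the target vocabulary. The dot-product scorer, names, and sizes
# below are illustrative assumptions, not this repo's attn/copy modules.
import torch
import torch.nn as nn

def attn_decode_step_sketch(h, M, Wc, Wo): # h: [B, 1, H] decoder output, M: [B, L, H] encoder states
    a = torch.softmax(h.bmm(M.transpose(1, 2)), dim = 2) # attention weights [B, 1, L]
    c = a.bmm(M) # context vector [B, 1, H]
    h_tilde = torch.tanh(Wc(torch.cat((c, h), dim = 2))) # attentional hidden state [B, 1, H]
    return torch.log_softmax(Wo(h_tilde).squeeze(1), dim = 1) # target word scores [B, vocab_size]

# usage (illustrative sizes):
# Wc, Wo = nn.Linear(600, 300), nn.Linear(300, 5000)
# scores = attn_decode_step_sketch(torch.randn(8, 1, 300), torch.randn(8, 7, 300), Wc, Wo)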