def __init__(self, lstm_num, position_num, position_units, value_units,
             n_units, activation):
    # stacked LSTM layers
    lstm_stack = chainer.ChainList()
    for i in range(lstm_num):
        lstm_stack.add_link(L.LSTM(n_units, n_units))
    # bilinear output heads for the position predictions
    output_pos_layers = chainer.ChainList()
    for i in range(position_units - 1):
        output_pos_layers.add_link(
            L.Bilinear(position_num, n_units, n_units))
    output_pos_mid_layers = chainer.ChainList()
    for i in range(position_units - 1):
        output_pos_mid_layers.add_link(
            L.Linear(n_units, n_units + position_num))
    # bilinear output heads for the value predictions
    output_val_layers = chainer.ChainList()
    for i in range(value_units - 1):
        output_val_layers.add_link(L.Bilinear(1, n_units, n_units))
    output_val_mid_layers = chainer.ChainList()
    for i in range(position_units - 1):
        output_val_mid_layers.add_link(L.Linear(n_units, n_units + 2))
    super(LogLSTM, self).__init__(
        input_layer=L.Linear(
            position_num * position_units + value_units, n_units),
        input_mid_layer=L.Linear(n_units, n_units),
        lstms=lstm_stack,
        output_pos1=L.Linear(n_units, position_num),
        output_pos_layers=output_pos_layers,
        output_pos_mid_layers=output_pos_mid_layers,
        output_lastpos=L.Bilinear(position_num, n_units, n_units),
        output_val_layers=output_val_layers,
        output_val_mid_layers=output_val_mid_layers,
        output_last_value=L.Linear(n_units, 2))
    self.position_num = position_num
    self.lstm_num = lstm_num
    self.position_units = position_units
    self.value_units = value_units
    self.n_units = n_units
    if activation == 'sigmoid':
        self.activate = F.sigmoid
    elif activation == 'relu':
        self.activate = F.relu
    else:
        # any other activation name leaves self.activate unset
        pass
def __init__(self, hidden_dim, out_dim, head, activation=functions.identity):
    """
    :param hidden_dim: dimension of atom representation
    :param out_dim: dimension of molecular representation
    :param head: number of heads in attention mechanism
    """
    super(DeepNieFineCoattention, self).__init__()
    with self.init_scope():
        self.energy_layer = links.Bilinear(hidden_dim, hidden_dim, 1)
        self.attention_layer_1 = GraphLinear(head, 1, nobias=True)
        self.attention_layer_2 = GraphLinear(head, 1, nobias=True)
        self.prev_lt_layer_1 = GraphLinear(hidden_dim, hidden_dim, nobias=False)
        self.prev_lt_layer_2 = GraphLinear(hidden_dim, hidden_dim, nobias=False)
        self.lt_layer_1 = GraphLinear(hidden_dim, head, nobias=True)
        self.lt_layer_2 = GraphLinear(hidden_dim, head, nobias=True)
        self.j_layer = GraphLinear(hidden_dim, out_dim)
    self.hidden_dim = hidden_dim
    self.out_dim = out_dim
    self.head = head
    self.activation = activation
def __init__(self, hidden_dim, out_dim, head, activation=functions.identity):
    """
    :param hidden_dim: dimension of atom representation
    :param out_dim: dimension of molecular representation
    :param head: number of heads in attention mechanism
    """
    super(ExtremeDeepNieFineCoattention, self).__init__()
    self.n_lt_layers = 3
    with self.init_scope():
        self.energy_layer = links.Bilinear(hidden_dim, hidden_dim, 1)
        self.attention_layer_1 = GraphLinear(head, 1, nobias=True)
        self.attention_layer_2 = GraphLinear(head, 1, nobias=True)
        # self.prev_lt_layer_1 = GraphLinear(hidden_dim, hidden_dim, nobias=False)
        self.prev_lt_layers_1 = chainer.ChainList(*[
            GraphLinear(hidden_dim, hidden_dim, nobias=False)
            for _ in range(self.n_lt_layers)
        ])
        # self.prev_lt_layer_2 = GraphLinear(hidden_dim, hidden_dim, nobias=False)
        self.prev_lt_layers_2 = chainer.ChainList(*[
            GraphLinear(hidden_dim, hidden_dim, nobias=False)
            for _ in range(self.n_lt_layers)
        ])
        self.lt_layer_1 = GraphLinear(hidden_dim, head, nobias=True)
        self.lt_layer_2 = GraphLinear(hidden_dim, head, nobias=True)
        self.j_layer = GraphLinear(hidden_dim, out_dim)  # modification for concat
    self.hidden_dim = hidden_dim
    self.out_dim = out_dim
    self.head = head
    self.activation = activation
def __init__(self, n_outputs):
    super(GoogLeNet, self).__init__(
        conv1=L.Convolution2D(3, 64, 7, stride=2, pad=3),
        conv2_reduce=L.Convolution2D(64, 64, 1),
        conv2=L.Convolution2D(64, 192, 3, stride=1, pad=1),
        inc3a=L.Inception(192, 64, 96, 128, 16, 32, 32),
        inc3b=L.Inception(256, 128, 128, 192, 32, 96, 64),
        inc4a=L.Inception(480, 192, 96, 208, 16, 48, 64),
        inc4b=L.Inception(512, 160, 112, 224, 24, 64, 64),
        inc4c=L.Inception(512, 128, 128, 256, 24, 64, 64),
        inc4d=L.Inception(512, 112, 144, 288, 32, 64, 64),
        inc4e=L.Inception(528, 256, 160, 320, 32, 128, 128),
        inc5a=L.Inception(832, 256, 160, 320, 32, 128, 128),
        inc5b=L.Inception(832, 384, 192, 384, 48, 128, 128),
        loss3_fc1=L.Linear(4096, 300),
        loss3_fc2=L.Linear(300, n_outputs),
        doc_fc1=L.Linear(1000, 600),
        doc_fc2=L.Linear(600, 300),
        bi1=L.Bilinear(300, 300, 300),
        loss1_conv=L.Convolution2D(512, 128, 1),
        loss1_fc1=L.Linear(4 * 4 * 128, 1024),
        loss1_fc2=L.Linear(1024, n_outputs),
        loss2_conv=L.Convolution2D(528, 128, 1),
        loss2_fc1=L.Linear(4 * 4 * 128, 1024),
        loss2_fc2=L.Linear(1024, n_outputs),
    )
def __init__(self, model_path):
    Param.load(self, model_path / 'tagger_defs.txt')
    self.extractor = FeatureExtractor(model_path)
    self.in_dim = self.word_dim + self.char_dim
    super(BiaffineJaLSTMParser, self).__init__(
        emb_word=L.EmbedID(self.n_words, self.word_dim),
        emb_char=L.EmbedID(self.n_chars, 50, ignore_label=IGNORE),
        conv_char=L.Convolution2D(1, self.char_dim, (3, 50), stride=1, pad=(1, 0)),
        lstm_f=L.NStepLSTM(self.nlayers, self.in_dim, self.hidden_dim, 0.32),
        lstm_b=L.NStepLSTM(self.nlayers, self.in_dim, self.hidden_dim, 0.32),
        arc_dep=L.Linear(2 * self.hidden_dim, self.dep_dim),
        arc_head=L.Linear(2 * self.hidden_dim, self.dep_dim),
        rel_dep=L.Linear(2 * self.hidden_dim, self.dep_dim),
        rel_head=L.Linear(2 * self.hidden_dim, self.dep_dim),
        biaffine_arc=Biaffine(self.dep_dim),
        biaffine_tag=L.Bilinear(self.dep_dim, self.dep_dim, len(self.targets)))
def __init__(self, left_dim, right_dim, out_dim, ntn_out_dim=8,
             hidden_dims=(16, ), activation=relu):
    super(Bilinear, self).__init__()
    ntn_layer = links.Bilinear(left_size=left_dim, right_size=right_dim,
                               out_size=ntn_out_dim)
    mlp_layers = [
        links.Linear(in_size=None, out_size=hidden_dim)
        for hidden_dim in hidden_dims
    ]
    with self.init_scope():
        self.ntn_layer = ntn_layer
        self.mlp_layers = chainer.ChainList(*mlp_layers)
        self.l_out = links.Linear(in_size=None, out_size=out_dim)
    self.left_dim = left_dim
    self.right_dim = right_dim
    self.out_dim = out_dim
    self.hidden_dims = hidden_dims
    self.activation = activation
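# A minimal usage sketch of the NTN-style bilinear head defined above (an
# assumption, not the original __call__; batch size and dimensions are made
# up): the bilinear link maps a pair of entity vectors to ntn_out_dim scores,
# which would then feed the MLP stack.
import numpy as np
from chainer import links, functions

left = np.random.randn(4, 16).astype(np.float32)    # (batch, left_dim)
right = np.random.randn(4, 24).astype(np.float32)   # (batch, right_dim)
ntn = links.Bilinear(left_size=16, right_size=24, out_size=8)
h = functions.relu(ntn(left, right))                 # (batch, ntn_out_dim)
print(h.shape)                                        # (4, 8)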
def __init__(self, hidden_dim, out_dim, head, activation=functions.tanh,
             weight_tying=True):
    """
    :param hidden_dim: dimension of atom representation
    :param out_dim: dimension of molecular representation
    :param head: number of heads in attention mechanism
    """
    super(ParallelCoattention, self).__init__()
    n_entities = 1 if weight_tying else 2
    with self.init_scope():
        self.energy_layers = chainer.ChainList(*[
            links.Bilinear(hidden_dim, out_dim, head)
            for _ in range(n_entities)
        ])
        self.j_layer = GraphLinear(hidden_dim, out_dim)
    self.hidden_dim = hidden_dim
    self.out_dim = out_dim
    self.head = head
    self.activation = activation
    self.weight_tying = weight_tying
def __init__(self, left_size, right_size, out_size):
    super(AttnAggregator, self).__init__()
    with self.init_scope():
        self.bilinear_layer = links.Bilinear(left_size, right_size, out_size)
    self.left_size = left_size
    self.right_size = right_size
    self.out_size = out_size
def __init__(self, model_path, word_dim=None, char_dim=None,
             nlayers=2, hidden_dim=128, dep_dim=100, dropout_ratio=0.5):
    self.model_path = model_path
    defs_file = model_path + "/tagger_defs.txt"
    if word_dim is None:
        self.train = False
        Param.load(self, defs_file)
        self.extractor = FeatureExtractor(model_path)
    else:
        self.train = True
        p = Param(self)
        p.dep_dim = dep_dim
        p.word_dim = word_dim
        p.char_dim = char_dim
        p.hidden_dim = hidden_dim
        p.nlayers = nlayers
        p.n_words = len(read_model_defs(model_path + "/words.txt"))
        p.n_chars = len(read_model_defs(model_path + "/chars.txt"))
        p.targets = read_model_defs(model_path + "/target.txt")
        p.dump(defs_file)
    self.in_dim = self.word_dim + self.char_dim
    self.dropout_ratio = dropout_ratio
    super(BiaffineJaLSTMParser, self).__init__(
        emb_word=L.EmbedID(self.n_words, self.word_dim),
        emb_char=L.EmbedID(self.n_chars, 50, ignore_label=IGNORE),
        conv_char=L.Convolution2D(1, self.char_dim, (3, 50), stride=1, pad=(1, 0)),
        lstm_f=L.NStepLSTM(self.nlayers, self.in_dim, self.hidden_dim, 0.32),
        lstm_b=L.NStepLSTM(self.nlayers, self.in_dim, self.hidden_dim, 0.32),
        arc_dep=L.Linear(2 * self.hidden_dim, self.dep_dim),
        arc_head=L.Linear(2 * self.hidden_dim, self.dep_dim),
        rel_dep=L.Linear(2 * self.hidden_dim, self.dep_dim),
        rel_head=L.Linear(2 * self.hidden_dim, self.dep_dim),
        biaffine_arc=Biaffine(self.dep_dim),
        biaffine_tag=L.Bilinear(self.dep_dim, self.dep_dim, len(self.targets)))
def __init__(self, hidden_dim, out_dim, activation=functions.tanh):
    """
    :param hidden_dim: dimension of atom representation
    :param out_dim: dimension of molecular representation
    """
    super(LinearTransformFineCoattention, self).__init__()
    with self.init_scope():
        self.energy_layer = links.Bilinear(hidden_dim, hidden_dim, 1)
        self.j_layer = GraphLinear(hidden_dim, out_dim)
    self.hidden_dim = hidden_dim
    self.out_dim = out_dim
    self.activation = activation
def __init__(self, hidden_dim, out_dim, activation=functions.tanh):
    """
    :param hidden_dim: dimension of atom representation
    :param out_dim: dimension of molecular representation
    """
    super(PoolingFineCoattention, self).__init__()
    with self.init_scope():
        self.energy_layer = links.Bilinear(hidden_dim, hidden_dim, 1)
        self.j_layer = GraphLinear(hidden_dim, out_dim)
    self.hidden_dim = hidden_dim
    self.out_dim = out_dim
    self.activation = activation
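# A sketch (an assumption, not the original __call__) of how the bilinear
# energy_layer used by the coattention modules above can score every pair of
# atoms from two molecules, producing an (n1, n2) energy matrix:
import numpy as np
from chainer import links

hidden_dim = 8
energy_layer = links.Bilinear(hidden_dim, hidden_dim, 1)
h1 = np.random.randn(5, hidden_dim).astype(np.float32)   # atoms of molecule 1
h2 = np.random.randn(7, hidden_dim).astype(np.float32)   # atoms of molecule 2
left = np.repeat(h1, len(h2), axis=0)                     # pair each h1 row ...
right = np.tile(h2, (len(h1), 1))                         # ... with each h2 row
energies = energy_layer(left, right).reshape(len(h1), len(h2))
print(energies.shape)                                      # (5, 7)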
def __init__(self, n_outputs):
    super(RetweetNet, self).__init__(
        conv1=L.Convolution2D(3, 96, ksize=11, stride=4, pad=0),
        conv2=L.Convolution2D(96, 256, ksize=5, pad=2),
        conv3=L.Convolution2D(256, 384, ksize=3, pad=1),
        conv4=L.Convolution2D(384, 384, ksize=3, pad=1),
        conv5=L.Convolution2D(384, 256, ksize=3, pad=1),
        fc6=L.Linear(12544, 4096),
        fc7=L.Linear(4096, 300),
        doc_fc1=L.Linear(1000, 600),
        doc_fc2=L.Linear(600, 300),
        bi1=L.Bilinear(300, 300, 200),
        fc8=L.Linear(200, n_outputs),
    )
def setUp(self):
    self.f = links.Bilinear(
        self.in_shape[0], self.in_shape[1], self.out_size, True)
    W = self.f.W.data
    W[...] = numpy.random.uniform(-1, 1, W.shape)
    self.f.zerograds()
    self.W = W.copy()

    self.e1 = _uniform(self.batch_size, self.in_shape[0])
    self.e2 = _uniform(self.batch_size, self.in_shape[1])
    self.gy = _uniform(self.batch_size, self.out_size)

    self.y = numpy.einsum('ij,ik,jkl->il', self.e1, self.e2, self.W)
def __init__(self):
    super(MDL_full, self).__init__(
        convR1=L.Convolution2D(3, 96, 11, stride=4),
        convR2=L.Convolution2D(96, 256, 5, pad=2),
        convR3=L.Convolution2D(256, 384, 3, pad=1),
        convR4=L.Convolution2D(384, 384, 3, pad=1),
        convR5=L.Convolution2D(384, 256, 3, pad=1),
        fcR6=L.Linear(9216, 4096),
        fcR7=L.Linear(4096, 4096),
        convD1=L.Convolution2D(3, 96, 11, stride=4),
        convD2=L.Convolution2D(96, 256, 5, pad=2),
        convD3=L.Convolution2D(256, 384, 3, pad=1),
        convD4=L.Convolution2D(384, 384, 3, pad=1),
        convD5=L.Convolution2D(384, 256, 3, pad=1),
        fcD6=L.Linear(9216, 4096),
        fcD7=L.Linear(4096, 4096),
        fc8=L.Bilinear(4096, 4096, 4096),
        fc9=L.Linear(4096, 1000),
    )
    self.train = True
def setUp(self):
    self.f = links.Bilinear(
        self.in_shape[0], self.in_shape[1], self.out_size)
    self.f.W.data[...] = _uniform(*self.f.W.data.shape)
    self.f.V1.data[...] = _uniform(*self.f.V1.data.shape)
    self.f.V2.data[...] = _uniform(*self.f.V2.data.shape)
    self.f.b.data[...] = _uniform(*self.f.b.data.shape)
    self.f.zerograds()

    self.W = self.f.W.data.copy()
    self.V1 = self.f.V1.data.copy()
    self.V2 = self.f.V2.data.copy()
    self.b = self.f.b.data.copy()

    self.e1 = _uniform(self.batch_size, self.in_shape[0])
    self.e2 = _uniform(self.batch_size, self.in_shape[1])
    self.gy = _uniform(self.batch_size, self.out_size)

    self.y = (numpy.einsum('ij,ik,jkl->il', self.e1, self.e2, self.W) +
              self.e1.dot(self.V1) + self.e2.dot(self.V2) + self.b)
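# A self-contained sanity check (a sketch, not part of the original test
# suite; the input sizes below are arbitrary): the forward output of
# links.Bilinear should match the einsum expression used in the fixtures
# above, y = einsum('ij,ik,jkl->il', e1, e2, W) + e1.V1 + e2.V2 + b.
import numpy
from chainer import links

f = links.Bilinear(3, 4, 5)
e1 = numpy.random.uniform(-1, 1, (2, 3)).astype(numpy.float32)
e2 = numpy.random.uniform(-1, 1, (2, 4)).astype(numpy.float32)
y = f(e1, e2).data
expected = (numpy.einsum('ij,ik,jkl->il', e1, e2, f.W.data)
            + e1.dot(f.V1.data) + e2.dot(f.V2.data) + f.b.data)
numpy.testing.assert_allclose(y, expected, rtol=1e-5, atol=1e-5)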
def __init__(self, n_outputs):
    super(GoogLeNetBN, self).__init__(
        conv1=L.Convolution2D(3, 64, 7, stride=2, pad=3, nobias=True),
        norm1=L.BatchNormalization(64),
        conv2=L.Convolution2D(64, 192, 3, pad=1, nobias=True),
        norm2=L.BatchNormalization(192),
        inc3a=L.InceptionBN(192, 64, 64, 64, 64, 96, 'avg', 32),
        inc3b=L.InceptionBN(256, 64, 64, 96, 64, 96, 'avg', 64),
        inc3c=L.InceptionBN(320, 0, 128, 160, 64, 96, 'max', stride=2),
        inc4a=L.InceptionBN(576, 224, 64, 96, 96, 128, 'avg', 128),
        inc4b=L.InceptionBN(576, 192, 96, 128, 96, 128, 'avg', 128),
        inc4c=L.InceptionBN(576, 128, 128, 160, 128, 160, 'avg', 128),
        inc4d=L.InceptionBN(576, 64, 128, 192, 160, 192, 'avg', 128),
        inc4e=L.InceptionBN(576, 0, 128, 192, 192, 256, 'max', stride=2),
        inc5a=L.InceptionBN(1024, 352, 192, 320, 160, 224, 'avg', 128),
        inc5b=L.InceptionBN(1024, 352, 192, 320, 192, 224, 'max', 128),
        linz=L.Linear(1024, 300),
        out=L.Linear(300, n_outputs),
        outimg=L.Linear(1024, n_outputs),
        outdoc=L.Linear(1000, n_outputs),
        doc_fc1=L.Linear(1000, 600),
        doc_fc2=L.Linear(600, 300),
        conva=L.Convolution2D(576, 128, 1, nobias=True),
        norma=L.BatchNormalization(128),
        lina=L.Linear(3200, 1024, nobias=True),
        norma2=L.BatchNormalization(1024),
        outa=L.Linear(1024, n_outputs),
        convb=L.Convolution2D(576, 128, 1, nobias=True),
        normb=L.BatchNormalization(128),
        linb=L.Linear(3200, 1024, nobias=True),
        normb2=L.BatchNormalization(1024),
        outb=L.Linear(1024, n_outputs),
        bi1=L.Bilinear(300, 300, 300)
    )
def __init__(self, mem_units, label_num, attention_method, attention_target,
             dropout_ratio, is_regression=False, only_attn=False):
    super().__init__(
        atw1=L.Linear(mem_units, mem_units),
        atw1con=L.Linear(2 * mem_units, mem_units),
        atw1gate=L.Linear(2 * mem_units, mem_units),
        atw1bi=L.Bilinear(mem_units, mem_units, 1),
        atw2=L.Linear(mem_units, 1),
        atout_class=L.Linear(mem_units + mem_units, label_num),
        atout_reg=L.Linear(mem_units + mem_units, 1),
        out_class=L.Linear(mem_units, label_num),
        out_reg=L.Linear(mem_units, 1))
    self.__count = {'total_root': 0, 'correct_root': 0}
    self.__attention_method = attention_method
    self.__attention_target = attention_target
    self.__is_regression = is_regression
    self.__dropout_ratio = dropout_ratio
    self.__only_attn = only_attn
def check_invalid(self, initialW, initial_bias, nobias):
    with self.assertRaises(AssertionError):
        links.Bilinear(self.in_shape[0], self.in_shape[1], self.out_size,
                       nobias, initialW, initial_bias)
def check_normal(self, initialW, initial_bias, nobias):
    links.Bilinear(self.in_shape[0], self.in_shape[1], self.out_size,
                   nobias, initialW, initial_bias)
def __init__(self, nr_in, nr_out):
    Chain.__init__(self,
                   fwd=L.LSTM(nr_in, nr_out),
                   bwd=L.LSTM(nr_in, nr_out),
                   mix=L.Bilinear(nr_out, nr_out, nr_out))
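# A forward-pass sketch for the bidirectional chain above (an assumption,
# not part of the original snippet): run the forward and backward LSTMs over
# the timesteps and fuse the two final hidden states with the bilinear link.
def bilstm_mix(chain, xs):
    """chain: the Chain defined above; xs: list of (batch, nr_in) arrays."""
    chain.fwd.reset_state()
    chain.bwd.reset_state()
    for x in xs:                  # left-to-right pass
        h_fwd = chain.fwd(x)
    for x in reversed(xs):        # right-to-left pass
        h_bwd = chain.bwd(x)
    return chain.mix(h_fwd, h_bwd)    # (batch, nr_out)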
# make model
print("make model")
n_units = 4000
'''
model = chainer.FunctionSet(
    l1=L.Linear(5096, n_units),
    l2=L.Linear(n_units, 4))
'''
model = chainer.FunctionSet(
    l1_x1=L.Linear(1000, n_units),
    l1_x2=L.Linear(4096, n_units),
    l2_x1=L.Linear(n_units, 150),
    l2_x2=L.Linear(n_units, 150),
    l3=L.Linear(100, 4),
    l4=L.Bilinear(150, 150, 100),
)
cuda.get_device(0).use()
model.to_gpu()
xp = cuda.cupy


def forward(x_data, y_data, train=True):
    # input and target data
    x1 = chainer.Variable(xp.asarray(x_data[:, :1000]))
    x2 = chainer.Variable(xp.asarray(x_data[:, 1000:]))
    t = chainer.Variable(xp.asarray(y_data))
    '''
    x = chainer.Variable(xp.asarray(x_data))
    t = chainer.Variable(xp.asarray(y_data))
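# The forward function above is truncated. A plausible continuation (a sketch
# only; the layer wiring, the loss, and the use of F = chainer.functions are
# assumptions, not the original code) would run each branch through its two
# linear layers, fuse them with the bilinear layer l4, and classify with l3:
#
#     h1 = F.relu(model.l2_x1(F.relu(model.l1_x1(x1))))
#     h2 = F.relu(model.l2_x2(F.relu(model.l1_x2(x2))))
#     h = model.l4(h1, h2)          # bilinear fusion -> (batch, 100)
#     y = model.l3(h)               # (batch, 4)
#     return F.softmax_cross_entropy(y, t), F.accuracy(y, t)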
def __init__(self, encoder, num_labels=46, num_aux_lbls=0, mlp_arc_units=100,
             mlp_lbl_units=100, mlp_tag_units=100, arc_dropout=0.0,
             lbl_dropout=0.5, tag_dropout=0.2, treeify='chu', visualise=False,
             debug=False, sub_attn=False, add_feat=False, apply_mtl=False,
             alpha=1.0, beta=0.0):
    super(GraphParser, self).__init__()
    self.num_labels = num_labels
    self.num_aux_lbls = num_aux_lbls
    self.mlp_arc_units = mlp_arc_units
    self.mlp_lbl_units = mlp_lbl_units
    self.mlp_tag_units = mlp_tag_units
    self.arc_dropout = arc_dropout
    self.lbl_dropout = lbl_dropout
    self.tag_dropout = tag_dropout
    self.treeify = treeify.lower()
    self.visualise = visualise
    self.debug = debug
    self.sleep_time = 0.
    self.sub_attn = sub_attn
    self.add_feat = add_feat
    # MTL parameters
    self.apply_mtl = apply_mtl
    self.alpha = alpha
    self.beta = beta
    assert (treeify in self.TREE_OPTS)
    self.unit_mult = 2 if encoder.use_bilstm else 1
    with self.init_scope():
        self.encoder = encoder
        self.vT = L.Linear(mlp_arc_units, 1)
        # head
        self.H_arc = L.Linear(self.unit_mult * self.encoder.num_units,
                              mlp_arc_units)
        # dependent
        self.D_arc = L.Linear(self.unit_mult * self.encoder.num_units,
                              mlp_arc_units)
        if self.sub_attn:
            # parameters for subword attention
            self.units_dim = self.encoder.embedder.word_encoder.num_units
            self.max_sub_len = self.encoder.embedder.word_encoder.max_sub_len
            # k = softmax([sub_feats_head] x V_attn x h_dep)
            self.V_attn = L.Bilinear(self.units_dim * self.max_sub_len,
                                     mlp_arc_units, self.max_sub_len)
            # g = sigmoid(Wglob x h_head + m_head)
            self.W_glob = L.Linear(mlp_arc_units, mlp_arc_units)
            self.W_loc = L.Linear(mlp_arc_units, mlp_arc_units)
            # parameters for computing the scores
            self.W_head = L.Linear(mlp_arc_units, mlp_arc_units)
            self.W_dependent = L.Linear(mlp_arc_units, mlp_arc_units)
            self.U_lbl_attn = L.Linear(mlp_arc_units, mlp_lbl_units)
        if self.beta > 0:
            self.l1_tag = L.Linear(self.unit_mult * self.encoder.num_units,
                                   self.mlp_tag_units)
            self.l2_tag = L.Linear(self.mlp_tag_units, self.mlp_tag_units)
            self.out_tag = L.Linear(self.mlp_tag_units, self.num_aux_lbls)
        self.V_lblT = L.Linear(mlp_lbl_units, self.num_labels)
        self.U_lbl = L.Linear(self.unit_mult * self.encoder.num_units,
                              mlp_lbl_units)
        self.W_lbl = L.Linear(self.unit_mult * self.encoder.num_units,
                              mlp_lbl_units)
def __init__(self, vocab, n_units, mem_units, attention_method,
             is_regression=False, train=True, forget_bias=False,
             is_leaf_as_chunk=False):
    n_vocab = vocab.get_vocab_size()
    comp_type = Composition.tree_attention_lstm
    if forget_bias:
        super().__init__(
            embed=L.EmbedID(n_vocab, n_units),
            embed2hidden=L.Linear(n_units, mem_units),
            updatel=L.Linear(mem_units * 4, mem_units),
            updater=L.Linear(mem_units * 4, mem_units),
            inputl=L.Linear(mem_units * 4, mem_units),
            inputr=L.Linear(mem_units * 4, mem_units),
            forgetl=L.Linear(mem_units * comp_type.forget_in_size(),
                             mem_units, initial_bias=np.ones(mem_units)),
            forgetr=L.Linear(mem_units * comp_type.forget_in_size(),
                             mem_units, initial_bias=np.ones(mem_units)),
            outputl=L.Linear(mem_units * 4, mem_units),
            outputr=L.Linear(mem_units * 4, mem_units))
    else:
        super().__init__(
            embed=L.EmbedID(n_vocab, n_units),
            embed2hidden=L.Linear(n_units, mem_units),
            updatel=L.Linear(mem_units * 4, mem_units),
            updater=L.Linear(mem_units * 4, mem_units),
            inputl=L.Linear(mem_units * 4, mem_units),
            inputr=L.Linear(mem_units * 4, mem_units),
            forgetl=L.Linear(mem_units * comp_type.forget_in_size(), mem_units),
            forgetr=L.Linear(mem_units * comp_type.forget_in_size(), mem_units),
            outputl=L.Linear(mem_units * 4, mem_units),
            outputr=L.Linear(mem_units * 4, mem_units),
            atw1=L.Linear(mem_units, mem_units),
            atw1con=L.Linear(2 * mem_units, mem_units),
            atw1gate=L.Linear(2 * mem_units, mem_units),
            atw1bi=L.Bilinear(mem_units, mem_units, 1),
            atw2=L.Linear(mem_units, 1),
        )
    self.__attention_method = attention_method
    self.__train = train
    self.__vocab = vocab
    self.is_leaf_as_chunk = is_leaf_as_chunk
    self.mem_units = mem_units
    self.n_units = n_units
    self.is_regression = is_regression
    self.comp_type = comp_type
    # init embed
    if self.__vocab.embed_model is not None:
        for i in range(self.__vocab.get_vocab_size()):
            word = self.__vocab.id2word(i)
            if word in self.__vocab.embed_model:
                vec = self.__vocab.embed_model[word]
                self.embed.W.data[i] = vec