def __init__(self, n_cell, size_hidden, rate_dropout):
    super(ONT_BiGRU, self).__init__()
    self.rate_dropout = rate_dropout
    with self.init_scope():
        self.rnn_a = L.NStepBiGRU(n_cell, 300, size_hidden, rate_dropout)
        self.rnn_b = L.NStepBiGRU(n_cell, 300, size_hidden, rate_dropout)
        self.l1 = L.Linear(size_hidden * 4, size_hidden * 4)
        self.l2 = L.Linear(size_hidden * 4, 4)
def __init__(self):
    super().__init__()
    with self.init_scope():
        self.seq_birnn = L.NStepBiGRU(1, EMBED, EMBED, DROPOUT)
        self.att_linear = L.Linear(4 * EMBED, EMBED)
        self.att_birnn = L.NStepBiGRU(1, EMBED, EMBED, DROPOUT)
        self.att_score = L.Linear(2 * EMBED, 1)
        self.state_linear = L.Linear(4 * EMBED, EMBED)
def __init__(self, emb_size, hidden_size, use_dropout=0.1, flag_gpu=True):
    """
    emb_size: dimensionality of the input embedding vectors
    hidden_size: dimensionality of the hidden layer
    use_dropout: float, dropout ratio to apply
    """
    w = chainer.initializers.HeNormal()
    super(SentenceEncoderBiGRU, self).__init__(
        # word_embed=L.EmbedID(n_vocab, emb_size, ignore_label=-1),
        word_embed=L.Linear(emb_size, hidden_size, initialW=w),
        bi_gru=L.NStepBiGRU(n_layers=1, in_size=hidden_size,
                            out_size=hidden_size, dropout=use_dropout))
    """
    n_layers: number of layers
    in_size: dimensionality of the input vectors
    out_size: dimensionality of the output vectors
    dropout: dropout ratio
    """
    self.USE_DROPOUT = use_dropout
    self.USE_DROPOUT_keep = use_dropout
    self.FLAG_GPU = flag_gpu
    # Use cupy when running on the GPU, numpy otherwise
    if flag_gpu:
        self.ARR = cupy
        cupy.cuda.Device(0).use()
    else:
        self.ARR = np
def __init__(self, idim, elayers, cdim, hdim, subsample, dropout, typ="lstm"):
    super(BRNNP, self).__init__()
    with self.init_scope():
        for i in six.moves.range(elayers):
            if i == 0:
                inputdim = idim
            else:
                inputdim = hdim
            setattr(
                self, "birnn%d" % i,
                L.NStepBiLSTM(1, inputdim, cdim, dropout) if typ == "lstm"
                else L.NStepBiGRU(1, inputdim, cdim, dropout))
            # bottleneck layer to merge
            setattr(self, "bt%d" % i, L.Linear(2 * cdim, hdim))
    self.elayers = elayers
    self.cdim = cdim
    self.subsample = subsample
    self.typ = typ
def __init__(self, n_layers, in_size, hidden_dims, use_dropout):
    super(GaussianEncoder, self).__init__()
    with self.init_scope():
        self.gru = L.NStepBiGRU(n_layers=n_layers, in_size=in_size,
                                out_size=hidden_dims, dropout=use_dropout)
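# A minimal usage sketch (not taken from any of the snippets above) showing how
# an NStepBiGRU such as the encoders in this collection is typically driven.
# The sizes and variable names here are illustrative assumptions only.
import numpy as np
import chainer
import chainer.links as L

gru = L.NStepBiGRU(n_layers=1, in_size=8, out_size=16, dropout=0.1)
xs = [chainer.Variable(np.random.randn(n, 8).astype(np.float32)) for n in (5, 3, 7)]
hy, ys = gru(None, xs)           # hx=None starts from a zero hidden state
assert hy.shape == (2, 3, 16)    # (n_layers * 2 directions, batch, out_size)
assert ys[0].shape == (5, 32)    # per-step outputs, both directions concatenated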
def __init__(self, n_layer, n_vocab, n_units, dropout, cudnn, initialW=None):
    super(BiRNN, self).__init__(
        embed=L.EmbedID(n_vocab, 50, initialW=initialW, ignore_label=0),
        l1=L.NStepBiGRU(n_layer, 50, 50, dropout)
        # l2=L.Linear(n_units/2, 10),
    )
def setUp(self):
    shape = (self.n_layer * 2, len(self.lengths), self.out_size)
    if self.hidden_none:
        self.h = numpy.zeros(shape, 'f')
    else:
        self.h = numpy.random.uniform(-1, 1, shape).astype('f')
    self.xs = [
        numpy.random.uniform(-1, 1, (l, self.in_size)).astype('f')
        for l in self.lengths
    ]
    self.gh = numpy.random.uniform(-1, 1, shape).astype('f')
    self.gys = [
        numpy.random.uniform(-1, 1, (l, self.out_size * 2)).astype('f')
        for l in self.lengths
    ]
    self.rnn = links.NStepBiGRU(self.n_layer, self.in_size, self.out_size,
                                self.dropout, use_cudnn=self.use_cudnn)
    for layer in self.rnn:
        for p in layer.params():
            p.data[...] = numpy.random.uniform(-1, 1, p.data.shape)
    self.rnn.zerograds()
def __init__(self, n_embed, n_unit, n_att_unit, n_att_head, dr_hideen,
             dr_input, pre_vec, gpu_flag):
    initializer1 = chainer.initializers.Uniform()
    initializer2 = chainer.initializers.HeNormal()
    super(GRU_Encoder, self).__init__()
    with self.init_scope():
        self.gru = L.NStepBiGRU(1, n_embed, n_unit, dr_hideen)
        self.decode = L.Linear(None, n_embed, initialW=initializer1)
        self.att_w1 = L.Linear(n_unit * 2, n_att_unit, nobias=True,
                               initialW=initializer1)
        self.att_w2 = L.Linear(n_att_unit, n_att_head, nobias=True,
                               initialW=initializer1)
    # Re-initialise the GRU weight matrices with HeNormal
    for w in self.gru.namedparams():
        name = w[0].split('/')[2]
        if 'w' in name:
            shape = w[1].shape
            w[1].initializer = initializer2
            w[1].initialize(shape)
    self.embed = np.copy(pre_vec)
    self.gpu_flag = gpu_flag
    self.dr_input = dr_input
    self.n_att_head = n_att_head
    self.tmp_weight = None
def __init__(self, n_layer, n_units, n_hidden, dropout, cudnn, initialW=None):
    super(BiRNN_1, self).__init__(
        l1=L.NStepBiGRU(n_layer, n_units, n_hidden, dropout))
    if cudnn == False:
        import numpy as xp
    else:
        import cupy as xp
def __init__(self, n_layer, n_units, n_hidden, dropout, cudnn, initialW=None):
    super(BiRNN_1, self).__init__(
        l1=L.NStepBiGRU(n_layer, n_units, n_hidden, dropout))
def __init__(self, idim, elayers, cdim, hdim, dropout, typ="lstm"):
    super(BRNN, self).__init__()
    with self.init_scope():
        self.nbrnn = L.NStepBiLSTM(elayers, idim, cdim, dropout) if typ == "lstm" \
            else L.NStepBiGRU(elayers, idim, cdim, dropout)
        self.l_last = L.Linear(cdim * 2, hdim)
    self.typ = typ
def __init__(self, n_layers, in_size, out_size, dropout):
    super(BiGRUAggregator, self).__init__()
    with self.init_scope():
        self.bigru_layer = links.NStepBiGRU(n_layers, in_size, out_size, dropout)
        self.out_layer = GraphLinear(2 * out_size, out_size)
    self.n_layers = n_layers
    self.in_size = in_size
    self.out_size = out_size
    self.dropout = dropout
def __init__(self):
    super(CRNN, self).__init__()
    with self.init_scope():
        self.conv1 = L.Convolution2D(3, 32, 3, pad=1)
        self.conv2 = L.Convolution2D(32, 32, 3, pad=1)
        self.conv3 = L.Convolution2D(32, 32, 3, pad=1)
        self.conv4 = L.Convolution2D(32, 64, 3, pad=1)
        self.conv5 = L.Convolution2D(64, 128, 3, pad=1)
        self.conv6 = L.Convolution2D(128, 128, 3, pad=1)
        self.rnn = L.NStepBiGRU(2, in_size=512, out_size=512, dropout=0.2)
        self.embedding = L.Linear(512 * 2, 63)
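# Hypothetical helper (an assumption, not part of the CRNN above) illustrating
# the usual CRNN reshaping step: collapse a conv feature map of shape
# (B, C, H, W) into a width-major sequence so that each image becomes one list
# entry of shape (W, C * H), matching in_size=512 above when, e.g., C=128 and
# H=4. The real model's shapes may differ.
import chainer.functions as F

def to_sequence(feature_map):
    """feature_map: (B, C, H, W) -> list of B arrays of shape (W, C * H)."""
    b, c, h, w = feature_map.shape
    seq = F.transpose(feature_map, (0, 3, 1, 2))  # (B, W, C, H)
    seq = F.reshape(seq, (b, w, c * h))           # (B, W, C * H)
    return [seq[i] for i in range(b)]             # list input for NStepBiGRU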
def __init__(self, n_layers, n_source_vocab, n_target_vocab, n_units, type_unit,
             word_dropout, denoising_rate, direc, attr, loss_type):
    super(Seq2seq, self).__init__()
    with self.init_scope():
        self.embed_x = L.EmbedID(n_source_vocab, n_units)
        self.embed_y = L.EmbedID(n_target_vocab, n_units)
        # self.attention = Attention(n_units)
        if type_unit == 'lstm':
            if direc == 'uni':
                self.encoder = L.NStepLSTM(n_layers, n_units, n_units, 0.1)
                self.decoder = L.NStepLSTM(n_layers, n_units, n_units, 0.1)
            elif direc == 'bi':
                self.encoder = L.NStepBiLSTM(n_layers, n_units, n_units, 0.1)
                self.decoder = L.NStepBiLSTM(n_layers, n_units, n_units, 0.1)
        elif type_unit == 'gru':
            if direc == 'uni':
                self.encoder = L.NStepGRU(n_layers, n_units, n_units, 0.1)
                self.decoder = L.NStepGRU(n_layers, n_units, n_units, 0.1)
            elif direc == 'bi':
                self.encoder = L.NStepBiGRU(n_layers, n_units, n_units, 0.1)
                self.decoder = L.NStepBiGRU(n_layers, n_units, n_units, 0.1)
        if direc == 'uni':
            self.W = L.Linear(n_units, n_target_vocab)
        elif direc == 'bi':
            self.W = L.Linear(2 * n_units, n_target_vocab)
        if attr:
            self.Wc = L.Linear(2 * n_units, n_units)
    self.n_layers = n_layers
    self.n_units = n_units
    self.type_unit = type_unit
    self.word_dropout = word_dropout
    self.denoising_rate = denoising_rate
    self.attr = attr
    self.loss_type = loss_type
def __init__(self, in_size, bank_k, proj_filters1, proj_filters2):
    super(CBHG, self).__init__()
    with self.init_scope():
        self.conv1d_banks = [
            Conv1DwithBatchNorm(in_size, 128, i + 1) for i in range(bank_k)
        ]
        self.conv1d_proj1 = Conv1DwithBatchNorm(128, proj_filters1, 3)
        self.conv1d_proj2 = Conv1DwithBatchNorm(proj_filters1, proj_filters2, 3)
        self.highways = [L.Highway(proj_filters2) for i in range(4)]
        # The parameters of the original paper are probably wrong.
        self.gru = L.NStepBiGRU(1, proj_filters2, 128, dropout=0)
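# Side note on the list-valued attributes above: assigning a plain Python list
# inside init_scope() does not register the contained Links as children, so
# their parameters are skipped by optimizers and by to_gpu(). The idiomatic
# Chainer pattern is ChainList; a minimal sketch (class and names are
# illustrative only):
import chainer
import chainer.links as L

class HighwayStack(chainer.Chain):
    def __init__(self, size, n=4):
        super(HighwayStack, self).__init__()
        with self.init_scope():
            # ChainList registers each Highway, so its parameters are tracked
            self.highways = chainer.ChainList(*[L.Highway(size) for _ in range(n)])

    def __call__(self, x):
        for hw in self.highways:
            x = hw(x)
        return x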
def __init__(self, inv_examples):
    super().__init__()
    self.add_persistent('inv_examples', inv_examples)  # (T, I, 1+L+1)
    # Create model parameters
    with self.init_scope():
        self.embed = L.EmbedID(VOCAB, EMBED, ignore_label=0)
        self.task_embed = L.EmbedID(TASKS, EMBED)
        self.vmap_params = C.Parameter(0.0, (inv_examples.shape[:2]) + (VOCAB,),
                                       name='vmap_params')
        self.uni_birnn = L.NStepBiGRU(1, EMBED, EMBED, 0)
        self.uni_linear = L.Linear(EMBED * 2, EMBED, nobias=True)
        self.l1 = L.Linear(LENGTH * EMBED + TASKS, EMBED * 2)
        self.l2 = L.Linear(EMBED * 2, EMBED)
        self.l3 = L.Linear(EMBED, EMBED)
    self.log = None
def __init__(self, vocab_size, character_embed_size, embed_size, hidden_size,
             batch_size, use_dropout, initial_embedding):
    super(EncoderGRU, self).__init__(
        word_embed=L.EmbedID(vocab_size, character_embed_size,
                             initialW=initial_embedding, ignore_label=-1),
        bi_gru=L.NStepBiGRU(n_layers=1, in_size=character_embed_size,
                            out_size=hidden_size, dropout=use_dropout),
        h_e=L.Linear(hidden_size * 2, hidden_size * 2),
        e_o=L.Linear(hidden_size * 2, embed_size),
    )
    self.hidden_size = hidden_size
    self.batch_size = batch_size
    self.use_dropout = use_dropout
def construct_RNN(unit_type, bidirection, n_layers, n_input, n_units, dropout):
    rnn = None
    if unit_type == 'lstm':
        if bidirection:
            rnn = L.NStepBiLSTM(n_layers, n_input, n_units, dropout)
        else:
            rnn = L.NStepLSTM(n_layers, n_input, n_units, dropout)
    elif unit_type == 'gru':
        if bidirection:
            rnn = L.NStepBiGRU(n_layers, n_input, n_units, dropout)
        else:
            rnn = L.NStepGRU(n_layers, n_input, n_units, dropout)
    else:
        if bidirection:
            rnn = L.NStepBiRNNTanh(n_layers, n_input, n_units, dropout)
        else:
            rnn = L.NStepRNNTanh(n_layers, n_input, n_units, dropout)

    print('# RNN unit: {}, dropout={}'.format(rnn, rnn.__dict__['dropout']),
          file=sys.stderr)
    for i, c in enumerate(rnn._children):
        print('# {}-th param'.format(i), file=sys.stderr)
        print('# 0 - W={}, b={}'.format(c.w0.shape, c.b0.shape), file=sys.stderr)
        print('# 1 - W={}, b={}'.format(c.w1.shape, c.b1.shape), file=sys.stderr)
        if unit_type == 'gru' or unit_type == 'lstm':
            print('# 2 - W={}, b={}'.format(c.w2.shape, c.b2.shape), file=sys.stderr)
            print('# 3 - W={}, b={}'.format(c.w3.shape, c.b3.shape), file=sys.stderr)
            print('# 4 - W={}, b={}'.format(c.w4.shape, c.b4.shape), file=sys.stderr)
            print('# 5 - W={}, b={}'.format(c.w5.shape, c.b5.shape), file=sys.stderr)
            if unit_type == 'lstm':
                print('# 6 - W={}, b={}'.format(c.w6.shape, c.b6.shape), file=sys.stderr)
                print('# 7 - W={}, b={}'.format(c.w7.shape, c.b7.shape), file=sys.stderr)
    return rnn
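# A hypothetical call to the factory above (argument values are illustrative):
# a 2-layer bidirectional GRU over 100-dimensional inputs with 200 units per
# direction; the helper prints the parameter shapes to stderr and returns the link.
birnn = construct_RNN('gru', True, 2, 100, 200, 0.3)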
def __init__(self, n_voc, emb_dim, hid_dim, seq_len, gpu_num, dropout=0.2):
    super(Discriminator, self).__init__()
    self.hid_dim = hid_dim
    self.emb_dim = emb_dim
    self.seq_len = seq_len
    self.gpu_num = gpu_num
    self.dropout = dropout
    w = I.Normal(1.)
    with self.init_scope():
        self.embeddings = L.EmbedID(n_voc, emb_dim, initialW=w)
        self.gru = L.NStepBiGRU(2, emb_dim, hid_dim, dropout)
        self.gru2hidden = L.Linear(2 * 2 * hid_dim, hid_dim, initialW=w,
                                   initial_bias=I.Zero())
        self.dropout_linear = F.dropout
        self.hidden2out = L.Linear(hid_dim, 1, initialW=w,
                                   initial_bias=I.Zero())
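# The 2 * 2 * hid_dim input size of gru2hidden matches the final hidden state of
# a 2-layer bidirectional GRU: hy has shape (layers * directions, B, hid_dim) =
# (4, B, hid_dim). A plausible forward pass, sketched as an assumption rather
# than the original Discriminator code, that flattens hy for the classifier head:
import chainer.functions as F

def discriminate(self, xs):
    exs = [self.embeddings(x) for x in xs]              # each x: (len_i,) int32 ids
    hy, _ = self.gru(None, exs)                         # (4, B, hid_dim)
    h = F.reshape(F.transpose(hy, (1, 0, 2)),
                  (hy.shape[1], -1))                    # (B, 2 * 2 * hid_dim)
    h = F.relu(self.gru2hidden(h))
    return self.hidden2out(F.dropout(h, self.dropout))  # (B, 1) logits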
def __init__(self, rule_stories):
    super().__init__()
    # Setup rule repo
    rvctx, rvq, rva, rsupps = vectorise_stories(rule_stories)  # (R, Ls, L), (R, Q), (R, A), (R, I)
    self.add_persistent('rvctx', rvctx)
    self.add_persistent('rvq', rvq)
    self.add_persistent('rva', rva)
    self.add_persistent('rsupps', rsupps)
    # Create model parameters
    with self.init_scope():
        self.embed = L.EmbedID(len(word2idx), EMBED, ignore_label=0)
        # self.rulegen = RuleGen()
        self.vmap_params = C.Parameter(0.0, (rvq.shape[0], len(word2idx)),
                                       name='vmap_params')  # (R, V)
        self.mematt = MemAttention()
        self.uni_birnn = L.NStepBiGRU(1, EMBED, EMBED, DROPOUT)
        self.uni_linear = L.Linear(EMBED, EMBED, nobias=True)
        self.rule_linear = L.Linear(EMBED, EMBED, nobias=True)
        self.answer_linear = L.Linear(EMBED, len(word2idx))
    self.log = None
def __init__(self, out_dim, hidden_dim=16, n_layers=4, n_atom_types=MAX_ATOMIC_NUM,
             concat_hidden=False, layer_aggregator=None, dropout_rate=0.0,
             batch_normalization=False, weight_tying=True, use_attention=False,
             update_attention=False, attention_tying=True, context=False,
             context_layers=1, context_dropout=0.,
             message_function='matrix_multiply', edge_hidden_dim=16,
             readout_function='graph_level', num_timesteps=3,
             num_output_hidden_layers=0, output_hidden_dim=16,
             output_activation=functions.relu, output_atoms=False):
    super(GGNN, self).__init__()
    n_readout_layer = n_layers if concat_hidden else 1
    n_message_layer = 1 if weight_tying else n_layers
    n_attention_layer = 1 if attention_tying else n_layers
    self.n_readout_layer = n_readout_layer
    self.n_message_layer = n_message_layer
    self.n_attention_layer = n_attention_layer
    self.out_dim = out_dim
    self.hidden_dim = hidden_dim
    self.n_layers = n_layers
    self.concat_hidden = concat_hidden
    self.layer_aggregator = layer_aggregator
    self.dropout_rate = dropout_rate
    self.batch_normalization = batch_normalization
    self.weight_tying = weight_tying
    self.use_attention = use_attention
    self.update_attention = update_attention
    self.attention_tying = attention_tying
    self.context = context
    self.context_layers = context_layers
    self.context_dropout = context_dropout
    self.message_functinon = message_function
    self.edge_hidden_dim = edge_hidden_dim
    self.readout_function = readout_function
    self.num_timesteps = num_timesteps
    self.num_output_hidden_layers = num_output_hidden_layers
    self.output_hidden_dim = output_hidden_dim
    self.output_activation = output_activation
    self.output_atoms = output_atoms
    with self.init_scope():
        # Update
        self.embed = EmbedAtomID(out_size=hidden_dim, in_size=n_atom_types)
        self.message_layers = chainer.ChainList(*[
            GraphLinear(hidden_dim, self.NUM_EDGE_TYPE * hidden_dim)
            for _ in range(n_message_layer)
        ])
        if self.message_functinon == 'edge_network':
            del self.message_layers
            self.message_layers = chainer.ChainList(*[
                EdgeNetwork(in_dim=self.NUM_EDGE_TYPE,
                            hidden_dim=self.edge_hidden_dim,
                            node_dim=self.hidden_dim)
                for _ in range(n_message_layer)
            ])
        if self.context:
            self.context_bilstm = links.NStepBiLSTM(
                n_layers=self.context_layers, in_size=self.hidden_dim,
                out_size=self.hidden_dim // 2, dropout=context_dropout)
        # self-attention layer
        if use_attention or update_attention:
            # these commented layers are written for GAT implemented by TensorFlow.
            # self.linear_transform_layer = chainer.ChainList(
            #     *[links.ConvolutionND(1, in_channels=hidden_dim, out_channels=hidden_dim, ksize=1, nobias=True)
            #       for _ in range(n_attention_layer)]
            # )
            # self.conv1d_layer_1 = chainer.ChainList(
            #     *[links.ConvolutionND(1, in_channels=hidden_dim, out_channels=1, ksize=1)
            #       for _ in range(n_attention_layer)]
            # )
            # self.conv1d_layer_2 = chainer.ChainList(
            #     *[links.ConvolutionND(1, in_channels=hidden_dim, out_channels=1, ksize=1)
            #       for _ in range(n_attention_layer)]
            # )
            self.linear_transform_layer = chainer.ChainList(*[
                links.Linear(in_size=hidden_dim, out_size=hidden_dim, nobias=True)
                for _ in range(n_attention_layer)
            ])
            self.neural_network_layer = chainer.ChainList(*[
                links.Linear(in_size=2 * self.hidden_dim, out_size=1, nobias=True)
                for _ in range(n_attention_layer)
            ])
        # batch normalization
        if batch_normalization:
            self.batch_normalization_layer = links.BatchNormalization(
                size=hidden_dim)
        self.update_layer = links.GRU(2 * hidden_dim, hidden_dim)
        # Readout
        self.i_layers = chainer.ChainList(*[
            GraphLinear(2 * hidden_dim, out_dim)
            for _ in range(n_readout_layer)
        ])
        self.j_layers = chainer.ChainList(*[
            GraphLinear(hidden_dim, out_dim)
            for _ in range(n_readout_layer)
        ])
        if self.readout_function == 'set2vec':
            del self.i_layers, self.j_layers
            # def __init__(self, node_dim, output_dim, num_timesteps=3, inner_prod='default',
            #              num_output_hidden_layers=0, output_hidden_dim=16, activation=chainer.functions.relu):
            self.readout_layer = chainer.ChainList(*[
                Set2Vec(node_dim=self.hidden_dim * 2,
                        output_dim=out_dim,
                        num_timesteps=num_timesteps,
                        num_output_hidden_layers=num_output_hidden_layers,
                        output_hidden_dim=output_hidden_dim,
                        activation=output_activation)
                for _ in range(n_readout_layer)
            ])
        if self.layer_aggregator:
            self.construct_layer_aggregator()
            if self.layer_aggregator in ('gru-attn', 'gru'):
                self.bigru_layer = links.NStepBiGRU(
                    n_layers=1, in_size=self.hidden_dim,
                    out_size=self.hidden_dim, dropout=0.)
            if self.layer_aggregator in ('lstm-attn', 'lstm'):
                self.bilstm_layer = links.NStepBiLSTM(
                    n_layers=1, in_size=self.hidden_dim,
                    out_size=self.hidden_dim, dropout=0.)
            if self.layer_aggregator in ('gru-attn', 'lstm-attn', 'attn'):
                self.attn_dense_layer = links.Linear(
                    in_size=self.n_layers, out_size=self.n_layers)
            if self.layer_aggregator == 'self-attn':
                self.attn_linear_layer = links.Linear(
                    in_size=self.n_layers, out_size=self.n_layers)
        if self.output_atoms:
            self.atoms = None
def __init__(self, n_layer, in_size, n_units, out_size, dropout=0.5):
    super(BiGRU, self).__init__()
    with self.init_scope():
        self.embed = L.EmbedID(in_size, n_units)
        self.l1 = L.NStepBiGRU(n_layer, n_units, n_units, dropout)
        self.l2 = L.Linear(n_units * 2, out_size)
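# A plausible forward pass for the classifier above (an assumption, not the
# original code): embed token ids, run the bidirectional GRU, then feed the last
# layer's forward and backward final states, concatenated to n_units * 2 to
# match l2, into the output projection.
import chainer.functions as F

def classify(self, xs):
    exs = [self.embed(x) for x in xs]        # each x: (len_i,) int32 token ids
    hy, _ = self.l1(None, exs)               # hy: (2 * n_layer, B, n_units)
    h = F.concat([hy[-2], hy[-1]], axis=1)   # (B, 2 * n_units)
    return self.l2(h)                        # (B, out_size) logits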
def __init__(self, input_size, rnn_type, bidirectional, num_units, num_proj,
             num_layers, dropout_input, dropout_hidden, subsample_list=[],
             subsample_type='drop', use_cuda=False, merge_bidirectional=False,
             num_stack=1, splice=1, input_channel=1, conv_channels=[],
             conv_kernel_sizes=[], conv_strides=[], poolings=[],
             activation='relu', batch_norm=False, residual=False,
             dense_residual=False, num_layers_sub=0):
    super(RNNEncoder, self).__init__()

    if len(subsample_list) > 0 and len(subsample_list) != num_layers:
        raise ValueError('subsample_list must be the same size as num_layers.')
    if subsample_type not in ['drop', 'concat']:
        raise TypeError('subsample_type must be "drop" or "concat".')
    if num_layers_sub < 0 or (num_layers_sub > 1 and num_layers < num_layers_sub):
        raise ValueError('Set num_layers_sub between 1 to num_layers.')

    self.rnn_type = rnn_type
    self.bidirectional = bidirectional
    self.num_directions = 2 if bidirectional else 1
    self.num_units = num_units
    self.num_proj = num_proj if num_proj is not None else 0
    self.num_layers = num_layers
    self.dropout_input = dropout_input
    self.dropout_hidden = dropout_hidden
    self.merge_bidirectional = merge_bidirectional
    self.use_cuda = use_cuda
    # TODO: self.clip_activation = clip_activation

    # Setting for hierarchical encoder
    self.num_layers_sub = num_layers_sub

    # Setting for subsampling
    if len(subsample_list) == 0:
        self.subsample_list = [False] * num_layers
    else:
        self.subsample_list = subsample_list
    self.subsample_type = subsample_type
    # This implementation is based on
    # https://arxiv.org/abs/1508.01211
    # Chan, William, et al. "Listen, attend and spell."
    # arXiv preprint arXiv:1508.01211 (2015).

    # Setting for residual connection
    assert not (residual and dense_residual)
    self.residual = residual
    self.dense_residual = dense_residual
    subsample_last_layer = 0
    for l_reverse, is_subsample in enumerate(subsample_list[::-1]):
        if is_subsample:
            subsample_last_layer = num_layers - l_reverse
            break
    self.residual_start_layer = subsample_last_layer + 1
    # NOTE: residual connection starts from the last subsampling layer

    with self.init_scope():
        # Setting for CNNs before RNNs
        if len(conv_channels) > 0 and len(conv_channels) == len(conv_kernel_sizes) \
                and len(conv_kernel_sizes) == len(conv_strides):
            assert num_stack == 1 and splice == 1
            self.conv = CNNEncoder(input_size,
                                   input_channel=input_channel,
                                   conv_channels=conv_channels,
                                   conv_kernel_sizes=conv_kernel_sizes,
                                   conv_strides=conv_strides,
                                   poolings=poolings,
                                   dropout_input=0,
                                   dropout_hidden=dropout_hidden,
                                   activation=activation,
                                   use_cuda=use_cuda,
                                   batch_norm=batch_norm)
            input_size = self.conv.output_size
        else:
            input_size = input_size * splice * num_stack
            self.conv = None

        self.rnns = []
        self.projections = []
        for l in range(num_layers):
            if l == 0:
                encoder_input_size = input_size
            elif self.num_proj > 0:
                encoder_input_size = num_proj
                if subsample_type == 'concat' and l > 0 and self.subsample_list[l - 1]:
                    encoder_input_size *= 2
            else:
                encoder_input_size = num_units * self.num_directions
                if subsample_type == 'concat' and l > 0 and self.subsample_list[l - 1]:
                    encoder_input_size *= 2

            if rnn_type == 'lstm':
                if bidirectional:
                    rnn_i = L.NStepBiLSTM(n_layers=1,
                                          in_size=encoder_input_size,
                                          out_size=num_units,
                                          dropout=0)
                else:
                    rnn_i = L.NStepLSTM(n_layers=1,
                                        in_size=encoder_input_size,
                                        out_size=num_units,
                                        dropout=0)
            elif rnn_type == 'gru':
                if bidirectional:
                    rnn_i = L.NStepBiGRU(n_layers=1,
                                         in_size=encoder_input_size,
                                         out_size=num_units,
                                         dropout=0)
                else:
                    rnn_i = L.NStepGRU(n_layers=1,
                                       in_size=encoder_input_size,
                                       out_size=num_units,
                                       dropout=0)
            elif rnn_type == 'rnn':
                if bidirectional:
                    # rnn_i = L.NStepBiRNNReLU(
                    rnn_i = L.NStepBiRNNTanh(n_layers=1,
                                             in_size=encoder_input_size,
                                             out_size=num_units,
                                             dropout=0)
                else:
                    # rnn_i = L.NStepRNNReLU(
                    rnn_i = L.NStepRNNTanh(n_layers=1,
                                           in_size=encoder_input_size,
                                           out_size=num_units,
                                           dropout=0)
            else:
                raise ValueError('rnn_type must be "lstm" or "gru" or "rnn".')

            if use_cuda:
                rnn_i.to_gpu()
            setattr(self, rnn_type + '_l' + str(l), rnn_i)

            if l != self.num_layers - 1 and self.num_proj > 0:
                proj_i = LinearND(num_units * self.num_directions, num_proj,
                                  dropout=dropout_hidden, use_cuda=use_cuda)
                if use_cuda:
                    proj_i.to_gpu()
                setattr(self, 'proj_l' + str(l), proj_i)
def __init__(self, n_layer, n_vocab, n_units, dropout, cudnn):
    super(BiRNN, self).__init__(
        embed=L.EmbedID(n_vocab, n_units, ignore_label=0),
        l1=L.NStepBiGRU(n_layer, n_units, n_units, dropout),
        l2=L.Linear(n_units, 10),
    )