def __init__(self, phase, cfg, size, base, extras, head, num_classes):
        super(association_lstm, self).__init__()
        self.phase = phase
        self.num_classes = num_classes
        self.cfg = cfg
        self.priorbox = PriorBox(self.cfg)
        # volatile is a legacy (pre-0.4) PyTorch flag; newer versions use torch.no_grad() instead
        self.priors = Variable(self.priorbox.forward(), volatile=True)
        self.size = size

        # SSD network
        self.vgg = nn.ModuleList(base)
        # Layer learns to scale the l2 normalized features from conv4_3
        self.L2Norm = L2Norm(512, 20)
        self.extras = nn.ModuleList(extras)

        self.loc = nn.ModuleList(head[0])
        self.conf = nn.ModuleList(head[1])
        self.roi_pool = _RoIPooling(self.cfg['POOLING_SIZE'],
                                    self.cfg['POOLING_SIZE'], 1.0 / 16.0)
        self.roi_align = RoIAlignAvg(self.cfg['POOLING_SIZE'],
                                     self.cfg['POOLING_SIZE'], 1.0 / 16.0)

        self.grid_size = (self.cfg['POOLING_SIZE'] * 2
                          if self.cfg['CROP_RESIZE_WITH_MAX_POOL']
                          else self.cfg['POOLING_SIZE'])
        self.roi_crop = _RoICrop()
        self.img_shape = (self.cfg['min_dim'], self.cfg['min_dim'])
        # 4 box coordinates + per-class scores + a flattened 7x7 association map
        self.tensor_len = 4 + self.num_classes + 49
        # input_size 84 == 4 + 31 + 49, i.e. tensor_len when num_classes == 31
        self.bnlstm1 = BNLSTM(input_size=84,
                              hidden_size=150,
                              batch_first=False,
                              bidirectional=False)
        self.bnlstm2 = BNLSTM(input_size=150,
                              hidden_size=300,
                              batch_first=False,
                              bidirectional=False)
        self.cls_pred = nn.Linear(300, self.num_classes)
        self.bbox_pred = nn.Linear(300, 4)
        self.association_pred = nn.Linear(300, 49)
        self.MultiProjectLoss = MultiProjectLoss(self.num_classes, 0, True, 3,
                                                 0.5)
        if phase == 'vid_train':
            self.softmax = nn.Softmax(dim=-1)
            #self.detect = Trnsform_target(num_classes, 200, 0.5, 0.01, 0.45)
            self.detect = train_target(num_classes, 200, 0.5, 0.01, 0.45)
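A minimal sketch (not the repository's code) of how the two stacked recurrent layers and the three heads above would consume a sequence of per-ROI vectors. torch.nn.LSTM stands in for the external BNLSTM, and num_classes = 31 is an assumption chosen so that 4 + num_classes + 49 == 84.

import torch
import torch.nn as nn

num_classes = 31
feats = torch.randn(8, 2, 84)                      # (seq_len, batch, 4 box coords + scores + 7x7 assoc. map)

lstm1 = nn.LSTM(input_size=84, hidden_size=150)    # stand-in for bnlstm1
lstm2 = nn.LSTM(input_size=150, hidden_size=300)   # stand-in for bnlstm2
h, _ = lstm1(feats)
h, _ = lstm2(h)                                    # (8, 2, 300)

cls_scores = nn.Linear(300, num_classes)(h)        # per-step class scores
box_deltas = nn.Linear(300, 4)(h)                  # per-step box refinement
assoc_map = nn.Linear(300, 49)(h)                  # per-step flattened 7x7 association map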
Example #2
	def build(self, conf):
		conf.check()
		wscale = 1.0

		embed_id = EmbedID(conf.n_vocab, conf.embed_size, ignore_label=-1)
		if conf.use_gpu:
			embed_id.to_gpu()

		lstm_attributes = {}
		lstm_units = [(conf.embed_size, conf.lstm_hidden_units[0])]
		lstm_units += zip(conf.lstm_hidden_units[:-1], conf.lstm_hidden_units[1:])

		for i, (n_in, n_out) in enumerate(lstm_units):
			if conf.lstm_apply_batchnorm:
				lstm_attributes["layer_%i" % i] = BNLSTM(n_in, n_out)
			else:
				lstm_attributes["layer_%i" % i] = L.LSTM(n_in, n_out)

		lstm = LSTMNetwork(**lstm_attributes)
		lstm.n_layers = len(lstm_units)
		lstm.apply_dropout = conf.lstm_apply_dropout
		if conf.use_gpu:
			lstm.to_gpu()

		fc_attributes = {}
		fc_units = [(conf.lstm_hidden_units[-1], conf.fc_hidden_units[0])]
		fc_units += zip(conf.fc_hidden_units[:-1], conf.fc_hidden_units[1:])
		if conf.fc_output_type == self.OUTPUT_TYPE_EMBED_VECTOR:
			fc_units += [(conf.fc_hidden_units[-1], conf.embed_size)]
		elif conf.fc_output_type == self.OUTPUT_TYPE_SOFTMAX:
			fc_units += [(conf.fc_hidden_units[-1], conf.n_vocab)]
		else:
			raise Exception()

		for i, (n_in, n_out) in enumerate(fc_units):
			fc_attributes["layer_%i" % i] = L.Linear(n_in, n_out, wscale=wscale)
			fc_attributes["batchnorm_%i" % i] = L.BatchNormalization(n_in)

		fc = FullyConnectedNetwork(**fc_attributes)
		fc.n_layers = len(fc_units)
		fc.activation_function = conf.fc_activation_function
		fc.apply_batchnorm = conf.fc_apply_batchnorm
		fc.apply_dropout = conf.fc_apply_dropout
		if conf.use_gpu:
			fc.to_gpu()

		return embed_id, lstm, fc
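All of these builders rely on the same unit-pairing idiom: consecutive hidden sizes are zipped into (n_in, n_out) tuples, one per layer. A stand-alone illustration with made-up sizes:

embed_size = 200
lstm_hidden_units = [512, 256, 128]

lstm_units = [(embed_size, lstm_hidden_units[0])]
lstm_units += zip(lstm_hidden_units[:-1], lstm_hidden_units[1:])
print(lstm_units)  # [(200, 512), (512, 256), (256, 128)]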
Example #3
def __init__(self, p, n_units, train=True):
    super(LSTM, self).__init__(
        embed=L.EmbedID(p + 1, n_units),
        l1=BNLSTM(n_units, n_units),
        l2=L.Linear(n_units, p + 1),
    )
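A hypothetical forward pass for a chain like the one above (not part of the source): embed a token id, take one recurrent step, and project back to the p + 1 vocabulary ids. chainer.links.LSTM stands in for BNLSTM so the sketch is self-contained; the class name TinyLSTM is illustrative.

import chainer
import chainer.links as L
import numpy as np

class TinyLSTM(chainer.Chain):
    def __init__(self, p, n_units):
        super(TinyLSTM, self).__init__(
            embed=L.EmbedID(p + 1, n_units),
            l1=L.LSTM(n_units, n_units),   # stateful; BNLSTM would add batch normalization
            l2=L.Linear(n_units, p + 1),
        )

    def __call__(self, x):
        h = self.embed(x)    # token ids -> embeddings
        h = self.l1(h)       # one recurrent step for the whole batch
        return self.l2(h)    # logits over the p + 1 ids

model = TinyLSTM(p=10, n_units=32)
logits = model(np.array([1, 2, 3], dtype=np.int32))  # shape (3, 11)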
Example #4
File: model.py  Project: musyoku/NLP
	def build(self, conf):
		conf.check()
		wscale = 0.1

		embed_id = EmbedID(conf.n_vocab, conf.char_embed_size, ignore_label=-1)
		if conf.gpu_enabled:
			embed_id.to_gpu()

		# encoder
		lstm_attributes = {}
		lstm_units = [(conf.char_embed_size, conf.word_encoder_lstm_units[0])]
		lstm_units += zip(conf.word_encoder_lstm_units[:-1], conf.word_encoder_lstm_units[1:])

		for i, (n_in, n_out) in enumerate(lstm_units):
			if conf.word_encoder_lstm_apply_batchnorm:
				lstm_attributes["layer_%i" % i] = BNLSTM(n_in, n_out)
			else:
				lstm_attributes["layer_%i" % i] = LSTM(n_in, n_out)

		word_encoder_lstm = LSTMEncoder(**lstm_attributes)
		word_encoder_lstm.n_layers = len(lstm_units)
		if conf.gpu_enabled:
			word_encoder_lstm.to_gpu()

		# decoder
		lstm_attributes = {}
		lstm_units = [(conf.char_embed_size + conf.word_embed_size, conf.word_decoder_lstm_units[0])]
		lstm_units += zip(conf.word_decoder_lstm_units[:-1], conf.word_decoder_lstm_units[1:])

		for i, (n_in, n_out) in enumerate(lstm_units):
			if conf.word_encoder_lstm_apply_batchnorm:
				lstm_attributes["layer_%i" % i] = BNLSTM(n_in, n_out)
			else:
				lstm_attributes["layer_%i" % i] = LSTM(n_in, n_out)
		lstm_attributes["layer_output"] = L.Linear(conf.word_decoder_lstm_units[-1], conf.n_vocab, wscale=wscale)

		word_decoder_lstm = LSTMDecoder(**lstm_attributes)
		word_decoder_lstm.n_layers = len(lstm_units)
		if conf.gpu_enabled:
			word_decoder_lstm.to_gpu()

		# word n-gram
		lstm_attributes = {}
		lstm_units = [(conf.word_embed_size, conf.word_ngram_lstm_units[0])]
		lstm_units += zip(conf.word_ngram_lstm_units[:-1], conf.word_ngram_lstm_units[1:])

		for i, (n_in, n_out) in enumerate(lstm_units):
			if conf.word_encoder_lstm_apply_batchnorm:
				lstm_attributes["layer_%i" % i] = BNLSTM(n_in, n_out)
			else:
				lstm_attributes["layer_%i" % i] = LSTM(n_in, n_out)

		word_ngram_lstm = LSTMEncoder(**lstm_attributes)
		word_ngram_lstm.n_layers = len(lstm_units)
		if conf.gpu_enabled:
			word_ngram_lstm.to_gpu()

		# variational encoder for word n-gram
		fc_attributes = {}
		fc_units = []
		if len(conf.word_ngram_fc_hidden_units) > 0:
			fc_units = [(conf.word_ngram_lstm_units[-1], conf.word_ngram_fc_hidden_units[0])]
			fc_units += zip(conf.word_ngram_fc_hidden_units[:-1], conf.word_ngram_fc_hidden_units[1:])
			for i, (n_in, n_out) in enumerate(fc_units):
				fc_attributes["layer_%i" % i] = L.Linear(n_in, n_out, wscale=wscale)
				fc_attributes["batchnorm_%i" % i] = L.BatchNormalization(n_out)
			fc_attributes["layer_mean"] = L.Linear(conf.word_ngram_fc_hidden_units[-1], conf.word_embed_size, wscale=wscale)
			fc_attributes["layer_var"] = L.Linear(conf.word_ngram_fc_hidden_units[-1], conf.word_embed_size, wscale=wscale)
		else:
			fc_attributes["layer_mean"] = L.Linear(conf.word_ngram_lstm_units[-1], conf.word_embed_size, wscale=wscale)
			fc_attributes["layer_var"] = L.Linear(conf.word_ngram_lstm_units[-1], conf.word_embed_size, wscale=wscale)

		word_ngram_fc = GaussianNetwork(**fc_attributes)
		word_ngram_fc.n_layers = len(fc_units)
		word_ngram_fc.nonlinear = conf.word_ngram_fc_nonlinear
		word_ngram_fc.apply_batchnorm = conf.word_ngram_fc_apply_batchnorm
		word_ngram_fc.apply_dropout = conf.word_ngram_fc_apply_dropout
		if conf.gpu_enabled:
			word_ngram_fc.to_gpu()

		# variational encoder
		fc_attributes = {}
		fc_units = []
		if len(conf.word_encoder_fc_hidden_units) > 0:
			fc_units = [(conf.word_encoder_lstm_units[-1], conf.word_encoder_fc_hidden_units[0])]
			fc_units += zip(conf.word_encoder_fc_hidden_units[:-1], conf.word_encoder_fc_hidden_units[1:])
			for i, (n_in, n_out) in enumerate(fc_units):
				fc_attributes["layer_%i" % i] = L.Linear(n_in, n_out, wscale=wscale)
				fc_attributes["batchnorm_%i" % i] = L.BatchNormalization(n_out)
			fc_attributes["layer_mean"] = L.Linear(conf.word_encoder_fc_hidden_units[-1], conf.word_embed_size, wscale=wscale)
			fc_attributes["layer_var"] = L.Linear(conf.word_encoder_fc_hidden_units[-1], conf.word_embed_size, wscale=wscale)
		else:
			fc_attributes["layer_mean"] = L.Linear(conf.word_encoder_lstm_units[-1], conf.word_embed_size, wscale=wscale)
			fc_attributes["layer_var"] = L.Linear(conf.word_encoder_lstm_units[-1], conf.word_embed_size, wscale=wscale)

		word_encoder_fc = GaussianNetwork(**fc_attributes)
		word_encoder_fc.n_layers = len(fc_units)
		word_encoder_fc.nonlinear = conf.word_encoder_fc_nonlinear
		word_encoder_fc.apply_batchnorm = conf.word_encoder_fc_apply_batchnorm
		word_encoder_fc.apply_dropout = conf.word_encoder_fc_apply_dropout
		if conf.gpu_enabled:
			word_encoder_fc.to_gpu()

		# discriminator
		fc_attributes = {}
		fc_units = [(conf.word_embed_size, conf.discriminator_hidden_units[0])]
		fc_units += zip(conf.discriminator_hidden_units[:-1], conf.discriminator_hidden_units[1:])
		fc_units += [(conf.discriminator_hidden_units[-1], 2)]
		for i, (n_in, n_out) in enumerate(fc_units):
			fc_attributes["layer_%i" % i] = L.Linear(n_in, n_out, wscale=wscale)
			fc_attributes["batchnorm_%i" % i] = L.BatchNormalization(n_out)

		discriminator = MultiLayerPerceptron(**fc_attributes)
		discriminator.n_layers = len(fc_units)
		discriminator.nonlinear = conf.word_encoder_fc_nonlinear
		discriminator.apply_batchnorm = conf.word_encoder_fc_apply_batchnorm
		discriminator.apply_dropout = conf.word_encoder_fc_apply_dropout
		if conf.gpu_enabled:
			discriminator.to_gpu()

		return embed_id, word_encoder_lstm, word_encoder_fc, word_decoder_lstm, discriminator, word_ngram_lstm, word_ngram_fc
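The layer_mean / layer_var pairs above presumably feed the usual reparameterization trick (Chainer's F.gaussian performs this sampling given a mean and a log variance); that is an assumption about GaussianNetwork, not something shown in the snippet. A plain-NumPy sketch of the sampling step, with illustrative shapes:

import numpy as np

def sample_gaussian(mean, ln_var, rng=np.random):
    # z = mean + exp(ln_var / 2) * eps, with eps ~ N(0, I)
    eps = rng.standard_normal(mean.shape).astype(mean.dtype)
    return mean + np.exp(0.5 * ln_var) * eps

mean = np.zeros((2, 4), dtype=np.float32)    # layer_mean output: (batch, word_embed_size)
ln_var = np.zeros((2, 4), dtype=np.float32)  # layer_var output, read as a log variance
z = sample_gaussian(mean, ln_var)            # sampled latent word embedding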
Example #5
    def __init__(self, conf, name="mono"):
        self.name = name
        conf.check()
        wscale = 0.1

        forward_lstm_attributes = {}
        forward_lstm_units = [(conf.ndim_char_embed, conf.lstm_hidden_units[0])]
        forward_lstm_units += zip(conf.lstm_hidden_units[:-1],
                                  conf.lstm_hidden_units[1:])

        for i, (n_in, n_out) in enumerate(forward_lstm_units):
            if conf.rnn_type == "dsgu":
                forward_lstm_attributes["layer_%i" % i] = StatefulDSGU(
                    n_in, n_out)
            elif conf.rnn_type == "lstm":
                if conf.lstm_apply_batchnorm:
                    forward_lstm_attributes["layer_%i" % i] = BNLSTM(
                        n_in, n_out)
                else:
                    forward_lstm_attributes["layer_%i" % i] = L.LSTM(
                        n_in, n_out)
            elif conf.rnn_type == "gru":
                forward_lstm_attributes["layer_%i" % i] = L.StatefulGRU(
                    n_in, n_out)
            else:
                raise NotImplementedError()

        self.forward_lstm = StackedLSTM(**forward_lstm_attributes)
        self.forward_lstm.n_layers = len(forward_lstm_units)
        self.forward_lstm.apply_dropout = conf.lstm_apply_dropout

        backward_lstm_attributes = {}
        backward_lstm_units = [(conf.ndim_char_embed,
                                conf.lstm_hidden_units[0])]
        backward_lstm_units += zip(conf.lstm_hidden_units[:-1],
                                   conf.lstm_hidden_units[1:])

        for i, (n_in, n_out) in enumerate(backward_lstm_units):
            if conf.rnn_type == "dsgu":
                backward_lstm_attributes["layer_%i" % i] = StatefulDSGU(
                    n_in, n_out)
            elif conf.rnn_type == "lstm":
                if conf.lstm_apply_batchnorm:
                    backward_lstm_attributes["layer_%i" % i] = BNLSTM(
                        n_in, n_out)
                else:
                    backward_lstm_attributes["layer_%i" % i] = L.LSTM(
                        n_in, n_out)
            elif conf.rnn_type == "gru":
                backward_lstm_attributes["layer_%i" % i] = L.StatefulGRU(
                    n_in, n_out)
            else:
                raise NotImplementedError()

        self.backward_lstm = StackedLSTM(**backward_lstm_attributes)
        self.backward_lstm.n_layers = len(backward_lstm_units)
        self.backward_lstm.apply_dropout = conf.lstm_apply_dropout

        self.char_embed = L.EmbedID(conf.n_vocab,
                                    conf.ndim_char_embed,
                                    ignore_label=-1)

        self.f_ym = L.Linear(conf.lstm_hidden_units[-1],
                             conf.ndim_m,
                             nobias=True)
        self.f_um = L.Linear(conf.lstm_hidden_units[-1],
                             conf.ndim_m,
                             nobias=True)

        attention_fc_attributes = {}
        if len(conf.attention_fc_hidden_units) == 0:
            attention_fc_hidden_units = [(conf.ndim_m, 1)]
        else:
            attention_fc_hidden_units = [(conf.ndim_m,
                                          conf.attention_fc_hidden_units[0])]
            attention_fc_hidden_units += zip(
                conf.attention_fc_hidden_units[:-1],
                conf.attention_fc_hidden_units[1:])
            attention_fc_hidden_units += [(conf.attention_fc_hidden_units[-1],
                                           1)]
        for i, (n_in, n_out) in enumerate(attention_fc_hidden_units):
            attention_fc_attributes["layer_%i" % i] = L.Linear(n_in,
                                                               n_out,
                                                               wscale=wscale)
        self.attention_fc = FullyConnectedNetwork(**attention_fc_attributes)
        self.attention_fc.n_layers = len(attention_fc_hidden_units)
        self.attention_fc.hidden_activation_function = conf.attention_fc_hidden_activation_function
        self.attention_fc.output_activation_function = conf.attention_fc_output_activation_function
        self.attention_fc.apply_dropout = conf.attention_fc_apply_dropout

        self.f_rg = L.Linear(conf.lstm_hidden_units[-1],
                             conf.ndim_g,
                             nobias=True)
        self.f_ug = L.Linear(conf.lstm_hidden_units[-1],
                             conf.ndim_g,
                             nobias=True)

        reader_fc_attributes = {}
        if len(conf.reader_fc_hidden_units) == 0:
            reader_fc_hidden_units = [(conf.ndim_g, conf.n_vocab)]
        else:
            reader_fc_hidden_units = [(conf.ndim_g,
                                       conf.reader_fc_hidden_units[0])]
            reader_fc_hidden_units += zip(conf.reader_fc_hidden_units[:-1],
                                          conf.reader_fc_hidden_units[1:])
            reader_fc_hidden_units += [(conf.reader_fc_hidden_units[-1],
                                        conf.n_vocab)]
        for i, (n_in, n_out) in enumerate(reader_fc_hidden_units):
            reader_fc_attributes["layer_%i" % i] = L.Linear(n_in,
                                                            n_out,
                                                            wscale=wscale)
        self.reader_fc = FullyConnectedNetwork(**reader_fc_attributes)
        self.reader_fc.n_layers = len(reader_fc_hidden_units)
        self.reader_fc.hidden_activation_function = conf.reader_fc_hidden_activation_function
        self.reader_fc.output_activation_function = conf.reader_fc_output_activation_function
        self.reader_fc.apply_dropout = conf.attention_fc_apply_dropout  # note: reuses the attention network's dropout flag

        if conf.use_gpu:
            self.forward_lstm.to_gpu()
            self.backward_lstm.to_gpu()
            self.char_embed.to_gpu()
            self.attention_fc.to_gpu()
            self.reader_fc.to_gpu()
            self.f_ym.to_gpu()
            self.f_um.to_gpu()
            self.f_rg.to_gpu()
            self.f_ug.to_gpu()

        self.optimizer_char_embed = optimizers.Adam(
            alpha=conf.learning_rate, beta1=conf.gradient_momentum)
        self.optimizer_char_embed.setup(self.char_embed)
        self.optimizer_char_embed.add_hook(GradientClipping(10.0))

        self.optimizer_forward_lstm = optimizers.Adam(
            alpha=conf.learning_rate, beta1=conf.gradient_momentum)
        self.optimizer_forward_lstm.setup(self.forward_lstm)
        self.optimizer_forward_lstm.add_hook(GradientClipping(10.0))

        self.optimizer_backward_lstm = optimizers.Adam(
            alpha=conf.learning_rate, beta1=conf.gradient_momentum)
        self.optimizer_backward_lstm.setup(self.backward_lstm)
        self.optimizer_backward_lstm.add_hook(GradientClipping(10.0))

        self.optimizer_f_um = optimizers.Adam(alpha=conf.learning_rate,
                                              beta1=conf.gradient_momentum)
        self.optimizer_f_um.setup(self.f_um)
        self.optimizer_f_um.add_hook(GradientClipping(10.0))

        self.optimizer_f_ym = optimizers.Adam(alpha=conf.learning_rate,
                                              beta1=conf.gradient_momentum)
        self.optimizer_f_ym.setup(self.f_ym)
        self.optimizer_f_ym.add_hook(GradientClipping(10.0))

        self.optimizer_attention_fc = optimizers.Adam(
            alpha=conf.learning_rate, beta1=conf.gradient_momentum)
        self.optimizer_attention_fc.setup(self.attention_fc)
        self.optimizer_attention_fc.add_hook(GradientClipping(10.0))

        self.optimizer_f_rg = optimizers.Adam(alpha=conf.learning_rate,
                                              beta1=conf.gradient_momentum)
        self.optimizer_f_rg.setup(self.f_rg)
        self.optimizer_f_rg.add_hook(GradientClipping(10.0))

        self.optimizer_f_ug = optimizers.Adam(alpha=conf.learning_rate,
                                              beta1=conf.gradient_momentum)
        self.optimizer_f_ug.setup(self.f_ug)
        self.optimizer_f_ug.add_hook(GradientClipping(10.0))

        self.optimizer_reader_fc = optimizers.Adam(
            alpha=conf.learning_rate, beta1=conf.gradient_momentum)
        self.optimizer_reader_fc.setup(self.reader_fc)
        self.optimizer_reader_fc.add_hook(GradientClipping(10.0))
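The nine optimizer blocks above all repeat one pattern: an Adam optimizer per link with a gradient-clipping hook at 10.0. A hypothetical helper (make_adam is not in the source) that captures it, assuming Chainer's standard optimizer API (GradientClipping lives in chainer.optimizer_hooks in recent releases, chainer.optimizer in older ones):

from chainer import optimizers
from chainer.optimizer_hooks import GradientClipping

def make_adam(link, learning_rate, gradient_momentum, clip=10.0):
    opt = optimizers.Adam(alpha=learning_rate, beta1=gradient_momentum)
    opt.setup(link)                       # bind the optimizer to this link's parameters
    opt.add_hook(GradientClipping(clip))  # clip the global gradient norm at `clip`
    return opt

With such a helper, each block above would collapse to a single line, e.g. self.optimizer_f_um = make_adam(self.f_um, conf.learning_rate, conf.gradient_momentum).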