Ejemplo n.º 1
0
def fit_mlp(image_size=(28, 28),
            datasets='../data/mnist.pkl.gz',
            outpath='../output/mnist_lenet.params',
            n_hidden=500,
            learning_rate=0.01,
            L1_reg=0.00,
            L2_reg=0.001,
            n_epochs=1000,
            batch_size=20,
            patience=10000,
            patience_increase=2,
            improvement_threshold=0.995):
    """Build an MLP classifier, fit it with SupervisedMSGD and show the results.

    The training cost is the classifier's negative log-likelihood plus
    ``L1_reg * classifier.L1`` and ``L2_reg * classifier.L2``.

    Args:
        image_size: input image dimensions; their product is the MLP's ``n_in``.
        datasets: path passed to ``load_data``.
        outpath: path passed through to ``SupervisedMSGD``.
        n_hidden: value forwarded as the MLP's ``n_hidden``.
        learning_rate: forwarded to ``SupervisedMSGD``.
        L1_reg: weight of the ``classifier.L1`` term in the cost.
        L2_reg: weight of the ``classifier.L2`` term in the cost.
        n_epochs, patience, patience_increase, improvement_threshold:
            forwarded unchanged to ``learner.fit``.
        batch_size: forwarded to ``SupervisedMSGD``.

    Returns:
        The ``SupervisedMSGD`` learner after ``fit`` has run.
    """
    # Symbolic placeholders: minibatch index, inputs and integer targets.
    minibatch_index = T.lscalar()
    inputs = T.matrix('x')
    targets = T.ivector('y')

    # The number of output classes is fixed to 10 here.
    n_inputs = reduce(np.multiply, image_size)
    classifier = MLP(rng=rng.RandomState(SEED), input=inputs, n_in=n_inputs,
                     n_hidden=n_hidden, n_out=10)

    data_fit_term = classifier.negative_log_likelihood(targets)
    cost = (data_fit_term + L1_reg * classifier.L1 + L2_reg * classifier.L2)

    learner = SupervisedMSGD(minibatch_index, inputs, targets, batch_size,
                             learning_rate, load_data(datasets), outpath,
                             classifier, cost)

    fit_options = dict(n_epochs=n_epochs,
                       patience=patience,
                       patience_increase=patience_increase,
                       improvement_threshold=improvement_threshold)
    # fit() yields four values; the best iteration is not used afterwards.
    best_validation_loss, _best_iter, epoch, elapsed_time = learner.fit(**fit_options)
    display_results(best_validation_loss, elapsed_time, epoch)

    return learner
Ejemplo n.º 2
0
    def __init__(self, input_dim, hidden_dim, output_dim, trans_num, diffusion_num, duration, bias=True, rnn_type='GRU', model_type='C', trans_activate_type='L'):
        """Create ``duration`` MLP/CDN pairs plus one recurrent layer and a LayerNorm.

        ``model_type`` must be 'C' or 'S' and ``trans_activate_type`` must be
        'L' or 'N'; ``rnn_type`` must be 'LSTM' or 'GRU'. These are enforced
        with assertions.

        With ``model_type == 'C'`` each MLP is built with (input_dim,
        hidden_dim, hidden_dim) and each CDN with hidden_dim as its first
        argument; with 'S' the MLP's third argument and the CDN's first
        argument are both output_dim.
        """
        super(CTGCN, self).__init__()

        # Record the configuration on the module.
        self.input_dim, self.hidden_dim, self.output_dim = input_dim, hidden_dim, output_dim
        self.rnn_type, self.model_type = rnn_type, model_type
        self.trans_activate_type = trans_activate_type
        self.method_name = 'CTGCN' + '-' + model_type

        assert model_type in ('C', 'S')
        assert trans_activate_type in ('L', 'N')

        self.duration, self.trans_num, self.diffusion_num = duration, trans_num, diffusion_num
        self.bias = bias

        # Width shared by the MLP's third argument and the CDN's first argument.
        bridge_dim = hidden_dim if model_type == 'C' else output_dim

        self.mlp_list = nn.ModuleList()
        self.duffision_list = nn.ModuleList()
        for _ in range(duration):
            # Keep MLP and CDN construction interleaved per step.
            self.mlp_list.append(MLP(input_dim, hidden_dim, bridge_dim, trans_num, bias=bias, activate_type=trans_activate_type))
            self.duffision_list.append(CDN(bridge_dim, output_dim, output_dim, diffusion_num, rnn_type=rnn_type))

        assert rnn_type in ('LSTM', 'GRU')
        recurrent_cls = nn.LSTM if rnn_type == 'LSTM' else nn.GRU
        self.rnn = recurrent_cls(output_dim, output_dim, num_layers=1, bias=bias, batch_first=True)
        self.norm = nn.LayerNorm(output_dim)
Ejemplo n.º 3
0
    def __init__(self, name='scene_mlp', layer_sizes=(2048, 1024, 1024, 80), model_file=None):
        """Build the symbolic graph of a softmax MLP with a symmetric-KL cost.

        If ``model_file`` is given, ``layer_sizes`` is replaced by the
        'layer_sizes' attribute stored in that HDF5 file, and the weights are
        loaded from the same file once the graph has been built.

        After construction:
            inputs: ``[x, y]`` symbolic matrices.
            proba / log_proba: MLP output on ``x`` and its logarithm.
            costs: ``[kl, acc]`` where ``kl`` sums KL(proba||y) and KL(y||proba)
                and ``acc`` is the mean argmax agreement between proba and y.
            layers / params: the MLP and its concatenated parameter list.
        """
        self.name = name
        restoring = model_file is not None
        if restoring:
            # Stored architecture overrides the argument.
            with h5py.File(model_file, 'r') as stored:
                layer_sizes = stored.attrs['layer_sizes']
        self.config = {'layer_sizes': layer_sizes}

        # Symbolic inputs.
        features = T.matrix('x')
        target = T.matrix('y')
        self.inputs = [features, target]

        # Forward pass.
        self.mlp = MLP(layer_sizes=layer_sizes, name='mlp', output_type='softmax')
        self.proba = self.mlp.compute(features)
        self.log_proba = T.log(self.proba)

        # Costs: symmetric KL divergence and argmax accuracy.
        eps = 1e-30  # keeps the log ratio finite when a probability is zero

        def directed_kl(a, b):
            return T.mean(T.sum(a * T.log((a + eps) / (b + eps)), axis=1))

        symmetric_kl = directed_kl(self.proba, target)
        symmetric_kl += directed_kl(target, self.proba)
        accuracy = T.mean(T.eq(self.proba.argmax(axis=1), target.argmax(axis=1)))
        self.costs = [symmetric_kl, accuracy]

        # Layers and their flattened parameter list.
        self.layers = [self.mlp]
        collected = []
        for layer in self.layers:
            collected = collected + layer.params
        self.params = collected

        # Restore weights last, once every layer exists.
        if restoring:
            self.load_weights(model_file)
Ejemplo n.º 4
0
    def load_pretrain_weight(self):
        """Copy weights from an MLP model and a GMF model into this model.

        An ``MLP`` and a ``GMF`` instance are built from ``self.config`` (with
        ``config['latent_dim']`` set to ``latent_dim_mlp`` and ``latent_dim_mf``
        respectively) and their embedding, fully-connected and output-layer
        tensors are assigned to the matching layers of this model. The output
        layer is an ``alpha`` / ``1 - alpha`` blend of the two source models.

        Side effect: ``self.config['latent_dim']`` is overwritten and is left
        equal to ``config['latent_dim_mf']`` on return.

        NOTE(review): no checkpoint is loaded into ``mlp_model`` / ``gmf_model``
        inside this method; presumably ``MLP(config)`` / ``GMF(config)`` restore
        the pre-trained weights themselves -- confirm, otherwise freshly
        initialised weights are copied.

        :return: None
        """
        config = self.config

        # --- MLP branch -----------------------------------------------------
        config['latent_dim'] = config['latent_dim_mlp']
        mlp_model = MLP(config)
        if config['use_cuda']:
            mlp_model.cuda()

        self.embedding_user_mlp.weight.data = mlp_model.embedding_user.weight.data
        self.embedding_item_mlp.weight.data = mlp_model.embedding_item.weight.data
        for i, layer in enumerate(self.fc_layers):
            layer.weight.data = mlp_model.fc_layers[i].weight.data

        # --- GMF branch -----------------------------------------------------
        config['latent_dim'] = config['latent_dim_mf']
        gmf_model = GMF(config)
        if config['use_cuda']:
            gmf_model.cuda()
        self.embedding_user_mf.weight.data = gmf_model.embedding_user.weight.data
        self.embedding_item_mf.weight.data = gmf_model.embedding_item.weight.data

        # --- Output layer: weighted blend of both models -------------------
        alpha = config['alpha']
        # NOTE(review): concatenation along dim=0 stacks the two weight
        # matrices row-wise; if affine_output is a Linear over the concatenated
        # MLP/GMF features this probably needs dim=-1 -- confirm against the
        # layer definition before changing.
        self.affine_output.weight.data = torch.cat(
            [
                alpha * mlp_model.affine_output.weight.data,
                (1 - alpha) * gmf_model.affine_output.weight.data,
            ],
            dim=0)
        # The GMF output layer is named `affine_output`, as in the weight
        # blend above; the bias previously read the non-matching `affine_out`.
        self.affine_output.bias.data = (
            alpha * mlp_model.affine_output.bias.data
            + (1 - alpha) * gmf_model.affine_output.bias.data)
Ejemplo n.º 5
0
    def __init__(self, name='ra', nimg=2048, na=512, nh=512, nw=512, nout=8843, npatch=30, model_file=None):
        """Build the layers and the training graph of the attention caption model.

        When ``model_file`` is given, ``nimg``, ``na``, ``nh``, ``nw`` and
        ``nout`` are read from that HDF5 file's attributes (``npatch`` always
        comes from the argument) and the weights are loaded from it after all
        layers have been created.

        After construction ``inputs`` is ``[cap, img]``, ``costs`` is the mean
        of the per-step loss returned by ``scan_func``, and ``p`` / ``alpha``
        hold the second and fourth scan outputs.
        """
        self.name = name

        # Resolve hyper-parameters: a checkpoint overrides the arguments.
        restorable = ('nimg', 'na', 'nh', 'nw', 'nout')
        settings = {'nimg': nimg, 'na': na, 'nh': nh, 'nw': nw, 'nout': nout, 'npatch': npatch}
        if model_file is not None:
            with h5py.File(model_file, 'r') as checkpoint:
                for key in restorable:
                    settings[key] = checkpoint.attrs[key]
        nimg, na, nh, nw, nout = [settings[key] for key in restorable]
        self.config = settings

        def scoped(suffix):
            # Every layer name is prefixed with "<model name>@".
            return self.name + '@' + suffix

        # Layers, created in a fixed order.
        self.embedding = Embedding(n_emb=nout, dim_emb=nw, name=scoped('embedding'))
        self.init_mlp = MLP(layer_sizes=[na, 2*nh], output_type='tanh', name=scoped('init_mlp'))
        self.proj_mlp = MLP(layer_sizes=[nimg, na], output_type='tanh', name=scoped('proj_mlp'))
        self.lstm = BasicLSTM(dim_x=na+nw, dim_h=nh, name=scoped('lstm'))
        self.pred_mlp = MLP(layer_sizes=[na+nh+nw, nout], output_type='softmax', name=scoped('pred_mlp'))
        self.attention = Attention(dim_item=na, dim_context=na+nw+nh, hsize=nh, name=scoped('attention'))

        # Symbolic inputs.
        caption = T.imatrix('cap')
        image = T.tensor3('img')
        self.inputs = [caption, image]

        # Project the image, average over axis 1 and derive the initial state.
        projected = self.proj_mlp.compute(image)
        mean_feat = projected.mean(axis=1)
        start_state = T.concatenate([mean_feat, self.init_mlp.compute(mean_feat)], axis=-1)

        # Scan pairs caption[0:-1] with caption[1:] along the first axis.
        scan_outputs, _ = theano.scan(fn=self.scan_func,
                                      sequences=[caption[0:-1, :], caption[1:, :]],
                                      outputs_info=[start_state, None, None, None],
                                      non_sequences=[projected])
        _, self.p, step_loss, self.alpha = scan_outputs

        # Training cost: mean of the per-step loss.
        self.costs = [T.mean(step_loss)]

        # Layers and their flattened parameter list.
        self.layers = [self.embedding, self.init_mlp, self.proj_mlp, self.attention, self.lstm, self.pred_mlp]
        gathered = []
        for layer in self.layers:
            gathered = gathered + layer.params
        self.params = gathered

        if model_file is not None:
            self.load_weights(model_file)

        # Lazily-built helpers for the test stage.
        self._init_func = None
        self._step_func = None
        self._proj_func = None
        self._feat_shared = theano.shared(np.zeros((1, npatch, na)).astype(theano.config.floatX))
Ejemplo n.º 6
0
 def __init__(self, args):
     """Initialise the encoders and the two MLPs ``g_theta`` and ``f_phi``.

     All sizes come from ``args``: ``g_theta`` is built from
     ``cv_filter + 2 + te_hidden``, ``basern_gt_hidden`` (twice) and
     ``basern_gt_layer``; ``f_phi`` from ``basern_gt_hidden``,
     ``basern_fp_hidden``, ``a_size``, ``basern_fp_layer`` and
     ``basern_fp_dropout`` with ``last=True``.
     """
     super(BaseRN, self).__init__()
     self.init_encoders(args)

     g_in = args.cv_filter + 2 + args.te_hidden
     g_width = args.basern_gt_hidden
     self.g_theta = MLP(g_in, g_width, g_width, args.basern_gt_layer)

     self.f_phi = MLP(g_width, args.basern_fp_hidden, args.a_size,
                      args.basern_fp_layer, args.basern_fp_dropout, last=True)
Ejemplo n.º 7
0
    def __init__(self, name='ss', nimg=2048, nh=512, nw=512, nout=8843, ns=80, model_file=None):
        """Build the layers and the training graph of the scene-conditioned caption model.

        When ``model_file`` is given, ``nimg``, ``nh``, ``nw``, ``ns`` and
        ``nout`` are read from that HDF5 file's attributes and the weights are
        loaded from it after all layers have been created.

        After construction ``inputs`` is ``[cap, img, scene]``, ``costs`` holds
        the mean of the per-step loss returned by ``scan_func`` and ``p`` is the
        second scan output.
        """
        self.name = name

        # Resolve hyper-parameters: a checkpoint overrides the arguments.
        settings = {'nimg': nimg, 'nh': nh, 'nw': nw, 'nout': nout, 'ns': ns}
        if model_file is not None:
            with h5py.File(model_file, 'r') as checkpoint:
                for key in ('nimg', 'nh', 'nw', 'ns', 'nout'):
                    settings[key] = checkpoint.attrs[key]
        nimg, nh, nw = settings['nimg'], settings['nh'], settings['nw']
        nout, ns = settings['nout'], settings['ns']
        self.config = settings

        def scoped(suffix):
            # Every layer name is prefixed with "<model name>@".
            return self.name + '@' + suffix

        # Layers, created in a fixed order.
        self.embedding = Embedding(n_emb=nout, dim_emb=nw, name=scoped('embedding'))
        self.proj_mlp = MLP(layer_sizes=[nimg, 2*nh], output_type='tanh', name=scoped('proj_mlp'))
        self.lstm = BasicLSTM(dim_x=nw+ns, dim_h=nh, name=scoped('lstm'))
        self.pred_mlp = MLP(layer_sizes=[nh+nw, nout], output_type='softmax', name=scoped('pred_mlp'))

        # Symbolic inputs.
        caption = T.imatrix('cap')
        image = T.matrix('img')
        scene_vec = T.matrix('scene')
        self.inputs = [caption, image, scene_vec]

        # The projected image is the initial recurrent state; the scan pairs
        # caption[0:-1] with caption[1:] along the first axis.
        start_state = self.proj_mlp.compute(image)
        scan_outputs, _ = theano.scan(fn=self.scan_func,
                                      sequences=[caption[0:-1, :], caption[1:, :]],
                                      outputs_info=[start_state, None, None],
                                      non_sequences=[scene_vec])
        _, self.p, step_loss = scan_outputs

        # Training cost: mean of the per-step loss.
        self.costs = [T.mean(step_loss)]

        # Layers and their flattened parameter list.
        self.layers = [self.embedding, self.proj_mlp, self.lstm, self.pred_mlp]
        gathered = []
        for layer in self.layers:
            gathered = gathered + layer.params
        self.params = gathered

        if model_file is not None:
            self.load_weights(model_file)

        # Lazily-built helpers for the test stage.
        self._init_func = None
        self._step_func = None
        self._scene_shared = theano.shared(np.zeros((1, ns)).astype(theano.config.floatX))
Ejemplo n.º 8
0
 def __init__(self, sequence_length, n_hidden_rnn, n_in_mlp, n_hidden_mlp, n_out,
         L1_reg, L2_reg, learning_rate, word_embedding, non_static):
     """
     Build the question-answer RNN model graph.

     The graph embeds the query and answer token ids with a shared embedding
     table, encodes both with a GRU ``RNNModule``, combines them with an
     ``InteractLayer``, batch-normalises the result and classifies it with an
     ``MLP``. The cost is the classifier's cross entropy plus L1/L2 penalties
     and is minimised with RMSProp.

     :param sequence_length: sequence length of both query and answer inputs
     :param n_hidden_rnn: rnn hidden units
     :param n_in_mlp: mlp input size (also the InteractLayer ``dim``)
     :param n_hidden_mlp: mlp hidden size
     :param n_out: mlp out size
     :param L1_reg: weight of ``classifier.L1`` in the cost
     :param L2_reg: weight of ``classifier.L2_sqr`` in the cost
     :param learning_rate: learning rate for the RMSProp update
     :param word_embedding: initial embedding matrix (its ``.shape`` is used)
     :param non_static: bool, whether the embedding variable is trainable
     """
     self.lr = learning_rate
     self.word_embedding = word_embedding
     # define the placeholders
     with tf.name_scope('placeholder'):
         self.q_input = tf.placeholder(tf.int64, shape=[None, sequence_length], name='query_input')
         self.a_input = tf.placeholder(tf.int64, shape=[None, sequence_length], name='answer_input')
         # 1-D int64 labels (a one-hot encoding would be [batch_size, n_out])
         self.l_input = tf.placeholder(tf.int64, shape=[None], name='label_input')
         self.keep_prop = tf.placeholder(tf.float32, name='keep_prop')
     # map token ids to vectors with the embedding table.
     with tf.name_scope("embedding"):
         _word_embedding = tf.get_variable(name='word_emb', shape=self.word_embedding.shape, dtype=tf.float32,
                                           initializer=tf.constant_initializer(self.word_embedding),
                                           trainable=non_static)
         q_embedding = tf.nn.embedding_lookup(_word_embedding, self.q_input)
         a_embedding = tf.nn.embedding_lookup(_word_embedding, self.a_input)
         # Single-argument call form: prints the same text under Python 2 and 3
         # (the former print statement is a SyntaxError on Python 3).
         print("input shape(embedding):  {}".format(q_embedding.get_shape()))
     # define rnn model.
     with tf.variable_scope("RNN"):
         # one rnn layer encodes both the query and the answer
         rnn_layer = RNNModule(n_hidden_rnn, cell="GRU")
         q_sentence_vec, a_sentence_vec = rnn_layer(q_embedding, a_embedding)
     # define classifier.
     with tf.name_scope("MLPDrop"):
         interact_layer = InteractLayer(n_hidden_rnn, n_hidden_rnn, dim=n_in_mlp)
         qa_vec = interact_layer(q_sentence_vec, a_sentence_vec)
         bn_layer = BatchNormLayer(n_in=n_in_mlp, inputs=qa_vec)
         # NOTE: the dropout variant (MLPDropout with keep_prop) is not used
         # here, so the keep_prop placeholder does not feed the classifier.
         classifier = MLP(bn_layer.out, n_in_mlp, n_hidden_mlp, n_out)
     # define cost, optimizer and output.
     self.pred_prob = classifier.pred_prob()
     self.error = classifier.errors(self.l_input)
     self.cost = classifier.cross_entropy(self.l_input) + L1_reg * classifier.L1 + L2_reg * classifier.L2_sqr
     self.optimizer = tf.train.RMSPropOptimizer(self.lr, 0.9).minimize(self.cost)
Ejemplo n.º 9
0
    def __init__(self, in_dims, n_enc, enc_strides, encoder_type):
        """The shared encoder function, mapping input x to hiddens.

        Args:
        in_dims: input size; passed as ``in_channels`` to the conv encoder or
            as ``input_dim`` to the MLP encoder.
        n_enc: list, number of hidden units per layer in the encoder
        enc_strides: list, stride in each layer (only for 'conv' encoder_type)
        encoder_type: str, type of encoder, either 'conv' or 'multi'

        Raises:
        ValueError: if ``encoder_type`` is neither 'conv' nor 'multi'.
        """
        super(SharedEncoder, self).__init__()
        self._encoder_type = encoder_type

        # Reject unsupported encoder types up front.
        if encoder_type not in ('conv', 'multi'):
            raise ValueError('Unknown encoder_type {}'.format(encoder_type))

        if encoder_type == 'conv':
            conv_options = dict(in_channels=in_dims,
                                layers_out_channels=n_enc,
                                strides=enc_strides,
                                kernel_size=3,
                                activation=nn.ReLU())
            self.encoder = SharedConvModule(**conv_options)
        else:
            # 'multi': fully-connected encoder with an activation after the last layer.
            mlp_options = dict(input_dim=in_dims,
                               hidden_dims=n_enc,
                               activation=nn.ReLU(),
                               activate_final=True)
            self.encoder = MLP(**mlp_options)
    def __init__(self, feature_size, field_size, embedding_size,
                 deep_layers_dim, dropout_fm, dropout_deep, act_function,
                 batch_norm, l2):
        """Build the DeepFM embeddings, FM dropout layers, deep MLP and predictor.

        Args:
            feature_size: number of rows of the embedding and bias tables.
            field_size: number of fields; the deep part's input width is
                ``field_size * embedding_size``.
            embedding_size: width of each feature embedding.
            deep_layers_dim: sequence of deep-layer widths; its last entry
                (plus 2) is the input width of the prediction layer.
            dropout_fm: sequence whose first two entries are the dropout rates
                of the two FM dropout layers.
            dropout_deep: dropout setting forwarded to ``MLP``.
            act_function: activation setting forwarded to ``MLP``.
            batch_norm: batch-norm setting forwarded to ``MLP``.
            l2: stored as ``self.l2``.
        """
        super(DeepFM, self).__init__()

        self.feature_size = feature_size
        self.field_size = field_size
        self.embedding_size = embedding_size
        self.dropout_fm = dropout_fm
        self.deep_layers_dim = deep_layers_dim
        self.dropout_deep = dropout_deep
        self.act_function = act_function
        self.batch_norm = batch_norm
        self.l2 = l2

        self.embeddings = nn.Embedding(self.feature_size, self.embedding_size)
        self.biases = nn.Embedding(self.feature_size, 1)

        # Use a ModuleList (not a plain list) so the Dropout modules are
        # registered as sub-modules and follow model.train() / model.eval();
        # in a plain list they would stay in training mode during evaluation.
        self.dropout_fm_layers = nn.ModuleList([
            nn.Dropout(dropout_fm[0]),
            nn.Dropout(dropout_fm[1]),
        ])

        # deep layers
        in_dim = self.field_size * self.embedding_size
        self.deep_layers = MLP(in_dim, self.deep_layers_dim, self.dropout_deep,
                               self.act_function, self.batch_norm)
        self.predict_layer = nn.Linear(self.deep_layers_dim[-1] + 2,
                                       1,
                                       bias=True)
        # Prediction-layer weight followed by the deep MLP's own weight list.
        self.weight_list = [self.predict_layer.weight
                            ] + self.deep_layers.weight_list

        self.reset_parameters()
Ejemplo n.º 11
0
 def __init__(self, args):
     """Initialise the encoders and the three MLPs ``h_psi``, ``g_theta`` and ``f_phi``.

     All sizes come from ``args``: ``h_psi`` takes
     ``cv_filter + 2 + te_hidden`` and has a third argument of 1;
     ``g_theta`` takes ``(cv_filter + 2) * 2 + te_hidden``; ``f_phi`` takes
     ``sarn_gt_hidden`` and ``a_size``. ``h_psi`` and ``f_phi`` are built with
     ``last=True``.
     """
     super(Sarn, self).__init__()
     self.init_encoders(args)

     single_in = args.cv_filter + 2 + args.te_hidden
     self.h_psi = MLP(single_in, args.sarn_hp_hidden, 1, args.sarn_hp_layer, last=True)

     pair_in = (args.cv_filter + 2) * 2 + args.te_hidden
     g_width = args.sarn_gt_hidden
     self.g_theta = MLP(pair_in, g_width, g_width, args.sarn_gt_layer)

     self.f_phi = MLP(g_width, args.sarn_fp_hidden, args.a_size,
                      args.sarn_fp_layer, args.sarn_fp_dropout, last=True)
Ejemplo n.º 12
0
def fit_mlp(image_size=(28, 28),
            datasets='../data/mnist.pkl.gz', outpath='../output/mnist_lenet.params',
            n_hidden=500, learning_rate=0.01, L1_reg=0.00, L2_reg=0.001,
            n_epochs=1000, batch_size=20, patience=10000,
            patience_increase=2, improvement_threshold=0.995):
    """Construct an MLP, train it through SupervisedMSGD and display the outcome.

    The optimised cost is ``negative_log_likelihood(y)`` of the classifier
    plus the ``L1_reg``- and ``L2_reg``-weighted ``classifier.L1`` and
    ``classifier.L2`` terms. ``image_size`` determines the input width (the
    product of its entries); the output width is fixed to 10. ``n_epochs``,
    ``patience``, ``patience_increase`` and ``improvement_threshold`` are
    forwarded to ``learner.fit``.

    Returns:
        The fitted ``SupervisedMSGD`` learner.
    """
    # Symbolic minibatch index, input matrix and integer label vector.
    idx = T.lscalar()
    data_var = T.matrix('x')
    label_var = T.ivector('y')

    net = MLP(rng=rng.RandomState(SEED), input=data_var,
              n_in=reduce(np.multiply, image_size),
              n_hidden=n_hidden, n_out=10)

    # Negative log-likelihood with L1 / L2 regularisation.
    objective = (net.negative_log_likelihood(label_var)
                 + L1_reg * net.L1
                 + L2_reg * net.L2)

    learner = SupervisedMSGD(idx, data_var, label_var, batch_size, learning_rate,
                             load_data(datasets), outpath, net, objective)

    outcome = learner.fit(n_epochs=n_epochs, patience=patience,
                          patience_increase=patience_increase,
                          improvement_threshold=improvement_threshold)
    # fit() returns (best loss, best iteration, epoch, elapsed time).
    best_validation_loss, _unused_best_iter, epoch, elapsed_time = outcome
    display_results(best_validation_loss, elapsed_time, epoch)

    return learner
Ejemplo n.º 13
0
    def __init__(self, name='ra', nimg=2048, nnh=512, na=512, nh=512, nw=512, nout=8843, npatch=30, model_file=None):
        """Build the layers and the training graph of the attention caption model.

        When ``model_file`` is given, ``nimg``, ``nnh``, ``na``, ``nh``, ``nw``
        and ``nout`` are read from that HDF5 file's attributes (``npatch``
        always comes from the argument) and the weights are loaded from it
        after all layers have been created. ``nnh`` is the attention layer's
        ``hsize``.

        After construction ``inputs`` is ``[cap, img]``, ``costs`` holds the
        mean of the per-step loss returned by ``scan_func``, and ``p`` /
        ``alpha`` hold the second and fourth scan outputs.
        """
        self.name = name

        # Resolve hyper-parameters: a checkpoint overrides the arguments.
        restorable = ('nimg', 'nnh', 'na', 'nh', 'nw', 'nout')
        settings = {'nimg': nimg, 'nnh': nnh, 'na': na, 'nh': nh, 'nw': nw, 'nout': nout, 'npatch': npatch}
        if model_file is not None:
            with h5py.File(model_file, 'r') as checkpoint:
                for key in restorable:
                    settings[key] = checkpoint.attrs[key]
        nimg, nnh, na, nh, nw, nout = [settings[key] for key in restorable]
        self.config = settings

        def scoped(suffix):
            # Every layer name is prefixed with "<model name>@".
            return self.name + '@' + suffix

        # Layers, created in a fixed order.
        self.embedding = Embedding(n_emb=nout, dim_emb=nw, name=scoped('embedding'))
        self.init_mlp = MLP(layer_sizes=[na, 2*nh], output_type='tanh', name=scoped('init_mlp'))
        self.proj_mlp = MLP(layer_sizes=[nimg, na], output_type='tanh', name=scoped('proj_mlp'))
        self.lstm = BasicLSTM(dim_x=na+nw, dim_h=nh, name=scoped('lstm'))
        self.pred_mlp = MLP(layer_sizes=[na+nh+nw, nout], output_type='softmax', name=scoped('pred_mlp'))
        self.attention = Attention(dim_item=na, dim_context=na+nw+nh, hsize=nnh, name=scoped('attention'))

        # Symbolic inputs.
        caption = T.imatrix('cap')
        image = T.tensor3('img')
        self.inputs = [caption, image]

        # Project the image, average over axis 1 and derive the initial state.
        projected = self.proj_mlp.compute(image)
        mean_feat = projected.mean(axis=1)
        start_state = T.concatenate([mean_feat, self.init_mlp.compute(mean_feat)], axis=-1)

        # Scan pairs caption[0:-1] with caption[1:] along the first axis.
        scan_outputs, _ = theano.scan(fn=self.scan_func,
                                      sequences=[caption[0:-1, :], caption[1:, :]],
                                      outputs_info=[start_state, None, None, None],
                                      non_sequences=[projected])
        _, self.p, step_loss, self.alpha = scan_outputs

        # Training cost: mean of the per-step loss.
        self.costs = [T.mean(step_loss)]

        # Layers and their flattened parameter list.
        self.layers = [self.embedding, self.init_mlp, self.proj_mlp, self.attention, self.lstm, self.pred_mlp]
        gathered = []
        for layer in self.layers:
            gathered = gathered + layer.params
        self.params = gathered

        if model_file is not None:
            self.load_weights(model_file)

        # Lazily-built helpers for the test stage; this buffer's last axis is nimg.
        self._init_func = None
        self._step_func = None
        self._proj_func = None
        self._feat_shared = theano.shared(np.zeros((1, npatch, nimg)).astype(theano.config.floatX))
Ejemplo n.º 14
0
def main(BERT_MODEL='bert-base-uncased',
         model_file='./models/bert-base-uncased.bin',
         data_file='./data/hotpot_dev_distractor_v1.json',
         max_new_nodes=5,
         sys2='xattn',
         attn_layers=1):
    """Run inference over a dataset and write predictions next to it.

    Loads a checkpoint holding two state dicts ('params1' for the BERT model,
    'params2' for the CognitiveGNN), runs ``cognitive_graph_propagate`` on every
    sample and dumps answers, supporting facts and graphs to
    ``<data_file minus '.json'>_pred.json``.

    Args:
        BERT_MODEL: name passed to the tokenizer and BERT ``from_pretrained``.
        model_file: checkpoint path read with ``torch.load``.
        data_file: JSON dataset path; the setting is 'distractor' when the
            path contains that word, otherwise 'fullwiki'.
        max_new_nodes: forwarded to ``cognitive_graph_propagate``.
        sys2: passed to ``CognitiveGNN``; 'xattn' or 'mlp' additionally
            replaces ``model2.gcn`` with an ``XAttn`` or ``MLP`` module.
        attn_layers: number of layers of the ``XAttn`` module when
            ``sys2 == 'xattn'``.
    """
    setting = 'distractor' if 'distractor' in data_file else 'fullwiki'
    with open(data_file, 'r') as fin:
        dataset = json.load(fin)
    tokenizer = BertTokenizer.from_pretrained(BERT_MODEL, do_lower_case=True)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    print('Loading model from {}'.format(model_file))
    # Load onto the CPU first so a GPU-saved checkpoint also works on the CPU
    # fallback path; both models are moved to `device` below.
    model_state_dict = torch.load(model_file, map_location='cpu')
    model1 = BertForMultiHopQuestionAnswering.from_pretrained(
        BERT_MODEL, state_dict=model_state_dict['params1'])
    hidden_size = model1.config.hidden_size
    model2 = CognitiveGNN(hidden_size, model1.config, sys2)
    # Use the function's own parameters here: the previous code read an
    # undefined `args` object and ignored `sys2` / `attn_layers`.
    if sys2 == "xattn":
        from model import XAttn
        model2.gcn = XAttn(hidden_size,
                           model1.config,
                           n_layers=attn_layers)
    elif sys2 == "mlp":
        from layers import MLP
        model2.gcn = MLP((hidden_size, hidden_size, 1))
    model2.load_state_dict(model_state_dict['params2'])

    sp, answer, graphs = {}, {}, {}
    print('Start inference... on {} GPUs'.format(torch.cuda.device_count()))
    model1 = torch.nn.DataParallel(model1,
                                   device_ids=range(torch.cuda.device_count()))
    model1.to(device).eval()
    model2.to(device).eval()

    with torch.no_grad():
        for data in tqdm(dataset):
            gold, ans, graph_ret, ans_nodes = cognitive_graph_propagate(
                tokenizer,
                data,
                model1,
                model2,
                device,
                setting=setting,
                max_new_nodes=max_new_nodes)
            sample_id = data['_id']
            sp[sample_id] = list(gold)
            answer[sample_id] = ans
            graphs[sample_id] = graph_ret + [
                'answer_nodes: ' + ', '.join(ans_nodes)
            ]

    pred_file = data_file.replace('.json', '_pred.json')
    with open(pred_file, 'w') as fout:
        json.dump({'answer': answer, 'sp': sp, 'graphs': graphs}, fout)
Ejemplo n.º 15
0
    def __init__(self, input_dim, hidden_dim, output_dim, layer_num, duration, bias=True, activate_type='N'):
        """Store the configuration and create ``duration`` identically-configured MLPs.

        Each MLP in ``self.mlp_list`` is built with (input_dim, hidden_dim,
        output_dim, layer_num) and the given ``bias`` / ``activate_type``.
        """
        super(MLPClassifier, self).__init__()
        self.input_dim, self.hidden_dim, self.output_dim = input_dim, hidden_dim, output_dim
        self.layer_num, self.duration = layer_num, duration
        self.bias, self.activate_type = bias, activate_type

        # One independent MLP per index in range(duration).
        per_step_mlps = [
            MLP(input_dim, hidden_dim, output_dim, layer_num, bias=bias, activate_type=activate_type)
            for _ in range(duration)
        ]
        self.mlp_list = nn.ModuleList(per_step_mlps)
Ejemplo n.º 16
0
 def __init__(self, args):
     """Initialise the encoders, the MLPs ``g_theta`` / ``f_phi`` and, optionally, a conv stem.

     ``g_theta`` takes ``(cv_filter + 2) * 2 + te_hidden`` and
     ``rn_gt_hidden``; ``f_phi`` takes ``rn_gt_hidden`` and ``a_size`` with
     ``last=True``. When ``args.cv_pretrained`` is truthy, ``visual_encoder``
     is replaced by a Conv2d(1024 -> cv_filter, kernel 3, stride 2,
     padding 1) + BatchNorm2d + ReLU stack and ``self.init()`` is called.

     NOTE(review): ``self.init()`` only runs in the ``cv_pretrained`` branch --
     confirm that is intended.
     """
     super(RelationalNetwork, self).__init__()
     self.init_encoders(args)

     pair_in = (args.cv_filter + 2) * 2 + args.te_hidden
     g_width = args.rn_gt_hidden
     self.g_theta = MLP(pair_in, g_width, g_width, args.rn_gt_layer)
     self.f_phi = MLP(g_width, args.rn_fp_hidden, args.a_size,
                      args.rn_fp_layer, args.rn_fp_dropout, last=True)

     # Nothing else to set up unless pretrained visual features are used.
     if not args.cv_pretrained:
         return

     conv_stem = [
         nn.Conv2d(1024, args.cv_filter, 3, 2, padding=1),
         nn.BatchNorm2d(args.cv_filter),
         nn.ReLU(),
     ]
     self.visual_encoder = nn.Sequential(*conv_stem)
     self.init()
Ejemplo n.º 17
0
    def __init__(self, input_dim, hidden_dim, output_dim, trans_num, diffusion_num, bias=True, rnn_type='GRU', model_type='C', trans_activate_type='L'):
        """Store the configuration and create one MLP followed by one CDN.

        ``model_type`` must be 'C' or 'S' and ``trans_activate_type`` must be
        'L' or 'N' (enforced with assertions). With 'C' the MLP's third
        argument and the CDN's first argument are ``hidden_dim``; with 'S'
        both are ``output_dim``.
        """
        super(CGCN, self).__init__()
        self.input_dim, self.hidden_dim, self.output_dim = input_dim, hidden_dim, output_dim
        self.trans_num, self.diffusion_num = trans_num, diffusion_num
        self.bias = bias
        self.rnn_type, self.model_type = rnn_type, model_type
        self.trans_activate_type = trans_activate_type
        self.method_name = 'CGCN' + '-' + model_type

        assert model_type in ('C', 'S')
        assert trans_activate_type in ('L', 'N')

        # Width shared by the MLP's third argument and the CDN's first argument.
        bridge_dim = hidden_dim if model_type == 'C' else output_dim

        self.mlp = MLP(input_dim, hidden_dim, bridge_dim, trans_num, bias=bias, activate_type=trans_activate_type)
        self.duffision = CDN(bridge_dim, output_dim, output_dim, diffusion_num, rnn_type=rnn_type)
Ejemplo n.º 18
0
class SceneMlp(object):
    """
    multi-layer perceptron used to predict scene-specific context

    The model exposes ``inputs`` ([x, y] symbolic matrices), ``proba`` /
    ``log_proba`` (softmax output on x and its log), ``costs`` ([symmetric KL
    between proba and y, argmax accuracy]) and ``params`` (all layer
    parameters). Configuration and weights are stored in a single HDF5 file.
    """
    def __init__(self, name='scene_mlp', layer_sizes=(2048, 1024, 1024, 80), model_file=None):
        """Build the computation graph, optionally restoring from ``model_file``.

        If ``model_file`` is given, ``layer_sizes`` is replaced by the
        'layer_sizes' attribute stored in that file and the weights are loaded
        from it after the graph has been built.
        """
        self.name = name
        if model_file is not None:
            with h5py.File(model_file, 'r') as f:
                layer_sizes = f.attrs['layer_sizes']
        self.config = {'layer_sizes': layer_sizes}

        # define inputs
        x = T.matrix('x')
        y = T.matrix('y')
        self.inputs = [x, y]

        # define computation graph
        self.mlp = MLP(layer_sizes=layer_sizes, name='mlp', output_type='softmax')
        self.proba = self.mlp.compute(x)
        self.log_proba = T.log(self.proba)

        # define costs
        def kl_divergence(p, q):
            # Symmetric KL: KL(p||q) + KL(q||p); 1e-30 keeps the log finite.
            kl = T.mean(T.sum(p * T.log((p+1e-30)/(q+1e-30)), axis=1))
            kl += T.mean(T.sum(q * T.log((q+1e-30)/(p+1e-30)), axis=1))
            return kl
        kl = kl_divergence(self.proba, y)
        acc = T.mean(T.eq(self.proba.argmax(axis=1), y.argmax(axis=1)))
        self.costs = [kl, acc]

        # layers and parameters
        self.layers = [self.mlp]
        self.params = sum([l.params for l in self.layers], [])

        # load weights from file, if model_file is not None
        if model_file is not None:
            self.load_weights(model_file)

    def save_to_dir(self, save_dir, idx='0'):
        """Save layer weights and the config to ``<save_dir>/<name>.h5.<idx>``."""
        save_file = osp.join(save_dir, self.name+'.h5.' + str(idx))
        for l in self.layers:
            l.save_weights(save_file)
        # Open explicitly in append mode: h5py >= 3 defaults to read-only, which
        # would make the attribute writes below fail; 'a' also keeps whatever
        # the layers have just written to the file.
        with h5py.File(save_file, 'a') as f:
            for k, v in self.config.items():
                f.attrs[k] = v

    def load_weights(self, model_file):
        """Ask every layer to load its weights from ``model_file``."""
        for l in self.layers:
            l.load_weights(model_file)
Ejemplo n.º 19
0
    def __init__(self,
                 num_features,
                 num_factors,
                 act_function,
                 layers,
                 batch_norm,
                 drop_prob,
                 l2,
                 pre_trained_FM=None):
        """Build the NFM embeddings, FM layers, deep MLP and prediction layer.

        num_features: number of features,
        num_factors: number of hidden factors,
        act_function: activation function for MLP layer,
        layers: list of dimension of deep layers,
        batch_norm: bool type, whether to use batch norm or not,
        drop_prob: list of the dropout rate for FM and MLP,
        l2: stored as ``self.l2``,
        pre_trained_FM: the pre-trained FM weights.
        """
        super(NFM, self).__init__()

        # Keep the configuration on the module.
        self.num_features, self.num_factors = num_features, num_factors
        self.act_function = act_function
        self.layers = layers
        self.batch_norm = batch_norm
        self.drop_prob = drop_prob
        self.l2 = l2
        self.pre_trained_FM = pre_trained_FM

        # First-order biases, factor embeddings and a scalar global bias.
        self.embeddings = nn.Embedding(num_features, num_factors)
        self.biases = nn.Embedding(num_features, 1)
        self.global_bias = nn.Parameter(torch.tensor([0.0]))

        # FM part: optional batch norm followed by dropout with drop_prob[0].
        fm_stack = [nn.BatchNorm1d(num_factors)] if self.batch_norm else []
        fm_stack.append(nn.Dropout(drop_prob[0]))
        self.FM_layers = nn.Sequential(*fm_stack)

        # Deep part.
        self.deep_layers = MLP(num_factors, self.layers, self.drop_prob,
                               self.act_function, self.batch_norm)

        # The predictor reads the last deep layer, or the factors when there
        # are no deep layers.
        if layers:
            predict_size = layers[-1]
        else:
            predict_size = num_factors
        self.prediction = nn.Linear(predict_size, 1, bias=False)

        self.reset_parameters()
Ejemplo n.º 20
0
def main(rnn_type = 'simple_rnn'):
    '''
    Train a language model on character data.

    rnn_type selects the entry of ``rnn_types`` used to build the recurrent
    part and must be 'simple_rnn' or 'lstm'.
    '''
    assert rnn_type in ('simple_rnn', 'lstm')

    model = Model()
    lookup = model.add_lookup_parameters((VOCAB_SIZE, INPUT_DIM))

    # recurrent network chosen by name, followed by a softmax output MLP
    build_rnn = rnn_types[rnn_type]
    rnn = build_rnn(model, INPUT_DIM, HIDDEN_DIM)
    mlp = MLP(model,
              HIDDEN_DIM,
              HIDDEN_DIM,
              VOCAB_SIZE,
              output_nonlinearity='softmax',
              num_layers=NUMBER_OF_LAYERS)

    # the single training example, e.g. "a quick brown fox jumped over the lazy dog"
    train(model, rnn, mlp, lookup, TRAINING_SENTENCE)
    def __init__(self, feature_size, field_size, embedding_size, deep_layers_dim, cin_layers_size, cin_split_half,
                 dropout_deep,
                 deep_act, cin_act, batch_norm, l2):
        """Store the hyper-parameters and build the embedding, deep and CIN parts.

        ``self.weight_list`` ends up holding, in order: the bias embedding
        weight, the deep MLP's weights, the deep output weight, the CIN's
        weights and the CIN output weight.
        """
        super(XDeepFM, self).__init__()

        # plain hyper-parameters
        self.feature_size = feature_size
        self.field_size = field_size
        self.embedding_size = embedding_size
        self.deep_layers_dim = deep_layers_dim
        self.cin_layers_size = cin_layers_size
        self.cin_split_half = cin_split_half
        self.dropout_deep = dropout_deep
        self.deep_act = deep_act
        self.cin_act = cin_act
        self.batch_norm = batch_norm
        self.l2 = l2

        # embeddings and biases
        self.embeddings = nn.Embedding(self.feature_size, self.embedding_size)
        self.biases = nn.Embedding(self.feature_size, 1)
        self.global_bias = nn.Parameter(torch.tensor([0.0]))

        # deep part: MLP over the concatenated field embeddings
        deep_in_dim = self.field_size * self.embedding_size
        self.deep_layers = MLP(deep_in_dim, self.deep_layers_dim, self.dropout_deep, self.deep_act, self.batch_norm)
        self.deep_linear = nn.Linear(self.deep_layers_dim[-1], 1, bias=False)

        # CIN part: with split-half, every layer but the last contributes
        # only half of its feature maps to the output
        if cin_split_half:
            halved = sum(cin_layers_size[:-1]) // 2
            self.feature_map_num = halved + cin_layers_size[-1]
        else:
            self.feature_map_num = sum(cin_layers_size)
        self.cin = CIN(self.field_size, self.cin_layers_size, self.cin_act, cin_split_half)
        self.cin_linear = nn.Linear(self.feature_map_num, 1, bias=False)

        # collect the weights in one flat list
        self.weight_list = [
            self.biases.weight,
            *self.deep_layers.weight_list,
            self.deep_linear.weight,
            *self.cin.weight_list,
            self.cin_linear.weight,
        ]

        self.reset_parameters()
Ejemplo n.º 22
0
    def __init__(self, hps):
        """Build every sub-layer of the model from the hyper-parameter object ``hps``.

        All sizes used below are read from attributes of ``hps``. The
        sub-layers are stored by name in ``self.layers`` (an ``nn.ModuleDict``).
        Tensors are placed on a module-level ``device`` that is not defined
        in this block.
        """
        super(MixPoetAUS, self).__init__()
        self.hps = hps

        # sizes copied from the hyper-parameters for convenient access
        self.vocab_size = hps.vocab_size
        self.n_class1 = hps.n_class1
        self.n_class2 = hps.n_class2
        self.emb_size = hps.emb_size
        self.hidden_size = hps.hidden_size
        self.factor_emb_size = hps.factor_emb_size
        self.latent_size = hps.latent_size
        self.context_size = hps.context_size
        self.poem_len = hps.poem_len
        self.sens_num = hps.sens_num
        self.sen_len = hps.sen_len

        self.pad_idx = hps.pad_idx
        self.bos_idx = hps.bos_idx

        # [1, 1] long tensor holding the begin-of-sequence index
        self.bos_tensor = torch.tensor(hps.bos_idx, dtype=torch.long, device=device).view(1, 1)

        self.gumbel_tool = GumbelSampler()

        # build positional inputs to distinguish lines at different positions
        # [sens_num, sens_num], each line is a one-hot input
        self.pos_inps = F.one_hot(torch.arange(0, self.sens_num), self.sens_num)
        self.pos_inps = self.pos_inps.type(torch.FloatTensor).to(device)


        # ----------------------------
        # build components
        self.layers = nn.ModuleDict()
        self.layers['embed'] = nn.Embedding(self.vocab_size, self.emb_size, padding_idx=self.pad_idx)

        self.layers['encoder'] = BidirEncoder(self.emb_size, self.hidden_size, drop_ratio=hps.drop_ratio)

        # p(x|z, w, y)
        self.layers['decoder'] = Decoder(self.hidden_size, self.hidden_size, drop_ratio=hps.drop_ratio)

        # RNN to combine characters to form the representation of a word
        self.layers['word_encoder'] = BidirEncoder(self.emb_size, self.emb_size, cell='Elman',
            drop_ratio=hps.drop_ratio)

        # classifiers for p(y_1|x,w), p(y_2|x,w)
        self.layers['cl_xw1'] = MLP(self.hidden_size*2+self.emb_size*2,
            layer_sizes=[self.hidden_size, 128, self.n_class1], activs=['relu', 'relu', None],
            drop_ratio=hps.drop_ratio)
        self.layers['cl_xw2'] = MLP(self.hidden_size*2+self.emb_size*2,
            layer_sizes=[self.hidden_size, 128, self.n_class2], activs=['relu', 'relu', None],
            drop_ratio=hps.drop_ratio)

        # classifiers for p(y_1|w), p(y_2|w)
        self.layers['cl_w1'] = MLP(self.emb_size*2,
            layer_sizes=[self.emb_size, 64, self.n_class1], activs=['relu', 'relu', None],
            drop_ratio=hps.drop_ratio)
        self.layers['cl_w2'] = MLP(self.emb_size*2,
            layer_sizes=[self.emb_size, 64, self.n_class2], activs=['relu', 'relu', None],
            drop_ratio=hps.drop_ratio)

        # factor embeddings, one table per class set
        self.layers['factor_embed1'] = nn.Embedding(self.n_class1, self.factor_emb_size)
        self.layers['factor_embed2'] = nn.Embedding(self.n_class2, self.factor_emb_size)

        # posterior and prior generators
        self.layers['prior'] = PriorGenerator(
            self.emb_size*2+int(self.latent_size//2),
            self.latent_size, self.n_class1, self.n_class2, self.factor_emb_size)

        self.layers['posteriori'] = PosterioriGenerator(
            self.hidden_size*2+self.emb_size*2, self.latent_size,
            self.n_class1, self.n_class2, self.factor_emb_size)


        # for adversarial training
        self.layers['discriminator'] = Discriminator(self.n_class1, self.n_class2,
            self.factor_emb_size, self.latent_size, drop_ratio=hps.drop_ratio)

        #--------------
        # project the decoder hidden state to a vocabulary-size output logit
        self.layers['out_proj'] = nn.Linear(hps.hidden_size, hps.vocab_size)

        # MLP for calculating the initial decoder state
        # NOTE: Here we use a two-dimension one-hot vector as the input length embedding o_i,
        #   since there are only two kinds of line length, 5 chars and 7 chars, for Chinese
        #   classical quatrains.
        # NOTE(review): the output size is hidden_size-6; presumably the remaining
        #   6 dimensions are filled by features concatenated elsewhere - confirm.
        self.layers['dec_init'] = MLP(self.latent_size+self.emb_size*2+self.factor_emb_size*2,
            layer_sizes=[self.hidden_size-6],
            activs=['tanh'], drop_ratio=hps.drop_ratio)



        # maps a (context_size + emb_size)-wide input to hidden_size
        self.layers['map_x'] = MLP(self.context_size+self.emb_size,
            layer_sizes=[self.hidden_size],
            activs=['tanh'], drop_ratio=hps.drop_ratio)

        # update the context vector
        self.layers['context'] = ContextLayer(self.hidden_size, self.context_size)


        # two annealing parameters
        # (double leading underscore: the attribute names are name-mangled by Python)
        self.__tau = 1.0
        self.__teach_ratio = 1.0

        # only for pre-training
        self.layers['dec_init_pre'] = MLP(self.hidden_size*2+self.emb_size*2,
            layer_sizes=[self.hidden_size-6],
            activs=['tanh'], drop_ratio=hps.drop_ratio)
# Script fragment: trains a graph auto-encoder and an MLP side by side.
# `adj`, `feat`, `norm`, `num_iters`, `features_dim`, `hidden_dim` and `out_dim`
# must already be defined before this point (not visible in this fragment).

# mask_test_edges returns six values; only adj_train is used below
adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges(
    adj)
adj = adj_train

# reconstruction target: training adjacency plus an identity matrix
# (presumably `sp` is scipy.sparse, i.e. self-loops are added - confirm)
adj_label = adj_train + sp.eye(adj_train.shape[0])

# convert everything to torch tensors
adj_norm = torch.from_numpy(preprocess_graph(adj))
adj_label = torch.from_numpy(adj_label.todense().astype(np.float32))
feat = torch.from_numpy(feat.todense().astype(np.float32))

############## init model ##############
gcn_vae = GraphAE(features_dim, hidden_dim, out_dim, bias=False, dropout=0.0)
optimizer_vae = torch.optim.Adam(gcn_vae.parameters(), lr=1e-2)

mlp = MLP(features_dim, hidden_dim, out_dim, dropout=0.0)
optimizer_mlp = torch.optim.Adam(mlp.parameters(), lr=1e-2)

for batch_idx in range(num_iters):
    # train GCN
    optimizer_vae.zero_grad()
    gcn_vae.train()
    z = gcn_vae(adj_norm, feat)
    # inner products of the embeddings serve as the reconstructed adjacency
    adj_h = torch.mm(z, z.t())
    vae_train_loss = reconstruction_loss(adj_label, adj_h, norm)
    vae_train_loss.backward()
    optimizer_vae.step()

    # train mlp (the rest of this step is not included in this fragment)
    optimizer_mlp.zero_grad()
    mlp.train()
Ejemplo n.º 24
0
class Model(object):
    """
    Region Attention model

    The constructor builds the symbolic training graph; ``init_func`` and
    ``step_func`` lazily compile the functions used in the test stage.
    The recurrent state is the concatenation ``[e, c, h]`` along the last
    axis (see ``compute``).
    """
    def __init__(self, name='ra', nimg=2048, nnh=512, na=512, nh=512, nw=512, nout=8843, npatch=30, model_file=None):
        """
        Create the layers, wire the training graph and optionally load weights.

        name: prefix of every layer name and of the file written by save_to_dir
        nimg: input size of proj_mlp
        nnh: hsize of the attention layer
        na: output size of proj_mlp, input size of init_mlp, dim_item of the attention
        nh: LSTM hidden size (dim_h)
        nw: word embedding size
        nout: number of embeddings and output size of pred_mlp
        npatch: second dimension of the placeholder value of the shared feature tensor
        model_file: optional HDF5 file; when given, nimg/nnh/na/nh/nw/nout are
            read from its attributes (overriding the arguments) and the layer
            weights are loaded from it
        """
        self.name = name
        if model_file is not None:
            with h5py.File(model_file, 'r') as f:
                nimg = f.attrs['nimg']
                nnh = f.attrs['nnh']
                na = f.attrs['na']
                nh = f.attrs['nh']
                nw = f.attrs['nw']
                nout = f.attrs['nout']
                # npatch = f.attrs['npatch']
        self.config = {'nimg': nimg, 'nnh': nnh, 'na': na, 'nh': nh, 'nw': nw, 'nout': nout, 'npatch': npatch}

        # word embedding layer
        self.embedding = Embedding(n_emb=nout, dim_emb=nw, name=self.name+'@embedding')

        # initialization mlp layer
        self.init_mlp = MLP(layer_sizes=[na, 2*nh], output_type='tanh', name=self.name+'@init_mlp')
        self.proj_mlp = MLP(layer_sizes=[nimg, na], output_type='tanh', name=self.name+'@proj_mlp')

        # lstm
        self.lstm = BasicLSTM(dim_x=na+nw, dim_h=nh, name=self.name+'@lstm')

        # prediction mlp
        self.pred_mlp = MLP(layer_sizes=[na+nh+nw, nout], output_type='softmax', name=self.name+'@pred_mlp')

        # attention layer
        self.attention = Attention(dim_item=na, dim_context=na+nw+nh, hsize=nnh, name=self.name+'@attention')

        # inputs
        cap = T.imatrix('cap')
        img = T.tensor3('img')
        self.inputs = [cap, img]

        # go through sequence: each step reads word t-1 and is scored against word t
        feat = self.proj_mlp.compute(img)
        init_e = feat.mean(axis=1)
        init_state = T.concatenate([init_e, self.init_mlp.compute(init_e)], axis=-1)
        (state, self.p, loss, self.alpha), _ = theano.scan(fn=self.scan_func,
                                                           sequences=[cap[0:-1, :], cap[1:, :]],
                                                           outputs_info=[init_state, None, None, None],
                                                           non_sequences=[feat])

        # loss function
        loss = T.mean(loss)
        self.costs = [loss]

        # layers and parameters
        self.layers = [self.embedding, self.init_mlp, self.proj_mlp, self.attention, self.lstm, self.pred_mlp]
        self.params = sum([layer.params for layer in self.layers], [])

        # load weights from file, if model_file is not None
        if model_file is not None:
            self.load_weights(model_file)

        # these functions and variables are used in test stage
        self._init_func = None
        self._step_func = None
        self._proj_func = None
        # Placeholder for the *projected* features (output of proj_mlp), so the
        # last dimension is na rather than nimg; init_func overwrites the value.
        self._feat_shared = theano.shared(np.zeros((1, npatch, na)).astype(theano.config.floatX))

    def compute(self, state, w_idx, feat):
        """
        One decoding step: returns (new_state, word probabilities, attention weights).
        """
        # word embedding
        word_vec = self.embedding.compute(w_idx)
        # split states
        e_tm1, c_tm1, h_tm1 = split_state(state, scheme=[(1, self.config['na']), (2, self.config['nh'])])
        # attention
        e_t, alpha = self.attention.compute(feat, T.concatenate([e_tm1, h_tm1, word_vec], axis=1))
        # lstm step
        e_w = T.concatenate([e_t, word_vec], axis=-1)
        c_t, h_t = self.lstm.compute(e_w, c_tm1, h_tm1)  # (mb,nh)
        # merge state
        new_state = T.concatenate([e_t, c_t, h_t], axis=-1)
        # predict word probability
        p = self.pred_mlp.compute(T.concatenate([e_t, h_t, word_vec], axis=-1))
        return new_state, p, alpha

    def scan_func(self, w_tm1, w_t, state, feat):
        """
        Step function for theano.scan: advance the state with w_tm1 and score w_t.
        """
        # update state
        new_state, p, alpha = self.compute(state, w_tm1, feat)
        # cross-entropy loss
        loss = T.nnet.categorical_crossentropy(p, w_t)
        return new_state, p, loss, alpha

    def init_func(self, img_value):
        """
        Test stage: project img_value, store it in the shared feature variable
        and return the initial state. The functions are compiled on first use.
        """
        if self._proj_func is None:
            img = T.tensor3()
            self._proj_func = theano.function([img], self.proj_mlp.compute(img))
        if self._init_func is None:
            init_e = self._feat_shared.mean(axis=1)
            init_state = T.concatenate([init_e, self.init_mlp.compute(init_e)], axis=-1)
            self._init_func = theano.function([], init_state)
        self._feat_shared.set_value(self._proj_func(img_value))
        return self._init_func()

    def step_func(self, state_value, w_value):
        """
        Test stage: run one step on the stored features and return
        [new_state, log(p)]. The function is compiled on first use.
        """
        if self._step_func is None:
            w = T.ivector()
            state = T.matrix()
            new_state, p, _ = self.compute(state, w, self._feat_shared)
            self._step_func = theano.function([state, w], [new_state, T.log(p)])
        return self._step_func(state_value, w_value)

    def save_to_dir(self, save_dir, idx):
        """
        Save all layer weights and the config to <save_dir>/<name>.h5.<idx>.
        """
        save_file = osp.join(save_dir, self.name+'.h5.'+str(idx))
        for layer in self.layers:
            layer.save_weights(save_file)
        # Open explicitly in append mode: h5py >= 3.0 opens files read-only by
        # default, which would make the attribute writes below fail.
        with h5py.File(save_file, 'a') as f:
            for k, v in self.config.items():
                f.attrs[k] = v

    def load_weights(self, model_file):
        """
        Have every layer load its weights from model_file.
        """
        for layer in self.layers:
            layer.load_weights(model_file)
Ejemplo n.º 25
0
class Model(object):
    """
    Region Attention model

    The constructor builds the symbolic training graph; ``init_func`` and
    ``step_func`` lazily compile the functions used in the test stage.
    The recurrent state is the concatenation ``[e, c, h]`` along the last
    axis (see ``compute``).
    """
    def __init__(self, name='ra', nimg=2048, na=512, nh=512, nw=512, nout=8843, npatch=30, model_file=None):
        """
        Create the layers, wire the training graph and optionally load weights.

        name: prefix of every layer name and of the file written by save_to_dir
        nimg: input size of proj_mlp
        na: output size of proj_mlp, input size of init_mlp, dim_item of the attention
        nh: LSTM hidden size (dim_h), also used as hsize of the attention
        nw: word embedding size
        nout: number of embeddings and output size of pred_mlp
        npatch: second dimension of the placeholder value of the shared feature tensor
        model_file: optional HDF5 file; when given, nimg/na/nh/nw/nout are read
            from its attributes (overriding the arguments) and the layer
            weights are loaded from it
        """
        self.name = name
        if model_file is not None:
            with h5py.File(model_file, 'r') as f:
                nimg = f.attrs['nimg']
                na = f.attrs['na']
                nh = f.attrs['nh']
                nw = f.attrs['nw']
                nout = f.attrs['nout']
                # npatch = f.attrs['npatch']
        self.config = {'nimg': nimg, 'na': na, 'nh': nh, 'nw': nw, 'nout': nout, 'npatch': npatch}

        # word embedding layer
        self.embedding = Embedding(n_emb=nout, dim_emb=nw, name=self.name+'@embedding')

        # initialization mlp layer
        self.init_mlp = MLP(layer_sizes=[na, 2*nh], output_type='tanh', name=self.name+'@init_mlp')
        self.proj_mlp = MLP(layer_sizes=[nimg, na], output_type='tanh', name=self.name+'@proj_mlp')

        # lstm
        self.lstm = BasicLSTM(dim_x=na+nw, dim_h=nh, name=self.name+'@lstm')

        # prediction mlp
        self.pred_mlp = MLP(layer_sizes=[na+nh+nw, nout], output_type='softmax', name=self.name+'@pred_mlp')

        # attention layer
        self.attention = Attention(dim_item=na, dim_context=na+nw+nh, hsize=nh, name=self.name+'@attention')

        # inputs
        cap = T.imatrix('cap')
        img = T.tensor3('img')
        self.inputs = [cap, img]

        # go through sequence: each step reads word t-1 and is scored against word t
        feat = self.proj_mlp.compute(img)
        init_e = feat.mean(axis=1)
        init_state = T.concatenate([init_e, self.init_mlp.compute(init_e)], axis=-1)
        (state, self.p, loss, self.alpha), _ = theano.scan(fn=self.scan_func,
                                                           sequences=[cap[0:-1, :], cap[1:, :]],
                                                           outputs_info=[init_state, None, None, None],
                                                           non_sequences=[feat])

        # loss function
        loss = T.mean(loss)
        self.costs = [loss]

        # layers and parameters
        self.layers = [self.embedding, self.init_mlp, self.proj_mlp, self.attention, self.lstm, self.pred_mlp]
        self.params = sum([layer.params for layer in self.layers], [])

        # load weights from file, if model_file is not None
        if model_file is not None:
            self.load_weights(model_file)

        # these functions and variables are used in test stage
        self._init_func = None
        self._step_func = None
        self._proj_func = None
        # placeholder for the projected features; init_func overwrites the value
        self._feat_shared = theano.shared(np.zeros((1, npatch, na)).astype(theano.config.floatX))

    def compute(self, state, w_idx, feat):
        """
        One decoding step: returns (new_state, word probabilities, attention weights).
        """
        # word embedding
        word_vec = self.embedding.compute(w_idx)
        # split states
        e_tm1, c_tm1, h_tm1 = split_state(state, scheme=[(1, self.config['na']), (2, self.config['nh'])])
        # attention
        e_t, alpha = self.attention.compute(feat, T.concatenate([e_tm1, h_tm1, word_vec], axis=1))
        # lstm step
        e_w = T.concatenate([e_t, word_vec], axis=-1)
        c_t, h_t = self.lstm.compute(e_w, c_tm1, h_tm1)  # (mb,nh)
        # merge state
        new_state = T.concatenate([e_t, c_t, h_t], axis=-1)
        # predict word probability
        p = self.pred_mlp.compute(T.concatenate([e_t, h_t, word_vec], axis=-1))
        return new_state, p, alpha

    def scan_func(self, w_tm1, w_t, state, feat):
        """
        Step function for theano.scan: advance the state with w_tm1 and score w_t.
        """
        # update state
        new_state, p, alpha = self.compute(state, w_tm1, feat)
        # cross-entropy loss
        loss = T.nnet.categorical_crossentropy(p, w_t)
        return new_state, p, loss, alpha

    def init_func(self, img_value):
        """
        Test stage: project img_value, store it in the shared feature variable
        and return the initial state. The functions are compiled on first use.
        """
        if self._proj_func is None:
            img = T.tensor3()
            self._proj_func = theano.function([img], self.proj_mlp.compute(img))
        if self._init_func is None:
            init_e = self._feat_shared.mean(axis=1)
            init_state = T.concatenate([init_e, self.init_mlp.compute(init_e)], axis=-1)
            self._init_func = theano.function([], init_state)
        self._feat_shared.set_value(self._proj_func(img_value))
        return self._init_func()

    def step_func(self, state_value, w_value):
        """
        Test stage: run one step on the stored features and return
        [new_state, log(p)]. The function is compiled on first use.
        """
        if self._step_func is None:
            w = T.ivector()
            state = T.matrix()
            new_state, p, _ = self.compute(state, w, self._feat_shared)
            self._step_func = theano.function([state, w], [new_state, T.log(p)])
        return self._step_func(state_value, w_value)

    def save_to_dir(self, save_dir, idx):
        """
        Save all layer weights and the config to <save_dir>/<name>.h5.<idx>.
        """
        save_file = osp.join(save_dir, self.name+'.h5.'+str(idx))
        for layer in self.layers:
            layer.save_weights(save_file)
        # Open explicitly in append mode: h5py >= 3.0 opens files read-only by
        # default, which would make the attribute writes below fail.
        with h5py.File(save_file, 'a') as f:
            for k, v in self.config.items():
                f.attrs[k] = v

    def load_weights(self, model_file):
        """
        Have every layer load its weights from model_file.
        """
        for layer in self.layers:
            layer.load_weights(model_file)
Ejemplo n.º 26
0
class Model(object):
    """
    scene-specific contexts

    The constructor builds the symbolic training graph; ``init_func`` and
    ``step_func`` lazily compile the functions used in the test stage.
    The recurrent state is the concatenation ``[c, h]`` along the last axis
    (see ``compute``).
    """
    def __init__(self, name='ss', nimg=2048, nh=512, nw=512, nout=8843, ns=80, model_file=None):
        """
        Create the layers, wire the training graph and optionally load weights.

        name: prefix of every layer name and of the file written by save_to_dir
        nimg: input size of proj_mlp
        nh: LSTM hidden size (dim_h); proj_mlp outputs 2*nh values
        nw: word embedding size
        nout: number of embeddings and output size of pred_mlp
        ns: width of the scene vector concatenated to each word embedding
        model_file: optional HDF5 file; when given, nimg/nh/nw/ns/nout are read
            from its attributes (overriding the arguments) and the layer
            weights are loaded from it
        """
        self.name = name
        if model_file is not None:
            with h5py.File(model_file, 'r') as f:
                nimg = f.attrs['nimg']
                nh = f.attrs['nh']
                nw = f.attrs['nw']
                ns = f.attrs['ns']
                nout = f.attrs['nout']
        self.config = {'nimg': nimg, 'nh': nh, 'nw': nw, 'nout': nout, 'ns': ns}

        # word embedding layer
        self.embedding = Embedding(n_emb=nout, dim_emb=nw, name=self.name+'@embedding')

        # initialization mlp layer
        self.proj_mlp = MLP(layer_sizes=[nimg, 2*nh], output_type='tanh', name=self.name+'@proj_mlp')

        # lstm
        self.lstm = BasicLSTM(dim_x=nw+ns, dim_h=nh, name=self.name+'@lstm')

        # prediction mlp
        self.pred_mlp = MLP(layer_sizes=[nh+nw, nout], output_type='softmax', name=self.name+'@pred_mlp')

        # inputs
        cap = T.imatrix('cap')
        img = T.matrix('img')
        scene = T.matrix('scene')
        self.inputs = [cap, img, scene]

        # go through sequence: each step reads word t-1 and is scored against word t
        init_state = self.proj_mlp.compute(img)
        (state, self.p, loss), _ = theano.scan(fn=self.scan_func,
                                               sequences=[cap[0:-1, :], cap[1:, :]],
                                               outputs_info=[init_state, None, None],
                                               non_sequences=[scene])

        # loss function
        loss = T.mean(loss)
        self.costs = [loss]

        # layers and parameters
        self.layers = [self.embedding, self.proj_mlp, self.lstm, self.pred_mlp]
        self.params = sum([layer.params for layer in self.layers], [])

        # load weights from file, if model_file is not None
        if model_file is not None:
            self.load_weights(model_file)

        # initialization for test stage
        self._init_func = None
        self._step_func = None
        # placeholder for the scene vector; init_func overwrites the value
        self._scene_shared = theano.shared(np.zeros((1, ns)).astype(theano.config.floatX))

    def compute(self, state, w_idx, scene):
        """
        One decoding step: returns (new_state, word probabilities).
        """
        # word embedding
        word_vec = self.embedding.compute(w_idx)
        # split states
        c_tm1, h_tm1 = split_state(state, scheme=[(2, self.config['nh'])])
        # lstm step
        w_s = T.concatenate([word_vec, scene], axis=1)
        c_t, h_t = self.lstm.compute(w_s, c_tm1, h_tm1)
        # merge state
        new_state = T.concatenate([c_t, h_t], axis=-1)
        # add w_{t-1} as feature
        h_and_w = T.concatenate([h_t, word_vec], axis=-1)
        # predict probability
        p = self.pred_mlp.compute(h_and_w)
        return new_state, p

    def scan_func(self, w_tm1, w_t, state, scene):
        """
        Step function for theano.scan: advance the state with w_tm1 and score w_t.
        """
        # update state
        new_state, p = self.compute(state, w_tm1, scene)
        # cross-entropy loss
        loss = T.nnet.categorical_crossentropy(p, w_t)
        return new_state, p, loss

    def init_func(self, img_value, scene_value):
        """
        Test stage: store scene_value in the shared scene variable and return
        the initial state computed from img_value. Compiled on first use.
        """
        if self._init_func is None:
            img = T.matrix()
            init_state = self.proj_mlp.compute(img)
            self._init_func = theano.function([img], init_state)
        self._scene_shared.set_value(scene_value)
        return self._init_func(img_value)

    def step_func(self, state_value, w_value):
        """
        Test stage: run one step with the stored scene vector and return
        [new_state, log(p)]. The function is compiled on first use.
        """
        if self._step_func is None:
            w = T.ivector()
            state = T.matrix()
            new_state, p = self.compute(state, w, self._scene_shared)
            self._step_func = theano.function([state, w], [new_state, T.log(p)])
        return self._step_func(state_value, w_value)

    def save_to_dir(self, save_dir, idx):
        """
        Save all layer weights and the config to <save_dir>/<name>.h5.<idx>.
        """
        save_file = osp.join(save_dir, self.name+'.h5.'+str(idx))
        for layer in self.layers:
            layer.save_weights(save_file)
        # Open explicitly in append mode: h5py >= 3.0 opens files read-only by
        # default, which would make the attribute writes below fail.
        with h5py.File(save_file, 'a') as f:
            for k, v in self.config.items():
                f.attrs[k] = v

    def load_weights(self, model_file):
        """
        Have every layer load its weights from model_file.
        """
        for layer in self.layers:
            layer.load_weights(model_file)
Ejemplo n.º 27
0
 def __init__(self, input_size, config, n_layers=1):
     """Stack ``n_layers`` independent copies of one MPLayer and add the prediction MLP."""
     super(XAttn, self).__init__()
     prototype = MPLayer(input_size, config)
     # deep copies, so the stacked layers do not share parameters
     clones = []
     for _ in range(n_layers):
         clones.append(copy.deepcopy(prototype))
     self.layer = nn.ModuleList(clones)
     self.predict = MLP(input_sizes=(input_size, input_size, 1))
Ejemplo n.º 28
0
    def __init__(self, hps, device):
        """Build every sub-layer of the model from the hyper-parameter object ``hps``.

        All sizes used below are read from attributes of ``hps``. The
        sub-layers are stored by name in ``self.layers`` (an ``nn.ModuleDict``).
        ``device`` is stored and used to place the begin-of-sequence tensor.
        """
        super(WorkingMemoryModel, self).__init__()
        self.hps = hps
        self.device = device

        # sizes copied from the hyper-parameters for convenient access
        self.global_trace_size = hps.global_trace_size
        self.topic_trace_size = hps.topic_trace_size
        self.topic_slots = hps.topic_slots
        self.his_mem_slots = hps.his_mem_slots

        self.vocab_size = hps.vocab_size
        self.mem_size = hps.mem_size

        self.sens_num = hps.sens_num

        self.pad_idx = hps.pad_idx
        # scalar long tensor holding the begin-of-sequence index
        self.bos_tensor = torch.tensor(hps.bos_idx, dtype=torch.long, device=device)

        # ----------------------------
        # build components
        self.layers = nn.ModuleDict()
        self.layers['word_embed'] = nn.Embedding(hps.vocab_size,
            hps.word_emb_size, padding_idx=hps.pad_idx)

        # NOTE: We set fixed 33 phonology categories: 0~32
        #   please refer to preprocess.py for more details
        self.layers['ph_embed'] = nn.Embedding(33, hps.ph_emb_size)

        # one embedding per index below hps.sen_len
        self.layers['len_embed'] = nn.Embedding(hps.sen_len, hps.len_emb_size)


        self.layers['encoder'] = BidirEncoder(hps.word_emb_size, hps.hidden_size, drop_ratio=hps.drop_ratio)
        self.layers['decoder'] = Decoder(hps.hidden_size, hps.hidden_size, drop_ratio=hps.drop_ratio)

        # project the decoder hidden state to a vocabulary-size output logit
        self.layers['out_proj'] = nn.Linear(hps.hidden_size, hps.vocab_size)

        # update the context vector
        self.layers['global_trace_updater'] = ContextLayer(hps.hidden_size, hps.global_trace_size)
        self.layers['topic_trace_updater'] = MLP(self.mem_size+self.topic_trace_size,
            layer_sizes=[self.topic_trace_size], activs=['tanh'], drop_ratio=hps.drop_ratio)


        # MLPs for calculating the initial decoder state
        self.layers['dec_init'] = MLP(hps.hidden_size*2, layer_sizes=[hps.hidden_size],
            activs=['tanh'], drop_ratio=hps.drop_ratio)
        self.layers['key_init'] = MLP(hps.hidden_size*2, layer_sizes=[hps.hidden_size],
            activs=['tanh'], drop_ratio=hps.drop_ratio)

        # history memory reading and writing layers
        # query: concatenation of hidden state, global_trace and topic_trace
        # NOTE(review): d_q below also adds topic_slots, which the line above
        #   does not mention - confirm what the extra dimensions carry.
        self.layers['memory_read'] = AttentionReader(
            d_q=hps.hidden_size+self.global_trace_size+self.topic_trace_size+self.topic_slots,
            d_v=hps.mem_size, drop_ratio=hps.attn_drop_ratio)

        self.layers['memory_write'] = AttentionWriter(hps.mem_size+self.global_trace_size, hps.mem_size)

        # NOTE: a layer to compress the encoder hidden states to a smaller size for larger number of slots
        self.layers['mem_compress'] = MLP(hps.hidden_size*2, layer_sizes=[hps.mem_size],
            activs=['tanh'], drop_ratio=hps.drop_ratio)

        # [inp, attns, ph_inp, len_inp, global_trace]
        self.layers['merge_x'] = MLP(
            hps.word_emb_size+hps.ph_emb_size+hps.len_emb_size+hps.global_trace_size+hps.mem_size,
            layer_sizes=[hps.hidden_size],
            activs=['tanh'], drop_ratio=hps.drop_ratio)


        # two annealing parameters
        self._tau = 1.0
        self._teach_ratio = 0.8


        # ---------------------------------------------------------
        # only used for pre-training
        self.layers['dec_init_pre'] = MLP(hps.hidden_size*2,
            layer_sizes=[hps.hidden_size],
            activs=['tanh'], drop_ratio=hps.drop_ratio)

        self.layers['merge_x_pre'] = MLP(
            hps.word_emb_size+hps.ph_emb_size+hps.len_emb_size,
            layer_sizes=[hps.hidden_size],
            activs=['tanh'], drop_ratio=hps.drop_ratio)
Ejemplo n.º 29
0
    def __init__(self,
                 name='gnic',
                 nimg=2048,
                 nh=512,
                 nw=512,
                 nout=8843,
                 model_file=None):
        """
        Create the layers, wire the training graph and optionally load weights.

        When model_file is given, nimg/nh/nw/nout are read from its HDF5
        attributes (overriding the arguments) and the weights are loaded from it.
        """
        self.name = name
        if model_file is not None:
            # sizes stored in the file take precedence over the arguments
            with h5py.File(model_file, 'r') as f:
                nimg, nh, nw, nout = (f.attrs[key] for key in ('nimg', 'nh', 'nw', 'nout'))
        self.config = dict(nimg=nimg, nh=nh, nw=nw, nout=nout)

        prefix = self.name + '@'

        # word embedding layer
        self.embedding = Embedding(n_emb=nout, dim_emb=nw, name=prefix + 'embedding')

        # initialization mlp layer
        self.proj_mlp = MLP(layer_sizes=[nimg, 2 * nh], output_type='tanh', name=prefix + 'proj_mlp')

        # lstm
        self.lstm = BasicLSTM(dim_x=nw, dim_h=nh, name=prefix + 'lstm')

        # prediction mlp
        self.pred_mlp = MLP(layer_sizes=[nh + nw, nout], output_type='softmax', name=prefix + 'pred_mlp')

        # symbolic inputs
        cap = T.imatrix('cap')
        img = T.matrix('img')
        self.inputs = [cap, img]

        # go through sequence: each step reads word t-1 and is scored against word t
        init_state = self.proj_mlp.compute(img)
        prev_words = cap[0:-1, :]
        next_words = cap[1:, :]
        scan_outputs, _ = theano.scan(fn=self.scan_func,
                                      sequences=[prev_words, next_words],
                                      outputs_info=[init_state, None, None])
        _states, self.p, step_loss = scan_outputs

        # loss function
        self.costs = [T.mean(step_loss)]

        # layers and parameters
        self.layers = [self.embedding, self.proj_mlp, self.lstm, self.pred_mlp]
        self.params = sum((layer.params for layer in self.layers), [])

        # load weights from file, if model_file is not None
        if model_file is not None:
            self.load_weights(model_file)

        # compiled lazily in the test stage
        self._init_func = None
        self._step_func = None
Ejemplo n.º 30
0
class Model(object):
    """
    an re-implementation of google NIC system, used as the baseline in our paper

    The constructor builds the symbolic training graph; ``init_func`` and
    ``step_func`` lazily compile the functions used in the test stage.
    The recurrent state is the concatenation ``[c, h]`` along the last axis
    (see ``compute``).
    """
    def __init__(self,
                 name='gnic',
                 nimg=2048,
                 nh=512,
                 nw=512,
                 nout=8843,
                 model_file=None):
        """
        Create the layers, wire the training graph and optionally load weights.

        name: prefix of every layer name and of the file written by save_to_dir
        nimg: input size of proj_mlp
        nh: LSTM hidden size (dim_h); proj_mlp outputs 2*nh values
        nw: word embedding size
        nout: number of embeddings and output size of pred_mlp
        model_file: optional HDF5 file; when given, nimg/nh/nw/nout are read
            from its attributes (overriding the arguments) and the layer
            weights are loaded from it
        """
        self.name = name
        if model_file is not None:
            with h5py.File(model_file, 'r') as f:
                nimg = f.attrs['nimg']
                nh = f.attrs['nh']
                nw = f.attrs['nw']
                nout = f.attrs['nout']
        self.config = {'nimg': nimg, 'nh': nh, 'nw': nw, 'nout': nout}

        # word embedding layer
        self.embedding = Embedding(n_emb=nout,
                                   dim_emb=nw,
                                   name=self.name + '@embedding')

        # initialization mlp layer
        self.proj_mlp = MLP(layer_sizes=[nimg, 2 * nh],
                            output_type='tanh',
                            name=self.name + '@proj_mlp')

        # lstm
        self.lstm = BasicLSTM(dim_x=nw, dim_h=nh, name=self.name + '@lstm')

        # prediction mlp
        self.pred_mlp = MLP(layer_sizes=[nh + nw, nout],
                            output_type='softmax',
                            name=self.name + '@pred_mlp')

        # inputs
        cap = T.imatrix('cap')
        img = T.matrix('img')
        self.inputs = [cap, img]

        # go through sequence: each step reads word t-1 and is scored against word t
        init_state = self.proj_mlp.compute(img)
        (state, self.p,
         loss), _ = theano.scan(fn=self.scan_func,
                                sequences=[cap[0:-1, :], cap[1:, :]],
                                outputs_info=[init_state, None, None])

        # loss function
        loss = T.mean(loss)
        self.costs = [loss]

        # layers and parameters
        self.layers = [self.embedding, self.proj_mlp, self.lstm, self.pred_mlp]
        self.params = sum([layer.params for layer in self.layers], [])

        # load weights from file, if model_file is not None
        if model_file is not None:
            self.load_weights(model_file)

        # these functions are used in test stage
        self._init_func = None
        self._step_func = None

    def compute(self, state, w_idx):
        """
        One decoding step: returns (new_state, word probabilities).
        """
        # word embedding
        word_vec = self.embedding.compute(w_idx)
        # split states
        c_tm1, h_tm1 = split_state(state, scheme=[(2, self.config['nh'])])
        # lstm step
        c_t, h_t = self.lstm.compute(word_vec, c_tm1, h_tm1)
        # merge state
        new_state = T.concatenate([c_t, h_t], axis=-1)
        # add w_{t-1} as feature
        h_and_w = T.concatenate([h_t, word_vec], axis=-1)
        # predict probability
        p = self.pred_mlp.compute(h_and_w)
        return new_state, p

    def scan_func(self, w_tm1, w_t, state):
        """
        Step function for theano.scan: advance the state with w_tm1 and score w_t.
        """
        # update state
        new_state, p = self.compute(state, w_tm1)
        # cross-entropy loss
        loss = T.nnet.categorical_crossentropy(p, w_t)
        return new_state, p, loss

    def init_func(self, img_value):
        """
        Test stage: return the initial state computed from img_value.
        The function is compiled on first use.
        """
        if self._init_func is None:
            img = T.matrix()
            init_state = self.proj_mlp.compute(img)
            self._init_func = theano.function([img], init_state)
        return self._init_func(img_value)

    def step_func(self, state_value, w_value):
        """
        Test stage: run one step and return [new_state, log(p)].
        The function is compiled on first use.
        """
        if self._step_func is None:
            w = T.ivector()
            state = T.matrix()
            new_state, p = self.compute(state, w)
            self._step_func = theano.function([state, w],
                                              [new_state, T.log(p)])
        return self._step_func(state_value, w_value)

    def save_to_dir(self, save_dir, idx):
        """
        Save all layer weights and the config to <save_dir>/<name>.h5.<idx>.
        """
        save_file = osp.join(save_dir, self.name + '.h5.' + str(idx))
        for layer in self.layers:
            layer.save_weights(save_file)
        # Open explicitly in append mode: h5py >= 3.0 opens files read-only by
        # default, which would make the attribute writes below fail.
        with h5py.File(save_file, 'a') as f:
            for k, v in self.config.items():
                f.attrs[k] = v

    def load_weights(self, model_file):
        """
        Have every layer load its weights from model_file.
        """
        for layer in self.layers:
            layer.load_weights(model_file)
Ejemplo n.º 31
0
    def __init__(self,
                 z_shape,
                 output_shape,
                 decoder_type,
                 n_dec,
                 dec_up_strides,
                 n_x,
                 n_y,
                 shared_encoder_conv_shapes=None):
        """Module initialization

        Args:
            z_shape: passed as ``input_dim`` to the MLP decoders (unused for
              the 'deconv' decoder_type).
            output_shape: list, shape of output (not including batch dimension).
            decoder_type: str, 'single', 'multi', or 'deconv'.
            n_dec: list, number of hidden units per layer in the decoder
            dec_up_strides: list, stride in each layer (only for 'deconv' decoder_type).
            n_x: int, number of dims of x.
            n_y: int, number of dims of y.
            shared_encoder_conv_shapes: the shapes of the activations of the
              intermediate layers of the encoder.

        Raises:
            ValueError: if decoder_type is unknown, or if it is 'deconv' and
              shared_encoder_conv_shapes is None.
        """

        super(LatentDecoder, self).__init__()
        self.decoder_type = decoder_type
        self.n_y = n_y

        n_out_factor = 1
        self.out_shape = list(output_shape)

        # Upsample layer (deconvolutional, bilinear, ..).
        if decoder_type == 'deconv':

            # First, check that the encoder is convolutional too (needed for batchnorm)
            if shared_encoder_conv_shapes is None:
                raise ValueError(
                    'Shared encoder does not contain conv_shapes.')

            num_output_channels = output_shape[-1]
            self.decoder = ConvDecoder(
                output_dims=n_dec,
                kernel_size=3,
                activation=nn.ReLU(),
                dec_up_strides=dec_up_strides,
                enc_conv_shapes=shared_encoder_conv_shapes,
                n_c=num_output_channels * n_out_factor,
                method=decoder_type)

        # Multiple MLP decoders, one for each component.
        # NOTE the 'multi' option is not in working condition and probably never will
        elif decoder_type == 'multi':
            # nn.ModuleList instead of a plain list: sub-modules kept in a
            # Python list are not registered, so their parameters would be
            # missing from parameters(), state_dict() and .to(device).
            self.decoder = nn.ModuleList([
                MLP(input_dim=z_shape,
                    hidden_dims=n_dec + [n_x * n_out_factor],
                    activation=nn.ReLU(),
                    activate_final=False)
                for _ in range(n_y)
            ])

        # Single (shared among components) MLP decoder.
        elif decoder_type == 'single':
            self.decoder = MLP(
                input_dim=z_shape,
                hidden_dims=n_dec + [n_x * n_out_factor],
                activation=nn.ReLU(),
                activate_final=False,
            )
        else:
            raise ValueError(f'Unknown decoder_type {decoder_type}')
Ejemplo n.º 32
0
class LatentDecoder(nn.Module):
    """The data decoder module, modelling p(x | z).

    Depending on `decoder_type` the latent code is decoded by a
    `ConvDecoder` ('deconv'), by one `MLP` per component ('multi') or by a
    single shared `MLP` ('single'). In every case `forward` returns logits
    reshaped to `[-1] + output_shape`.
    """

    def __init__(self,
                 z_shape,
                 output_shape,
                 decoder_type,
                 n_dec,
                 dec_up_strides,
                 n_x,
                 n_y,
                 shared_encoder_conv_shapes=None):
        """Module initialization.

        Args:
            z_shape: passed as `input_dim` to the MLP decoders ('single' and
              'multi'); not used by the 'deconv' decoder.
            output_shape: list, shape of output (not including batch dimension).
            decoder_type: str, 'single', 'multi', or 'deconv'.
            n_dec: list, number of hidden units per layer in the decoder.
            dec_up_strides: list, stride in each layer (only for 'deconv'
              decoder_type).
            n_x: int, number of dims of x.
            n_y: int, number of dims of y.
            shared_encoder_conv_shapes: the shapes of the activations of the
              intermediate layers of the encoder (required for 'deconv').

        Raises:
            ValueError: if `decoder_type` is unknown, or if it is 'deconv' and
              `shared_encoder_conv_shapes` is None.
        """
        super().__init__()
        self.decoder_type = decoder_type
        self.n_y = n_y

        n_out_factor = 1
        self.out_shape = list(output_shape)

        # Upsample layer (deconvolutional, bilinear, ..).
        if decoder_type == 'deconv':

            # First, check that the encoder is convolutional too (needed for
            # batchnorm).
            if shared_encoder_conv_shapes is None:
                raise ValueError(
                    'Shared encoder does not contain conv_shapes.')

            num_output_channels = output_shape[-1]
            self.decoder = ConvDecoder(
                output_dims=n_dec,
                kernel_size=3,
                activation=nn.ReLU(),
                dec_up_strides=dec_up_strides,
                enc_conv_shapes=shared_encoder_conv_shapes,
                n_c=num_output_channels * n_out_factor,
                method=decoder_type)

        # Multiple MLP decoders, one for each component.
        # NOTE the 'multi' option was marked as not in working condition by the
        # original author.
        elif decoder_type == 'multi':
            # An nn.ModuleList (instead of a plain list) registers every
            # component decoder as a sub-module, so its parameters are seen by
            # .parameters(), .to(device) and state_dict().
            # Assumes MLP is an nn.Module - TODO confirm.
            self.decoder = nn.ModuleList([
                MLP(input_dim=z_shape,
                    hidden_dims=n_dec + [n_x * n_out_factor],
                    activation=nn.ReLU(),
                    activate_final=False)
                for _ in range(n_y)
            ])

        # Single (shared among components) MLP decoder.
        elif decoder_type == 'single':
            self.decoder = MLP(
                input_dim=z_shape,
                hidden_dims=n_dec + [n_x * n_out_factor],
                activation=nn.ReLU(),
                activate_final=False,
            )
        else:
            raise ValueError(f'Unknown decoder_type {decoder_type}')

    def forward(self, z, y, is_training=True, test_local_stats=True):
        """The Module's forward function.

        Args:
            z: Latent variables, `Tensor` of size `[B, n_z]`.
            y: Categorical cluster variable, `Tensor` of size `[B, n_y]`.
            is_training: Boolean, forwarded to the 'deconv' decoder only.
            test_local_stats: Boolean, forwarded to the 'deconv' decoder only
              (whether to use the test batch statistics at test time for batch
              norm, or the moving averages).

        Returns:
            Logits `Tensor` reshaped to `[-1] + output_shape`.

        Raises:
            NotImplementedError: if `z` or `y` is not 2D.
            ValueError: if `self.decoder_type` is not a known decoder type.
        """
        if z.dim() != 2:
            raise NotImplementedError(
                f'The data decoder function expects `z` to be 2D, but its shape was {z.shape} instead.'
            )
        if y.dim() != 2:
            raise NotImplementedError(
                f'The data decoder function expects `y` to be 2D, but its shape was {y.shape} instead.'
            )

        if self.decoder_type == 'deconv':
            logits = self.decoder(z,
                                  is_training=is_training,
                                  test_local_stats=test_local_stats)

        elif self.decoder_type == 'multi':
            # One set of logits per component, stacked on a new leading axis.
            all_logits = torch.stack([decoder(z) for decoder in self.decoder])
            # Weight each component's logits by y and sum over components:
            # y is indexed [i, j], all_logits [j, i, k], the result [i, k].
            logits = torch.einsum('ij,jik->ik', y, all_logits)

        elif self.decoder_type == 'single':
            logits = self.decoder(z)

        else:
            # Only reachable if decoder_type was changed after construction.
            raise ValueError(f'Unknown decoder_type {self.decoder_type}')

        # reshape (rather than view) also accepts a non-contiguous result.
        return logits.reshape([-1] + self.out_shape)
Ejemplo n.º 33
0
        # Task #2
        print("Loading model from {}".format(args.load_path))
        model_state_dict = torch.load(args.load_path)
        model1 = BertForMultiHopQuestionAnswering.from_pretrained(
            args.bert_model, state_dict=model_state_dict["params1"])
        hidden_size = model1.config.hidden_size
        model2 = CognitiveGNN(hidden_size, model1.config, args.sys2)
        model2.load_state_dict(model_state_dict["params2"])
        if args.sys2 == "xattn":
            from model import XAttn
            model2.gcn = XAttn(model1.config.hidden_size,
                               model1.config,
                               n_layers=args.xattn_layers)
        elif args.sys2 == "mlp":
            from layers import MLP
            model2.gcn = MLP((hidden_size, hidden_size, 1))

    model1 = torch.nn.DataParallel(model1,
                                   device_ids=range(torch.cuda.device_count()))
    print(model1, model2)
    model1, model2 = train(
        train_bundles,
        valid_bundles,
        model1=model1,
        mode=args.mode,
        model2=model2,
        batch_size=args.batch_size,
        num_epochs=args.num_epochs,
        gradient_accumulation_steps=args.gradient_accumulation_steps,
        lr1=args.lr1,
        lr2=args.lr2,
Ejemplo n.º 34
0
    def __init__(self,
                 word_V,
                 dep_V,
                 word_d=100,
                 pos_d=25,
                 mlp_d=100,
                 mlp_label_d=100,
                 num_lstm_layers=2,
                 lstm_d=125,
                 embeddings_init=None,
                 pos_V=None,
                 seed=0,
                 verbose=False):
        '''
        Create the DyNet model, its parameters and trainer, and expose three
        closures on the instance: `states`, `score_arcs` and `score_labels`.

        word_V - size of word vocab
        dep_V - size of relation label vocab
        word_d - dimension of word embeddings
        pos_d - dimension of POS embeddings
        mlp_d - dimension of hidden layer for arc prediction MLP
        mlp_label_d - dimension of hidden layer for label prediction MLP;
            a falsy value disables the label MLP
        num_lstm_layers - number of bi-directional LSTM layers to stack
        lstm_d - dimension of hidden state in the LSTM
        embeddings_init - use pre-trained embeddings
            NOTE(review): this argument is never referenced in this
            constructor - confirm whether the embeddings are loaded elsewhere
        pos_V - size of POS vocab; None disables POS embeddings
        seed - random seed for initialization (passed to np.random.seed)
        verbose - whether to print information about these parameters
        '''

        if verbose:
            print('Word vocabulary size: {}'.format(word_V))
            print('Dependency relation vocabulary size: {}'.format(dep_V))
            print('POS vocabulary size: {}'.format(pos_V))

        self.word_V = word_V
        self.dep_V = dep_V
        self.pos_V = pos_V

        self.word_d = word_d
        self.pos_d = pos_d
        self.mlp_d = mlp_d
        self.mlp_label_d = mlp_label_d
        self.lstm_layers = num_lstm_layers
        self.lstm_d = lstm_d

        # NOTE(review): this seeds NumPy's global RNG only; whether DyNet's
        # own initializers depend on it is not visible here.
        np.random.seed(seed)

        self.model = dynet.Model()

        # Embedding layers for words and (optionally) POS tags.
        self.embeddings = self.model.add_lookup_parameters(
            (self.word_V, self.word_d))
        if pos_V is not None:
            self.pos_embeddings = self.model.add_lookup_parameters(
                (self.pos_V, self.pos_d))

        # Bi-directional LSTM layers: embeddings -> layer1 -> layer2 -> ...
        lstm_layers = []
        for depth in range(num_lstm_layers):
            if depth:
                # Deeper layers read the concatenated fwd/rev outputs.
                input_d = 2 * lstm_d
            elif pos_V is not None:
                # First layer reads word + POS embeddings.
                input_d = word_d + pos_d
            else:
                input_d = word_d

            fwd_lstm_layer = LSTM(self.model, input_d, lstm_d)
            rev_lstm_layer = LSTM(self.model, input_d, lstm_d, reverse=True)
            lstm_layers.append((fwd_lstm_layer, rev_lstm_layer))

        # Arc prediction MLP: layer(i), layer(j) -> concatenate -> score.
        mlp_layer = MLP(self.model, lstm_d * 4, mlp_d, 1)
        # Label prediction MLP (optional).
        if mlp_label_d:
            mlp_label_layer = MLP(self.model, lstm_d * 4, mlp_label_d, dep_V)
        else:
            mlp_label_layer = None

        # Train the model using the Adam optimizer.
        self.trainer = dynet.AdamTrainer(self.model)

        def get_lstm_output(indices, pos_indices=None):
            '''Take word (and optional POS) indices, return the output of the
            last stacked LSTM layer, one expression per position.'''
            x = [self.embeddings[w] for w in indices]

            if pos_V is not None and pos_indices is not None:
                x = [
                    dynet.concatenate(
                        [word_vec, self.pos_embeddings[pos_indices[k]]])
                    for k, word_vec in enumerate(x)
                ]

            n_tokens = len(indices)
            for fwd_layer, rev_layer in lstm_layers:
                fwd_out = fwd_layer.get_output(x)[0]
                rev_out = rev_layer.get_output(x)[0]
                x = [
                    dynet.concatenate([fwd_out[k], rev_out[k]])
                    for k in range(n_tokens)
                ]

            return x

        self.states = get_lstm_output

        def score_arcs(states, value=True):
            '''Score all arcs from i to j using the arc prediction MLP.

            Returns a square list-of-lists; entries are Python scalars when
            `value` is true and the raw MLP outputs otherwise.'''
            def score_pair(state_i, state_j):
                score = mlp_layer.get_output(
                    dynet.concatenate([state_i, state_j]))
                return score.scalar_value() if value else score

            return [[score_pair(state_i, state_j) for state_j in states]
                    for state_i in states]

        self.score_arcs = score_arcs

        def score_labels(states, arcs, value=True):
            '''Score all labels at i (paired with states[arcs[i]]) using the
            label prediction MLP.

            Raises RuntimeError if the label MLP was disabled at construction
            time and there is at least one state to score.'''
            # Fail with an explicit message instead of an AttributeError on
            # None; empty input still yields an empty list as before.
            if mlp_label_layer is None and len(states):
                raise RuntimeError(
                    'Label scoring is unavailable: no label MLP was built '
                    '(mlp_label_d={!r}).'.format(mlp_label_d))

            scores = []
            for k, state in enumerate(states):
                score = mlp_label_layer.get_output(
                    dynet.concatenate([state, states[arcs[k]]]))
                scores.append(score.value() if value else score)

            return scores

        self.score_labels = score_labels
Ejemplo n.º 35
0
 def __init__(self, hidden_size, config, module_type):
     """Create the sub-networks held by this module.

     Args:
         hidden_size: passed to `GCN`, and used as the first two entries of
             the size tuple given to both `MLP` instances (last entry is 1).
         config: not referenced in the visible part of this constructor.
         module_type: stored unchanged as `self.module_type`.
     """
     super(CognitiveGNN, self).__init__()
     self.gcn = GCN(hidden_size)
     # Two separately parameterized MLPs built from the same size tuple.
     self.both_net = MLP((hidden_size, hidden_size, 1))
     self.select_net = MLP((hidden_size, hidden_size, 1))
     self.module_type = module_type