Example #1
    def __init__(self,
                 slots,
                 slot_classes,
                 emb_size,
                 no_train_emb,
                 x_include_score,
                 x_include_token_ftrs,
                 x_include_mlp,
                 n_input_tokens,
                 n_input_score_bins,
                 n_cells,
                 rnn_n_layers,
                 lstm_peepholes,
                 lstm_bidi,
                 opt_type,
                 oclf_n_hidden,
                 oclf_n_layers,
                 oclf_activation,
                 debug,
                 p_drop,
                 init_emb_from,
                 vocab,
                 input_n_layers,
                 input_n_hidden,
                 input_activation,
                 token_features,
                 token_supervision,
                 momentum,
                 enable_branch_exp,
                 l1,
                 l2,
                 build_train=True):
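        # Capture the constructor arguments by name (Python 2 func_code
        # introspection) and stash them in self.init_args.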
        args = SimpleConvModel.__init__.func_code.co_varnames[
            :SimpleConvModel.__init__.func_code.co_argcount]
        self.init_args = {}
        for arg in args:
            if arg != 'self':
                self.init_args[arg] = locals()[arg]

        self.vocab = vocab

        self.slots = slots
        self.slot_classes = slot_classes

        logging.info('We have the following classes:')
        self._log_classes_info()

        self.x_include_score = x_include_score
        self.token_supervision = token_supervision

        x = tt.imatrix()  # matrix of input token ids
        input_args = [x]
        input_token_layer = Embedding(name="emb",
                                      size=emb_size,
                                      n_features=n_input_tokens,
                                      input=x,
                                      static=no_train_emb)

        prev_layer = input_token_layer

        y_seq_id = tt.ivector()
        y_time = tt.ivector()
        y_weight = tt.vector()
        y_label = {}
        for slot in slots:
            y_label[slot] = tt.ivector(name='y_label_%s' % slot)

        if x_include_score:
            x_score = tt.matrix()
            input_args.append(x_score)

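        # Pool the embedded token sequence for each labeled (y_time, y_seq_id)
        # position. Note that x_score is referenced here even when
        # x_include_score is False, so score features are effectively required.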
        maxpool = SeqMaxPooling()
        maxpool.connect(prev_layer, x_score, y_time, y_seq_id)
        prev_layer = maxpool
        #unwrap = SeqUnwrapper(250)
        #unwrap.connect(prev_layer, y_time, y_seq_id)
        #prev_layer = unwrap
        #rng = np.random.RandomState(23455)
        #conv = LeNetConvPoolLayer()
        #conv.connect(prev_layer, rng, (20, 1, 5, emb_size), (8, 1, ))
        #prev_layer = conv
        #logging.info('Conv output size: %d' % conv.size)

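        # Build one softmax MLP classifier and a cross-entropy objective per
        # slot on top of the pooled representation; the total cost is their sum.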
        costs = []
        predictions = []
        for slot in slots:
            logging.info('Building output classifier for %s.' % slot)
            n_classes = len(slot_classes[slot])
            slot_mlp = MLP([oclf_n_hidden] * oclf_n_layers + [n_classes],
                           [oclf_activation] * oclf_n_layers + ['softmax'],
                           [p_drop] * oclf_n_layers + [0.0],
                           name="mlp_%s" % slot)
            slot_mlp.connect(prev_layer)
            predictions.append(slot_mlp.output(dropout_active=False))

            slot_objective = CrossEntropyObjective()
            slot_objective.connect(y_hat_layer=slot_mlp, y_true=y_label[slot])
            costs.append(slot_objective)

        cost = SumOut()
        cost.connect(*costs)  #, scale=1.0 / len(slots))
        self.params = params = list(cost.get_params())
        n_params = sum(p.get_value().size for p in params)
        logging.info('This model has %d parameters:' % n_params)
        for param in sorted(params, key=lambda x: x.name):
            logging.info('  - %20s: %10d' % (
                param.name,
                param.get_value().size,
            ))

        cost_value = cost.output(dropout_active=True)

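        # Only plain SGD is supported here, with gradient-norm clipping and
        # L1/L2 regularization.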
        assert opt_type == 'sgd'
        lr = tt.scalar('lr')
        clipnorm = 0.5
        reg = updates.Regularizer(l1=l1, l2=l2)
        updater = updates.SGD(lr=lr, clipnorm=clipnorm, regularizer=reg)

        loss_args = list(input_args)
        loss_args += [y_seq_id, y_time]
        loss_args += [y_label[slot] for slot in slots]

        if build_train:
            model_updates = updater.get_updates(params, cost_value)

            train_args = [lr] + loss_args
            update_ratio = updater.get_update_ratio(params, model_updates)

            logging.info('Preparing %s train function.' % opt_type)
            t = time.time()
            self._train = theano.function(train_args,
                                          [cost_value, update_ratio],
                                          updates=model_updates)
            logging.info('Preparation done. Took: %.1f' % (time.time() - t))

        self._loss = theano.function(loss_args, cost_value)

        logging.info('Preparing predict function.')
        t = time.time()
        predict_args = list(input_args)
        predict_args += [y_seq_id, y_time]
        self._predict = theano.function(predict_args, predictions)
        logging.info('Done. Took: %.1f' % (time.time() - t))
Example #2
File: model.py  Project: hydercps/xtrack2
    def __init__(self, slots, slot_classes, emb_size, no_train_emb,
                 x_include_score, x_include_token_ftrs, x_include_mlp,
                 n_input_tokens, n_input_score_bins, n_cells,
                 rnn_n_layers,
                 lstm_peepholes, lstm_bidi, opt_type,
                 oclf_n_hidden, oclf_n_layers, oclf_activation,
                 debug, p_drop,
                 init_emb_from, vocab,
                 input_n_layers, input_n_hidden, input_activation,
                 token_features, token_supervision,
                 momentum, enable_branch_exp, l1, l2, build_train=True):
        args = Model.__init__.func_code.co_varnames[:Model.__init__.func_code.co_argcount]
        self.init_args = {}
        for arg in args:
            if arg != 'self':
                self.init_args[arg] = locals()[arg]

        self.vocab = vocab

        self.slots = slots
        self.slot_classes = slot_classes


        logging.info('We have the following classes:')
        self._log_classes_info()

        self.x_include_score = x_include_score
        self.token_supervision = token_supervision

        x = tt.imatrix()  # matrix of input token ids
        input_args = [x]
        input_token_layer = Embedding(name="emb",
                                      size=emb_size,
                                      n_features=n_input_tokens,
                                      input=x,
                                      static=no_train_emb)
        if init_emb_from:
            input_token_layer.init_from(init_emb_from, vocab)
            logging.info('Initializing token embeddings from: %s'
                         % init_emb_from)
        else:
            logging.info('Initializing token embedding randomly.')
        self.input_emb = input_token_layer.wv

        prev_layer = input_token_layer

        input_layers = [
             input_token_layer
        ]
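        # Optionally embed the binned score input as a second input stream.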
        if x_include_score:
            x_score = tt.imatrix()
            input_score_layer = Embedding(name="emb_score",
                                          size=emb_size,
                                          n_features=n_input_score_bins,
                                          input=x_score)
            input_layers.append(input_score_layer)

            input_args.append(x_score)

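        # Optionally map static per-token features into the embedding space
        # through a linear layer.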
        if x_include_token_ftrs:
            token_n_features = len(token_features.values()[0])
            input_token_features_layer = Embedding(name="emb_ftr",
                                                   size=token_n_features,
                                                   n_features=n_input_tokens,
                                                   input=x,
                                                   static=True)
            input_token_features_layer.init_from_dict(token_features)

            ftrs_to_emb = Dense(name='ftr2emb',
                                size=emb_size,
                                activation='linear')
                                # FIX: p_drop=p_drop)
            ftrs_to_emb.connect(input_token_features_layer)
            input_layers.append(ftrs_to_emb)

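        # All active input streams are summed into a single sequence of
        # embedding-sized vectors.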
        sum_layer = SumLayer(layers=input_layers)
        prev_layer = sum_layer

        if input_n_layers > 0:
            input_transform = MLP([input_n_hidden  ] * input_n_layers,
                                  [input_activation] * input_n_layers,
                                  p_drop=p_drop)
            input_transform.connect(prev_layer)
            prev_layer = input_transform

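        # Optional auxiliary objective: a sigmoid MLP predicts per-token slot
        # tags (two outputs per slot), trained against y_tokens_label.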
        if token_supervision:
            slot_value_pred = MLP([len(slots) * 2], ['sigmoid'],
                                  p_drop=[p_drop], name='ts')
            slot_value_pred.connect(prev_layer)

            y_tokens_label = tt.itensor3()
            token_supervision_loss_layer = TokenSupervisionLossLayer()
            token_supervision_loss_layer.connect(slot_value_pred, y_tokens_label)

            if debug:
                self._token_supervision_loss = theano.function(input_args + [
                    y_tokens_label], token_supervision_loss_layer.output())

        else:
            token_supervision_loss_layer = None
            y_tokens_label = None

        logging.info('There are %d input layers.' % input_n_layers)

        if debug:
            self._lstm_input = theano.function(input_args, prev_layer.output())

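        # Per-slot MLP heads built on a placeholder for the LSTM hidden state;
        # they are handed to LstmWithMLP below when x_include_mlp is set.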
        h_t_layer = IdentityInput(None, n_cells)
        mlps = []
        mlp_params = []
        for slot in slots:
            n_classes = len(slot_classes[slot])
            slot_mlp = MLP([oclf_n_hidden  ] * oclf_n_layers + [n_classes],
                           [oclf_activation] * oclf_n_layers + ['softmax'],
                           [0.0            ] * oclf_n_layers + [0.0      ],
                           name="mlp_%s" % slot)
            slot_mlp.connect(h_t_layer)
            mlps.append(slot_mlp)
            mlp_params.extend(slot_mlp.get_params())


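        # Stack rnn_n_layers LSTM layers (optionally bidirectional) over the
        # input sequence.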
        for i in range(rnn_n_layers):
            # Forward LSTM layer.
            logging.info('Creating LSTM layer with %d neurons.' % (n_cells))
            if x_include_mlp:
                f_lstm_layer = LstmWithMLP(name="flstm_%d" % i,
                                       size=n_cells,
                                       seq_output=True,
                                       out_cells=False,
                                       peepholes=lstm_peepholes,
                                       p_drop=p_drop,
                                       enable_branch_exp=enable_branch_exp,
                                       mlps=mlps)
            else:
                f_lstm_layer = LstmRecurrent(name="flstm_%d" % i,
                                       size=n_cells,
                                       seq_output=True,
                                       out_cells=False,
                                       peepholes=lstm_peepholes,
                                       p_drop=p_drop,
                                       enable_branch_exp=enable_branch_exp
                )

            f_lstm_layer.connect(prev_layer)

            if lstm_bidi:
                b_lstm_layer = LstmRecurrent(name="blstm_%d" % i,
                                       size=n_cells,
                                       seq_output=True,
                                       out_cells=False,
                                       backward=True,
                                       peepholes=lstm_peepholes,
                                       p_drop=p_drop,
                                       enable_branch_exp=enable_branch_exp)
                b_lstm_layer.connect(prev_layer)

                lstm_zip = ZipLayer(concat_axis=2, layers=[f_lstm_layer,
                                                         b_lstm_layer])
                prev_layer = lstm_zip
                if debug:
                    self._lstm_output = theano.function(input_args,
                                                   [prev_layer.output(),
                                                    f_lstm_layer.output(),
                                                    b_lstm_layer.output()])
            else:

                if debug:
                    self._lstm_output = theano.function(input_args,
                                                        [prev_layer.output(),
                                                         f_lstm_layer.output()])

                prev_layer = f_lstm_layer

        assert prev_layer is not None

        y_seq_id = tt.ivector()
        y_time = tt.ivector()
        y_weight = tt.vector()
        y_label = {}
        for slot in slots:
            y_label[slot] = tt.ivector(name='y_label_%s' % slot)

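        # Pick the network output at each labeled (y_time, y_seq_id) position.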
        cpt = CherryPick()
        cpt.connect(prev_layer, y_time, y_seq_id)

        costs = []
        predictions = []
        for slot, slot_lstm_mlp in zip(slots, mlps):
            logging.info('Building output classifier for %s.' % slot)
            n_classes = len(slot_classes[slot])
            if oclf_n_layers > 0:
                slot_mlp = MLP([oclf_n_hidden  ] * oclf_n_layers,
                               [oclf_activation] * oclf_n_layers,
                               [p_drop         ] * oclf_n_layers,
                               name="mlp_%s" % slot)
                #name="mlp_%s" % slot, init=inits.copy(mlp_params))
                slot_mlp.connect(cpt)

            slot_softmax = BiasedSoftmax(name='softmax_%s' % slot, size=n_classes)
            if oclf_n_layers > 0:
                slot_softmax.connect(slot_mlp)
            else:
                slot_softmax.connect(cpt)

            predictions.append(slot_softmax.output(dropout_active=False))

            slot_objective = WeightedCrossEntropyObjective()
            slot_objective.connect(
                y_hat_layer=slot_softmax,
                y_true=y_label[slot],
                y_weights=y_weight
            )
            costs.append(slot_objective)
        if token_supervision:
            costs.append(token_supervision_loss_layer)

        cost = SumOut()
        cost.connect(*costs)  #, scale=1.0 / len(slots))
        self.params = params = list(cost.get_params())
        n_params = sum(p.get_value().size for p in params)
        logging.info('This model has %d parameters:' % n_params)
        for param in sorted(params, key=lambda x: x.name):
            logging.info('  - %20s: %10d' % (
                param.name,
                param.get_value().size,
            ))

        cost_value = cost.output(dropout_active=True)

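        # Optimizer selection: every variant clips gradient norms, and all
        # except RProp also apply the L1/L2 regularizer.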
        lr = tt.scalar('lr')
        clipnorm = 0.5
        reg = updates.Regularizer(l1=l1, l2=l2)
        if opt_type == "rprop":
            updater = updates.RProp(lr=lr, clipnorm=clipnorm)
            model_updates = updater.get_updates(params, cost_value)
        elif opt_type == "sgd":
            updater = updates.SGD(lr=lr, clipnorm=clipnorm, regularizer=reg)
        elif opt_type == "rmsprop":
            updater = updates.RMSprop(lr=lr, clipnorm=clipnorm, regularizer=reg)  #, regularizer=reg)
        elif opt_type == "adam":
            #reg = updates.Regularizer(maxnorm=5.0)
            updater = updates.Adam(lr=lr, clipnorm=clipnorm, regularizer=reg)  #,
            # regularizer=reg)
        elif opt_type == "momentum":
            updater = updates.Momentum(lr=lr, momentum=momentum, clipnorm=clipnorm, regularizer=reg)
        else:
            raise Exception("Unknonw opt.")

        loss_args = list(input_args)
        loss_args += [y_seq_id, y_time]
        loss_args += [y_weight]
        loss_args += [y_label[slot] for slot in slots]
        if token_supervision:
            loss_args += [y_tokens_label]

        if build_train:
            model_updates = updater.get_updates(params, cost_value)

            train_args = [lr] + loss_args
            update_ratio = updater.get_update_ratio(params, model_updates)

            logging.info('Preparing %s train function.' % opt_type)
            t = time.time()
            self._train = theano.function(train_args, [cost_value, update_ratio],
                                          updates=model_updates)
            logging.info('Preparation done. Took: %.1f' % (time.time() - t))

        self._loss = theano.function(loss_args, cost_value)

        logging.info('Preparing predict function.')
        t = time.time()
        predict_args = list(input_args)
        predict_args += [y_seq_id, y_time]
        self._predict = theano.function(
            predict_args,
            predictions
        )
        logging.info('Done. Took: %.1f' % (time.time() - t))
Example #3
    def __init__(self,
                 slots,
                 slot_classes,
                 opt_type,
                 oclf_n_hidden,
                 oclf_n_layers,
                 oclf_activation,
                 n_cells,
                 debug,
                 p_drop,
                 vocab,
                 input_n_layers,
                 input_n_hidden,
                 input_activation,
                 token_features,
                 token_supervision,
                 momentum,
                 enable_branch_exp,
                 l1,
                 l2,
                 build_train=True):
        args = BaselineModel.__init__.func_code.co_varnames[
            :BaselineModel.__init__.func_code.co_argcount]
        self.init_args = {}
        for arg in args:
            if arg != 'self':
                self.init_args[arg] = locals()[arg]

        self.vocab = vocab

        self.slots = slots
        self.slot_classes = slot_classes

        logging.info('We have the following classes:')
        self._log_classes_info()

        x = tt.tensor3()  # dense per-token input features
        input_args = [x]
        input_layer = IdentityInput(x, len(self.vocab))

        prev_layer = input_layer

        if input_n_layers > 0:
            input_transform = MLP([input_n_hidden] * input_n_layers,
                                  [input_activation] * input_n_layers,
                                  p_drop=p_drop)
            input_transform.connect(prev_layer)
            prev_layer = input_transform

        logging.info('There are %d input layers.' % input_n_layers)

        if debug:
            self._lstm_input = theano.function(input_args, prev_layer.output())

        logging.info('Creating LSTM layer with %d neurons.' % (n_cells))
        f_lstm_layer = LstmRecurrent(name="lstm",
                                     size=n_cells,
                                     seq_output=True,
                                     out_cells=False,
                                     peepholes=False,
                                     p_drop=p_drop,
                                     enable_branch_exp=enable_branch_exp)
        f_lstm_layer.connect(prev_layer)

        prev_layer = f_lstm_layer

        y_seq_id = tt.ivector()
        y_time = tt.ivector()
        y_label = {}
        for slot in slots:
            y_label[slot] = tt.ivector(name='y_label_%s' % slot)

        cpt = CherryPick()
        cpt.connect(prev_layer, y_time, y_seq_id)

        costs = []
        predictions = []
        for slot in slots:
            logging.info('Building output classifier for %s.' % slot)
            n_classes = len(slot_classes[slot])
            slot_mlp = MLP([oclf_n_hidden] * oclf_n_layers + [n_classes],
                           [oclf_activation] * oclf_n_layers + ['softmax'],
                           [p_drop] * oclf_n_layers + [0.0],
                           name="mlp_%s" % slot)
            slot_mlp.connect(cpt)
            predictions.append(slot_mlp.output(dropout_active=False))

            slot_objective = CrossEntropyObjective()
            slot_objective.connect(y_hat_layer=slot_mlp, y_true=y_label[slot])
            costs.append(slot_objective)

        cost = SumOut()
        cost.connect(*costs)  #, scale=1.0 / len(slots))
        self.params = params = list(cost.get_params())
        n_params = sum(p.get_value().size for p in params)
        logging.info('This model has %d parameters:' % n_params)
        for param in sorted(params, key=lambda x: x.name):
            logging.info('  - %20s: %10d' % (
                param.name,
                param.get_value().size,
            ))

        cost_value = cost.output(dropout_active=True)

        lr = tt.scalar('lr')
        clipnorm = 0.5
        reg = updates.Regularizer(l1=l1, l2=l2)
        if opt_type == "rprop":
            updater = updates.RProp(lr=lr, clipnorm=clipnorm)
            model_updates = updater.get_updates(params, cost_value)
        elif opt_type == "sgd":
            updater = updates.SGD(lr=lr, clipnorm=clipnorm, regularizer=reg)
        elif opt_type == "rmsprop":
            updater = updates.RMSprop(lr=lr,
                                      clipnorm=clipnorm,
                                      regularizer=reg)  #, regularizer=reg)
        elif opt_type == "adam":
            #reg = updates.Regularizer(maxnorm=5.0)
            updater = updates.Adam(lr=lr, clipnorm=clipnorm,
                                   regularizer=reg)  #,
            # regularizer=reg)
        elif opt_type == "momentum":
            updater = updates.Momentum(lr=lr,
                                       momentum=momentum,
                                       clipnorm=clipnorm,
                                       regularizer=reg)
        else:
            raise Exception("Unknonw opt.")

        loss_args = list(input_args)
        loss_args += [y_seq_id, y_time]
        loss_args += [y_label[slot] for slot in slots]

        if build_train:
            model_updates = updater.get_updates(params, cost_value)

            train_args = [lr] + loss_args
            update_ratio = updater.get_update_ratio(params, model_updates)

            logging.info('Preparing %s train function.' % opt_type)
            t = time.time()
            self._train = theano.function(train_args,
                                          [cost_value, update_ratio],
                                          updates=model_updates)
            logging.info('Preparation done. Took: %.1f' % (time.time() - t))

        self._loss = theano.function(loss_args, cost_value)

        logging.info('Preparing predict function.')
        t = time.time()
        predict_args = list(input_args)
        predict_args += [y_seq_id, y_time]
        self._predict = theano.function(predict_args, predictions)
        logging.info('Done. Took: %.1f' % (time.time() - t))
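
All three constructors compile the same kind of Theano functions: _train, _loss and _predict. Below is a minimal usage sketch (not from the project) showing the calling convention for the baseline model in Example #3; `model` is assumed to be an already constructed BaselineModel, and the array shapes are hypothetical placeholders.

import numpy as np
import theano

# Hypothetical toy batch: 2 sequences of 20 steps, dense per-token features.
x = np.zeros((20, 2, len(model.vocab)), dtype=theano.config.floatX)
y_seq_id = np.array([0, 1], dtype='int32')    # which sequence each label belongs to
y_time = np.array([19, 19], dtype='int32')    # time step at which each label is read out
y_labels = [np.zeros(2, dtype='int32') for _ in model.slots]  # one class-id vector per slot

loss = model._loss(x, y_seq_id, y_time, *y_labels)               # summed cross-entropy cost
cost, ratio = model._train(0.1, x, y_seq_id, y_time, *y_labels)  # one update step at lr=0.1
slot_probs = model._predict(x, y_seq_id, y_time)                 # list of per-slot softmax outputs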