Example 1
    def make(self, theano_kwargs=None):
        """Construct the Fergus-Recurrent model
        
        Model: 
            Input at time t: 
                - Soft attention over embedded lexemes of children of node_t
                - Embedded lexeme of node_t
            Compute:
                - Inputs are fed into a recurrent tree s.t. hidden states travel down branches
                - node_t's supertag embeddings are retrieved
                - output of recurrent tree at time t is aligned with each supertag vector
                - a vectorized probability function computes a distribution
            Output:
                - Distribution over supertags for node_t
        """
        if self.igor.embedding_type == "convolutional":
            make_convolutional_embedding(self.igor)
        elif self.igor.embedding_type == "token":
            make_token_embedding(self.igor)
        elif self.igor.embedding_type == "shallowconv":
            make_shallow_convolutional_embedding(self.igor)
        elif self.igor.embedding_type == "minimaltoken":
            make_minimal_token_embedding(self.igor)
        else:
            raise Exception("Incorrect embedding type")

        spine_input_shape = (self.igor.batch_size, self.igor.max_sequence,
                             self.igor.max_num_supertags)

        node_input_shape = (self.igor.batch_size, self.igor.max_sequence)

        dctx_input_shape = (self.igor.batch_size, self.igor.max_sequence,
                            self.igor.max_daughter_size)

        E, V = self.igor.word_embedding_size, self.igor.word_vocab_size  # for word embeddings
        repeat_N = self.igor.max_num_supertags  # for lex
        repeat_D = self.igor.max_daughter_size
        mlp_size = self.igor.mlp_size

        ## dropout parameters
        p_emb = self.igor.p_emb_dropout
        p_W = self.igor.p_W_dropout
        p_U = self.igor.p_U_dropout
        w_decay = self.igor.weight_decay
        p_mlp = self.igor.p_mlp_dropout

        #### make layer inputs
        spineset_in = Input(batch_shape=spine_input_shape,
                            name='parent_spineset_in',
                            dtype='int32')
        phead_in = Input(batch_shape=node_input_shape,
                         name='parent_head_input',
                         dtype='int32')
        dctx_in = Input(batch_shape=dctx_input_shape,
                        name='daughter_context_input',
                        dtype='int32')
        topology_in = Input(batch_shape=node_input_shape,
                            name='node_topology',
                            dtype='int32')

        ##### params
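        ## kwargs for the scoring Dense inside ProbabilityTensor: one regularized
        ## scalar score per supertag candidate, which is turned into a distribution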
        def predict_params():
            return {
                'output_dim': 1,
                'W_regularizer': l2(w_decay),
                'activation': 'relu',
                'b_regularizer': l2(w_decay)
            }

        ### Layer functions
        ############# Convert the word indices to vectors
        F_embedword = Embedding(input_dim=V,
                                output_dim=E,
                                mask_zero=True,
                                W_regularizer=l2(w_decay),
                                dropout=p_emb,
                                name='embedword')
        if self.igor.saved_embeddings is not None:
            print("Loading saved embeddings....")
            F_embedword.initial_weights = [self.igor.saved_embeddings]

        F_probability = ProbabilityTensor(
            name='predictions', dense_function=Dense(**predict_params()))
        ### composition functions
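        ## compose() chains layers right-to-left: the right-most callable runs first
        ## (e.g. words are embedded before the soft attention is applied)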

        F_softdaughters = compose(
            LambdaMask(lambda x, mask: None, name='remove_attention_mask'),
            Distribute(SoftAttention(name='softdaughter'),
                       name='distribute_softdaughter'), F_embedword)

        F_align = compose(Distribute(Dropout(p_mlp)),
                          Distribute(Dense(mlp_size, activation='relu')),
                          concat)

        F_rtn = compose(
            RepeatVector(repeat_N, axis=2, name='repeattree'),
            BranchLSTM(self.igor.rtn_size,
                       name='recurrent_tree1',
                       return_sequences=True))

        F_predict = compose(
            Distribute(F_probability, name='distribute_probability'),
            Distribute(
                Dropout(p_mlp)
            ),  ### need a separate one because the 'concat' is different for the two situations
            LastDimDistribute(Dense(mlp_size, activation='relu')),
            concat)

        #################### assemble the computation graph ####################
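        ### embed inputs: soft attention over the daughter lexemes, plus the head word's own embedding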

        dctx = F_softdaughters(dctx_in)
        parent = F_embedword(phead_in)

        ### put into tree
        aligned_node = F_align([parent, dctx])
        node_context = F_rtn([aligned_node, topology_in])

        parent_spines = self.igor.F_embedspine(spineset_in)
        ### get probability
        predictions = F_predict([node_context, parent_spines])

        ##################
        ### make model
        ##################
        self.model = Model(input=[dctx_in, phead_in, topology_in, spineset_in],
                           output=predictions,
                           preloaded_data=self.igor.preloaded_data)

        ##################
        ### compile model
        ##################
        optimizer = Adam(self.igor.LR,
                         clipnorm=self.igor.max_grad_norm,
                         clipvalue=self.igor.grad_clip_threshold)
        theano_kwargs = theano_kwargs or {}
        self.model.compile(loss='categorical_crossentropy',
                           optimizer=optimizer,
                           metrics=['accuracy'],
                           **theano_kwargs)

        if self.igor.from_checkpoint:
            self.load_checkpoint_weights()
        elif not self.igor.in_training:
            raise Exception("No point in running this without trained weights")
Example 2
    def make(self, theano_kwargs=None):
        '''Make the model and compile it.

        Igor's config options control everything.

        Args:
            theano_kwargs: optional dict passed through to the compile step,
                e.g. for debugging Theano or supplying something custom.
        '''

        if self.igor.embedding_type == "convolutional":
            make_convolutional_embedding(self.igor)
        elif self.igor.embedding_type == "token":
            make_token_embedding(self.igor)
        elif self.igor.embedding_type == "shallowconv":
            make_shallow_convolutional_embedding(self.igor)
        elif self.igor.embedding_type == "minimaltoken":
            make_minimal_token_embedding(self.igor)
        else:
            raise Exception("Incorrect embedding type")

        B = self.igor.batch_size
        spine_input_shape = (B, self.igor.max_num_supertags)
        child_input_shape = (B, 1)
        parent_input_shape = (B, 1)

        E, V = self.igor.word_embedding_size, self.igor.word_vocab_size  # for word embeddings

        repeat_N = self.igor.max_num_supertags  # for lex
        mlp_size = self.igor.mlp_size

        ## dropout parameters
        p_emb = self.igor.p_emb_dropout
        p_W = self.igor.p_W_dropout
        p_U = self.igor.p_U_dropout
        w_decay = self.igor.weight_decay
        p_mlp = self.igor.p_mlp_dropout

        def predict_params():
            return {
                'output_dim': 1,
                'W_regularizer': l2(w_decay),
                'activation': 'relu',
                'b_regularizer': l2(w_decay)
            }

        dspineset_in = Input(batch_shape=spine_input_shape,
                             name='daughter_spineset_in',
                             dtype='int32')
        pspineset_in = Input(batch_shape=spine_input_shape,
                             name='parent_spineset_in',
                             dtype='int32')
        dhead_in = Input(batch_shape=child_input_shape,
                         name='daughter_head_input',
                         dtype='int32')
        phead_in = Input(batch_shape=parent_input_shape,
                         name='parent_head_input',
                         dtype='int32')
        dspine_in = Input(batch_shape=child_input_shape,
                          name='daughter_spine_input',
                          dtype='int32')
        inputs = [dspineset_in, pspineset_in, dhead_in, phead_in, dspine_in]

        ### Layer functions
        ############# Convert the word indices to vectors
        F_embedword = Embedding(input_dim=V,
                                output_dim=E,
                                mask_zero=True,
                                W_regularizer=l2(w_decay),
                                dropout=p_emb)

        if self.igor.saved_embeddings is not None:
            self.logger.info("+ Cached embeddings loaded")
            F_embedword.initial_weights = [self.igor.saved_embeddings]

        ###### Prediction Functions
        ## these functions learn a vector which turns a tensor into a matrix of probabilities

        ### P(Parent supertag | Child, Context)
        F_parent_predict = ProbabilityTensor(
            name='parent_predictions',
            dense_function=Dense(**predict_params()))
        ### P(Leaf supertag)
        F_leaf_predict = ProbabilityTensor(
            name='leaf_predictions', dense_function=Dense(**predict_params()))

        ###### Network functions.
        ##### Input word, correct its dimensions (basically squash in a certain way)
        F_singleword = compose(Fix(), F_embedword)
        ##### Input spine, correct dimensions, broadcast across 1st dimension
        F_singlespine = compose(RepeatVector(repeat_N), Fix(),
                                self.igor.F_embedspine)
        ##### Concatenate and map to a single space
        F_alignlex = compose(
            RepeatVector(repeat_N), Dropout(p_mlp),
            Dense(mlp_size, activation='relu', name='dense_align_lex'), concat)

        F_alignall = compose(
            Distribute(Dropout(p_mlp), name='distribute_align_all_dropout'),
            Distribute(Dense(mlp_size,
                             activation='relu',
                             name='align_all_dense'),
                       name='distribute_align_all_dense'), concat)
        F_alignleaf = compose(
            Distribute(
                Dropout(p_mlp * 0.66), name='distribute_leaf_dropout'
            ),  ### need a separate one because the 'concat' is different for the two situations
            Distribute(Dense(mlp_size, activation='relu', name='leaf_dense'),
                       name='distribute_leaf_dense'),
            concat)

        ### embed and form all of the inputs into their components
        ### note: spines == supertags. early word choice, haven't refactored.
        leaf_spines = self.igor.F_embedspine(dspineset_in)
        pspine_context = self.igor.F_embedspine(pspineset_in)
        dspine_single = F_singlespine(dspine_in)

        dhead = F_singleword(dhead_in)
        phead = F_singleword(phead_in)

        ### combine the lexical material
        lexical_context = F_alignlex([dhead, phead])

        #### P(Parent Supertag | Daughter Supertag, Lexical Context)
        ### we know the daughter spine, want to know the parent spine
        ### size is (batch, num_supertags)
        parent_problem = F_alignall(
            [lexical_context, dspine_single, pspine_context])

        ### we don't have the parent, we just have a leaf
        leaf_problem = F_alignleaf([lexical_context, leaf_spines])

        parent_predictions = F_parent_predict(parent_problem)
        leaf_predictions = F_leaf_predict(leaf_problem)
        predictions = [parent_predictions, leaf_predictions]

        theano_kwargs = theano_kwargs or {}
        ## build the model first so checkpoint weights can be loaded before choosing the train/predict path
        self.model = Model(input=inputs,
                           output=predictions,
                           preloaded_data=self.igor.preloaded_data,
                           **theano_kwargs)

        if self.igor.from_checkpoint:
            self.load_checkpoint_weights()
        elif not self.igor.in_training:
            raise Exception("No point in running this without trained weights")

        if not self.igor.in_training:
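            ### inference: repeat the lexical context, daughter spines, and parent spines so
            ### every (daughter, parent) supertag pairing is scored in a single dense pass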
            expanded_children = RepeatVector(repeat_N, axis=2)(leaf_spines)
            expanded_parent = RepeatVector(repeat_N, axis=1)(pspine_context)
            expanded_lex = RepeatVector(repeat_N, axis=1)(
                lexical_context
            )  # axis here is arbitrary; it's repeating on 1 and 2, but already repeated once
            huge_tensor = concat(
                [expanded_lex, expanded_children, expanded_parent])
            densely_aligned = LastDimDistribute(
                F_alignall.get(1).layer)(huge_tensor)
            output_predictions = Distribute(
                F_parent_predict, force_reshape=True)(densely_aligned)

            primary_inputs = [phead_in, dhead_in, pspineset_in, dspineset_in]
            leaf_inputs = [phead_in, dhead_in, dspineset_in]

            self.logger.info("+ Compiling prediction functions")
            self.inner_func = K.Function(primary_inputs + [K.learning_phase()],
                                         output_predictions)
            self.leaf_func = K.Function(leaf_inputs + [K.learning_phase()],
                                        leaf_predictions)
            try:
                self.get_ptensor = K.function(
                    primary_inputs + [K.learning_phase()], [
                        output_predictions,
                    ])
            except Exception:
                import pdb
                pdb.set_trace()
        else:

            optimizer = Adam(self.igor.LR,
                             clipnorm=self.igor.max_grad_norm,
                             clipvalue=self.igor.grad_clip_threshold)

            theano_kwargs = theano_kwargs or {}
            self.model.compile(loss="categorical_crossentropy",
                               optimizer=optimizer,
                               metrics=['accuracy'],
                               **theano_kwargs)
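
When igor.in_training is false (and igor.from_checkpoint is set so trained weights are loaded), make() exposes backend prediction functions instead of compiling for training. Here is a hypothetical sketch of calling them: the FergusLinearTagger wrapper name and the zero-filled batches are illustrative only, and the trailing 0 is the Keras learning phase (test mode, dropout disabled).

import numpy as np

tagger = FergusLinearTagger(igor)   # hypothetical wrapper; igor.in_training is False
tagger.make()                       # loads checkpoint weights, builds inner_func / leaf_func

B, N = igor.batch_size, igor.max_num_supertags

phead = np.zeros((B, 1), dtype='int32')
dhead = np.zeros((B, 1), dtype='int32')
pspineset = np.zeros((B, N), dtype='int32')
dspineset = np.zeros((B, N), dtype='int32')

# Input order mirrors primary_inputs / leaf_inputs defined in make().
parent_scores = tagger.inner_func([phead, dhead, pspineset, dspineset, 0])
leaf_scores = tagger.leaf_func([phead, dhead, dspineset, 0])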