Example #1
0
 def __init__(self, n_left=2, n_right=2):
     """Create the chunk encoder and record the context window sizes.

     Inputs
     ------
     n_left : int, optional
         Stored unchanged as ``self.n_left``.
         (Default: 2)

     n_right : int, optional
         Stored unchanged as ``self.n_right``.
         (Default: 2)
     """
     # The chunker is built first so a failure there leaves no partial state.
     self.chunker = BILOUChunkEncoder()
     self.n_left, self.n_right = n_left, n_right
Example #2
0
class Encoder(object):
    """Abstract base class for feature encoders.

    Subclasses must override ``get_feats_for_token``; the remaining methods
    build context-window features and training targets on top of it.

    Inputs
    ------
    n_left : int, optional
        Number of tokens of left context to include.
        (Default: 2)

    n_right : int, optional
        Number of tokens of right context to include.
        (Default: 2)

    Attributes
    ----------
    chunker : chunk.ChunkEncoder
        ChunkEncoder instance used to generate tags.

    n_left : int
        Number of tokens of left context to include.

    n_right : int
        Number of tokens of right context to include.
    """
    def __init__(self, n_left=2, n_right=2):
        self.chunker = BILOUChunkEncoder()
        self.n_left = n_left
        self.n_right = n_right

    def get_feats_for_token(self, token):
        """Return features for token.

        Must be overridden by subclasses; the base implementation always
        raises.

        Inputs
        ------
        token : str
            Token.

        Outputs
        -------
        feats : tuple of str
            Features vector.

        Raises
        ------
        NotImplementedError
            Always, in this base class.
        """
        raise NotImplementedError

    def get_feats(self, tokens):
        """Return features corresponding to token sequence.

        Every per-token feature is replicated once for each context offset
        in ``[-n_left, n_right]`` and rendered as ``'F<i>[<offset>]=<value>'``,
        where ``<i>`` is the index of the feature within the tuple returned
        by ``get_feats_for_token``. Entries that are None after shifting are
        dropped, so rows may differ in length.

        Inputs
        ------
        tokens : list of str
            Token sequence.

        Outputs
        -------
        feats : list of list of str
            Feature vector sequence, one inner list per token. Empty when
            ``tokens`` is empty.
        """
        # One feature tuple per token, transposed into one column per feature.
        per_token = [self.get_feats_for_token(token) for token in tokens]
        columns = zip(*per_token)

        # ``range`` (not ``xrange``) keeps this working on Python 2 and 3.
        offsets = range(-self.n_left, self.n_right + 1)

        shifted_columns = []
        for ii, column in enumerate(columns):
            for pos in offsets:
                feat_id = 'F%d[%d]' % (ii, pos)
                # NOTE(review): ``roll`` is defined outside this class;
                # presumably it shifts the column by ``-pos`` positions and
                # leaves None in vacated slots -- confirm against its
                # definition. None values are passed through unformatted so
                # they can be filtered out below.
                shifted_columns.append(
                    ['%s=%s' % (feat_id, val) if val is not None else val
                     for val in roll(column, -pos)])

        # Transpose back to one row per token and filter out None values.
        # Building a fresh list avoids item assignment into the result of
        # ``zip``, which is a lazy iterator on Python 3.
        return [[val for val in row if val is not None]
                for row in zip(*shifted_columns)]

    def get_targets(self, tokens, mentions):
        """Return tag sequence to train against.

        Every token starts with the tag 'O'; the span of each mention is
        then overwritten with the tags produced by
        ``self.chunker.chunk_to_tags``. Mentions are applied in order, so a
        later mention overwrites an earlier one where they overlap.

        Inputs
        ------
        tokens : list of str
            Token sequence.

        mentions : list of list
            List of mention tuples, each of the form (tag, start_token_index,
            end_token_index). The end index is inclusive.

        Outputs
        -------
        targets : list of str
            Target label sequence.
        """
        tags = ['O' for _ in tokens]
        for tag, bi, ei in mentions:
            # ``ei`` is inclusive, hence the ``+ 1`` on both slices.
            chunk = tokens[bi:ei + 1]
            tags[bi:ei + 1] = self.chunker.chunk_to_tags(chunk, tag)
        return tags

    def get_feats_targets(self, tokens, mentions):
        """Return features/tag sequence to train against.

        Inputs
        ------
        tokens : list of str
            Token sequence.

        mentions : list of list
            List of mention tuples, each of the form (tag, start_token_index,
            end_token_index). The end index is inclusive.

        Outputs
        -------
        feats : list of list of str
            Feature vector sequence, as returned by ``get_feats``.

        targets : list of str
            Target label sequence, as returned by ``get_targets``.
        """
        feats = self.get_feats(tokens)
        targets = self.get_targets(tokens, mentions)
        return feats, targets