コード例 #1
0
class LongestMatchGlobalFeature(object):
    """
    Global feature function that marks tokens covered by lookup matches.

    Calling an instance on ``doc`` (a sequence of ``(token, features)``
    pairs) asks the matcher for ranges over the token strings and flags
    the feature mappings of the covered tokens in place.
    """

    def __init__(self, lookup_data, featname):
        """
        Prepare the matcher and the names of the features to set.

        ``lookup_data`` is used as the matcher as-is when it exposes a
        ``find_ranges`` attribute; otherwise it is wrapped in
        ``LongestMatch``. Tokens of a matched range get these features:

        * ``B-featname`` - on the first token of the range;
        * ``I-featname`` - on every following token of the range;
        * ``featname`` - on every token of the range.

        """
        is_matcher = hasattr(lookup_data, 'find_ranges')
        self.lm = lookup_data if is_matcher else LongestMatch(lookup_data)
        self.b_featname = 'B-' + featname
        self.i_featname = 'I-' + featname
        self.featname = featname

    def __call__(self, doc):
        """
        Flag every range the matcher reports for the tokens of ``doc``.

        ``doc`` is iterated as ``(token, features)`` pairs; the ``token``
        attribute of each first element is handed to ``find_ranges``.
        Nothing is returned: the feature mappings are updated in place.
        """
        words = [token.token for token, _ in doc]
        for first, stop, text in self.lm.find_ranges(words):
            self.process_range(doc, first, stop, text)

    def process_range(self, doc, start, end, matched_text):
        """
        Flag the tokens at positions ``start`` .. ``end - 1`` of ``doc``.

        The token at ``start`` gets the ``B-`` feature, the remaining
        ones get the ``I-`` feature, and all of them get the plain
        feature. ``matched_text`` is accepted but not used here.
        """
        head_features = doc[start][1]
        head_features[self.b_featname] = True
        head_features[self.featname] = True

        for position in range(start + 1, end):
            inner_features = doc[position][1]
            inner_features[self.i_featname] = True
            inner_features[self.featname] = True
コード例 #2
0
ファイル: global_features.py プロジェクト: zanachka/webstruct
class LongestMatchGlobalFeature(object):
    """
    Global feature function that marks tokens covered by lookup matches.

    Calling an instance on ``doc`` (a sequence of ``(token, features)``
    pairs) asks the matcher for ranges over the token strings and flags
    the feature mappings of the covered tokens in place.
    """

    def __init__(self, lookup_data, featname):
        """
        Prepare the matcher and the names of the features to set.

        ``lookup_data`` is used as the matcher as-is when it exposes a
        ``find_ranges`` attribute; otherwise it is wrapped in
        ``LongestMatch``. Tokens of a matched range get these features:

        * ``B-featname`` - on the first token of the range;
        * ``I-featname`` - on every following token of the range;
        * ``featname`` - on every token of the range.

        """
        is_matcher = hasattr(lookup_data, 'find_ranges')
        self.lm = lookup_data if is_matcher else LongestMatch(lookup_data)
        self.b_featname = 'B-' + featname
        self.i_featname = 'I-' + featname
        self.featname = featname

    def __call__(self, doc):
        """
        Flag every range the matcher reports for the tokens of ``doc``.

        ``doc`` is iterated as ``(token, features)`` pairs; the ``token``
        attribute of each first element is handed to ``find_ranges``.
        Nothing is returned: the feature mappings are updated in place.
        """
        words = [token.token for token, _ in doc]
        for first, stop, text in self.lm.find_ranges(words):
            self.process_range(doc, first, stop, text)

    def process_range(self, doc, start, end, matched_text):
        """
        Flag the tokens at positions ``start`` .. ``end - 1`` of ``doc``.

        The token at ``start`` gets the ``B-`` feature, the remaining
        ones get the ``I-`` feature, and all of them get the plain
        feature. ``matched_text`` is accepted but not used here.
        """
        head_features = doc[start][1]
        head_features[self.b_featname] = True
        head_features[self.featname] = True

        for position in range(start + 1, end):
            inner_features = doc[position][1]
            inner_features[self.i_featname] = True
            inner_features[self.featname] = True
コード例 #3
0
ファイル: global_features.py プロジェクト: zanachka/webstruct
    def __init__(self, lookup_data, featname):
        """
        Create a global feature function that adds 3 types of features:

        * ``B-featname`` - the current token starts an entity from
          the ``lookup_data``;
        * ``I-featname`` - the current token is inside an entity from
          the ``lookup_data``;
        * ``featname`` - the current token belongs to an entity from
          the ``lookup_data``.

        ``lookup_data`` is used as the matcher as-is when it exposes a
        ``find_ranges`` attribute; otherwise it is wrapped in
        ``LongestMatch``.

        """
        is_matcher = hasattr(lookup_data, 'find_ranges')
        self.lm = lookup_data if is_matcher else LongestMatch(lookup_data)
        self.b_featname = 'B-' + featname
        self.i_featname = 'I-' + featname
        self.featname = featname
コード例 #4
0
    def __init__(self, lookup_data, featname):
        """
        Create a global feature function that adds 3 types of features:

        * ``B-featname`` - the current token starts an entity from
          the ``lookup_data``;
        * ``I-featname`` - the current token is inside an entity from
          the ``lookup_data``;
        * ``featname`` - the current token belongs to an entity from
          the ``lookup_data``.

        ``lookup_data`` is used as the matcher as-is when it exposes a
        ``find_ranges`` attribute; otherwise it is wrapped in
        ``LongestMatch``.

        """
        is_matcher = hasattr(lookup_data, 'find_ranges')
        self.lm = lookup_data if is_matcher else LongestMatch(lookup_data)
        self.b_featname = 'B-' + featname
        self.i_featname = 'I-' + featname
        self.featname = featname