예제 #1
0
    def __init__(self, **property_names):
        """
        Initialise property indirection and build the sentence reader.

        @type property_names: C{dict}
        @param property_names: Keyword arguments that are forwarded
            unchanged both to C{PropertyIndirectionMixIn.__init__} and
            to the C{ChunkedTaggedTokenReader} constructor.
        """
        PropertyIndirectionMixIn.__init__(self, **property_names)

        # Token reader used for processing sentences; it is configured
        # with top node 'S' and chunk node 'NP'.
        sentence_reader = ChunkedTaggedTokenReader(
            chunk_node='NP', top_node='S', **property_names)
        self._sent_reader = sentence_reader
 def __init__(self, encoders, **property_names):
     """
     Build a merged feature encoder on top of several basic encoders.

     @param encoders: The basic feature encoders whose output is
         combined to form the output of this encoder.  The object
         is stored as given (no copy is made).
     @type property_names: C{dict}
     @param property_names: Keyword arguments passed straight through
         to C{PropertyIndirectionMixIn.__init__}.
     """
     PropertyIndirectionMixIn.__init__(self, **property_names)

     # Keep a reference to the wrapped encoders.
     self._encoders = encoders
예제 #3
0
 def __init__(self, encoders, **property_names):
     """
     Build a merged feature encoder on top of several basic encoders.

     @param encoders: The basic feature encoders whose output is
         combined to form the output of this encoder.  The object
         is stored as given (no copy is made).
     @type property_names: C{dict}
     @param property_names: Keyword arguments passed straight through
         to C{PropertyIndirectionMixIn.__init__}.
     """
     PropertyIndirectionMixIn.__init__(self, **property_names)

     # Keep a reference to the wrapped encoders.
     self._encoders = encoders
예제 #4
0
 def __init__(self, preterminal_tags=False, **property_names):
     """
     Initialise the instance state.

     @type preterminal_tags: C{boolean}
     @param preterminal_tags: When true, preterminal nodes are
         treated as tags.
     @type property_names: C{dict}
     @param property_names: Keyword arguments passed straight through
         to C{PropertyIndirectionMixIn.__init__}.
     """
     PropertyIndirectionMixIn.__init__(self, **property_names)

     # Expose the tree parser through the instance.
     self.parse_iter = Tree.parse_iter

     # NOTE: keeping the source on the instance is not thread-safe.
     self._source = None
     self._preterminal_tags = preterminal_tags
예제 #5
0
 def __init__(self, preterminal_tags=False, **property_names):
     """
     Initialise the instance state.

     @type preterminal_tags: C{boolean}
     @param preterminal_tags: When true, preterminal nodes are
         treated as tags.
     @type property_names: C{dict}
     @param property_names: Keyword arguments passed straight through
         to C{PropertyIndirectionMixIn.__init__}.
     """
     PropertyIndirectionMixIn.__init__(self, **property_names)

     # Expose the tree parser through the instance.
     self.parse_iter = Tree.parse_iter

     # NOTE: keeping the source on the instance is not thread-safe.
     self._source = None
     self._preterminal_tags = preterminal_tags
 def __init__(self, initial_tagger, rules, **property_names):
     """
     Store the initial tagger and the correction rules.

     @type initial_tagger: L{TaggerI}
     @param initial_tagger: The initial tagger.
     @type rules: C{list} of L{BrillRuleI}
     @param rules: An ordered list of transformation rules that
         should be used to correct the initial tagging.
     @type property_names: C{dict}
     @param property_names: Keyword arguments passed straight through
         to C{PropertyIndirectionMixIn.__init__}.
     """
     # Both attributes are assigned before the mix-in is initialised.
     self._rules = rules
     self._initial_tagger = initial_tagger

     PropertyIndirectionMixIn.__init__(self, **property_names)
예제 #7
0
 def __init__(self, initial_tagger, rules, **property_names):
     """
     Store the initial tagger and the correction rules.

     @type initial_tagger: L{TaggerI}
     @param initial_tagger: The initial tagger.
     @type rules: C{list} of L{BrillRuleI}
     @param rules: An ordered list of transformation rules that
         should be used to correct the initial tagging.
     @type property_names: C{dict}
     @param property_names: Keyword arguments passed straight through
         to C{PropertyIndirectionMixIn.__init__}.
     """
     # Both attributes are assigned before the mix-in is initialised.
     self._rules = rules
     self._initial_tagger = initial_tagger

     PropertyIndirectionMixIn.__init__(self, **property_names)
예제 #8
0
    def __init__(self, **property_names):
        """
        Construct a new tokenizer.

        @type property_names: C{dict}
        @param property_names: A dictionary that can be used to override
            the default property names.  Each entry maps from a
            default property name to a new property name.
        @raise AssertionError: If C{AbstractTokenizer} itself, rather
            than a subclass, is being instantiated.
        """
        # Make sure we're not directly instantiated.  The exception is
        # created with call syntax, which is valid in both Python 2 and
        # Python 3 (the "raise E, msg" statement form is Python-2-only).
        if self.__class__ == AbstractTokenizer:
            raise AssertionError("Abstract classes can't be instantiated")
        PropertyIndirectionMixIn.__init__(self, **property_names)
예제 #9
0
 def __init__(self, top_node='S', chunk_node='CHUNK', **property_names):
     """
     @include: AbstractTokenizer.__init__
     @type top_node: C{string}
     @param top_node: The node label stored as C{self._top_node}
         (presumably the label of the top-level node; confirm
         against the code that reads it).
     @type chunk_node: C{string}
     @param chunk_node: The node label to use for chunk subtrees.
         Typically a short string naming the kind of information
         the chunk holds, e.g. C{"NP"} for base noun phrases.
     """
     PropertyIndirectionMixIn.__init__(self, **property_names)

     # Remember the two node labels for later use.
     self._top_node = top_node
     self._chunk_node = chunk_node
예제 #10
0
 def __init__(self, top_node='S', chunk_node='CHUNK', **property_names):
     """
     @include: AbstractTokenizer.__init__
     @type top_node: C{string}
     @param top_node: The node label stored as C{self._top_node}
         (presumably the label of the top-level node; confirm
         against the code that reads it).
     @type chunk_node: C{string}
     @param chunk_node: The node label to use for chunk subtrees.
         Typically a short string naming the kind of information
         the chunk holds, e.g. C{"NP"} for base noun phrases.
     """
     PropertyIndirectionMixIn.__init__(self, **property_names)

     # Remember the two node labels for later use.
     self._top_node = top_node
     self._chunk_node = chunk_node
예제 #11
0
    def __init__(self, **property_names):
        """
        Construct a new tokenizer.

        @type property_names: C{dict}
        @param property_names: A dictionary that can be used to override
            the default property names.  Each entry maps from a
            default property name to a new property name.
        @raise AssertionError: If C{AbstractTokenizer} itself, rather
            than a subclass, is being instantiated.
        """
        # Make sure we're not directly instantiated.  The exception is
        # created with call syntax, which is valid in both Python 2 and
        # Python 3 (the "raise E, msg" statement form is Python-2-only).
        if self.__class__ == AbstractTokenizer:
            raise AssertionError("Abstract classes can't be instantiated")
        PropertyIndirectionMixIn.__init__(self, **property_names)
 def __init__(self, **property_names):
     """
     Create a new stemmer.

     @type property_names: C{dict}
     @param property_names: A dictionary that can be used to override
         the default property names.  Each entry maps from a
         default property name to a new property name.
     @raise AssertionError: If C{AbstractStemmer} itself, rather
         than a subclass, is being instantiated.
     """
     # Make sure we're not directly instantiated.  The exception is
     # created with call syntax, which is valid in both Python 2 and
     # Python 3 (the "raise E, msg" statement form is Python-2-only).
     if self.__class__ == AbstractStemmer:
         raise AssertionError("Abstract classes can't be instantiated")
     PropertyIndirectionMixIn.__init__(self, **property_names)
 def __init__(self, reverse=False, **property_names):
     """
     Create a new sequential tagger.

     @param reverse: When true, tags are assigned to subtokens in
         reverse sequential order, i.e. from right to left.
     @type property_names: C{dict}
     @param property_names: A dictionary for overriding the default
         property names.  Every entry maps a default property name
         to its replacement.
     """
     # The direction flag is recorded before the mix-in is set up.
     self._reverse = reverse

     PropertyIndirectionMixIn.__init__(self, **property_names)
예제 #14
0
 def __init__(self, **property_names):
     """
     Create a new stemmer.

     @type property_names: C{dict}
     @param property_names: A dictionary that can be used to override
         the default property names.  Each entry maps from a
         default property name to a new property name.
     @raise AssertionError: If C{AbstractStemmer} itself, rather
         than a subclass, is being instantiated.
     """
     # Make sure we're not directly instantiated.  The exception is
     # created with call syntax, which is valid in both Python 2 and
     # Python 3 (the "raise E, msg" statement form is Python-2-only).
     if self.__class__ == AbstractStemmer:
         raise AssertionError("Abstract classes can't be instantiated")
     PropertyIndirectionMixIn.__init__(self, **property_names)
예제 #15
0
 def __init__(self, chunk_types=None, **property_names):
     """
     Create a new C{IeerChunkedTokenizer}.

     @type chunk_types: C{list} of C{string}
     @param chunk_types: A list of the node types to be extracted
         from the input.  Possible node types are
         C{'LOCATION'}, C{'ORGANIZATION'}, C{'PERSON'},
         C{'DURATION'}, C{'DATE'}, C{'CARDINAL'}, C{'PERCENT'},
         C{'MONEY'}, C{'MEASURE'}.  If omitted (or C{None}), a
         list containing all of these types is used.
     @type property_names: C{dict}
     @param property_names: Keyword arguments passed straight through
         to C{PropertyIndirectionMixIn.__init__}.
     """
     PropertyIndirectionMixIn.__init__(self, **property_names)

     if chunk_types is None:
         # Build the default list per call: a list literal in the
         # signature would be one object shared by every instance,
         # so mutating it through one tokenizer would affect all.
         chunk_types = ['LOCATION', 'ORGANIZATION', 'PERSON',
                        'DURATION', 'DATE', 'CARDINAL', 'PERCENT',
                        'MONEY', 'MEASURE']
     self._chunk_types = chunk_types
예제 #16
0
 def __init__(self, base_encoder, C=None, **property_names):
     """
     Wrap C{base_encoder} and record the correction constant.

     @param base_encoder: The encoder being wrapped.  It must
         provide a C{num_features()} method, which is called when
         no value is given for C{C}.
     @param C: The correction constant for this encoder.  This
         value must be at least as great as the highest sum of
         feature vectors that could be returned by C{base_encoder}.
         If no value is given, a default of
         C{base_encoder.num_features()} is used.  While this value
         is safe (for boolean feature vectors), it is highly
         conservative, and usually leads to poor performance.
     @type C: C{int}
     @type property_names: C{dict}
     @param property_names: Keyword arguments passed straight through
         to C{PropertyIndirectionMixIn.__init__}.
     """
     PropertyIndirectionMixIn.__init__(self, **property_names)
     self._encoder = base_encoder
     if C is None:
         # Ask the wrapped encoder itself for the default; the bare
         # name "encoder" is not defined in this scope.
         self._C = base_encoder.num_features()
     else:
         self._C = C
예제 #17
0
 def __init__(self, base_encoder, C=None, **property_names):
     """
     Wrap C{base_encoder} and record the correction constant.

     @param base_encoder: The encoder being wrapped.  It must
         provide a C{num_features()} method, which is called when
         no value is given for C{C}.
     @param C: The correction constant for this encoder.  This
         value must be at least as great as the highest sum of
         feature vectors that could be returned by C{base_encoder}.
         If no value is given, a default of
         C{base_encoder.num_features()} is used.  While this value
         is safe (for boolean feature vectors), it is highly
         conservative, and usually leads to poor performance.
     @type C: C{int}
     @type property_names: C{dict}
     @param property_names: Keyword arguments passed straight through
         to C{PropertyIndirectionMixIn.__init__}.
     """
     PropertyIndirectionMixIn.__init__(self, **property_names)
     self._encoder = base_encoder
     if C is None:
         # Ask the wrapped encoder itself for the default; the bare
         # name "encoder" is not defined in this scope.
         self._C = base_encoder.num_features()
     else:
         self._C = C
예제 #18
0
    def __init__(self, classes, weights, **property_names):
        """
        Build a new conditional exponential classifier model.
        New classifier models are typically created by
        C{ClassifierTrainer}s rather than directly.

        @type classes: C{list}
        @param classes: The classes this classifier can generate.
            Their order must correspond to the order of the weights.
        @type weights: C{list} of C{float}
        @param weights: The feature weight vector for the classifier.
            Weight M{w[i,j]} is encoded by C{weights[i+j*N]}, where
            C{N} is the length of the feature vector.
        @type property_names: C{dict}
        @param property_names: Keyword arguments passed straight
            through to C{PropertyIndirectionMixIn.__init__}.
        """
        PropertyIndirectionMixIn.__init__(self, **property_names)

        self._weights = weights
        # The ordering of the classes is significant (see above).
        self._classes = classes
예제 #19
0
    def __init__(self, classes, weights, **property_names):
        """
        Build a new conditional exponential classifier model.
        New classifier models are typically created by
        C{ClassifierTrainer}s rather than directly.

        @type classes: C{list}
        @param classes: The classes this classifier can generate.
            Their order must correspond to the order of the weights.
        @type weights: C{list} of C{float}
        @param weights: The feature weight vector for the classifier.
            Weight M{w[i,j]} is encoded by C{weights[i+j*N]}, where
            C{N} is the length of the feature vector.
        @type property_names: C{dict}
        @param property_names: Keyword arguments passed straight
            through to C{PropertyIndirectionMixIn.__init__}.
        """
        PropertyIndirectionMixIn.__init__(self, **property_names)

        self._weights = weights
        # The ordering of the classes is significant (see above).
        self._classes = classes
예제 #20
0
 def __init__(self, chunk_types=None, **property_names):
     """
     Create a new C{IeerChunkedTokenizer}.

     @type chunk_types: C{list} of C{string}
     @param chunk_types: A list of the node types to be extracted
         from the input.  Possible node types are
         C{'LOCATION'}, C{'ORGANIZATION'}, C{'PERSON'},
         C{'DURATION'}, C{'DATE'}, C{'CARDINAL'}, C{'PERCENT'},
         C{'MONEY'}, C{'MEASURE'}.  If omitted (or C{None}), a
         list containing all of these types is used.
     @type property_names: C{dict}
     @param property_names: Keyword arguments passed straight through
         to C{PropertyIndirectionMixIn.__init__}.
     """
     PropertyIndirectionMixIn.__init__(self, **property_names)

     if chunk_types is None:
         # Build the default list per call: a list literal in the
         # signature would be one object shared by every instance,
         # so mutating it through one tokenizer would affect all.
         chunk_types = [
             'LOCATION', 'ORGANIZATION', 'PERSON', 'DURATION', 'DATE',
             'CARDINAL', 'PERCENT', 'MONEY', 'MEASURE'
         ]
     self._chunk_types = chunk_types
    def __init__(self, feature_name, values, **property_names):
        """
        Build a feature encoder for the feature with the given name.

        @type feature_name: C{string}
        @param feature_name: The name of the feature to encode.
        @type values: C{list}
        @param values: The feature values or subvalues that the
            feature is known to take.  One additional feature vector
            index is set aside for values that were not seen.
        @type property_names: C{dict}
        @param property_names: Keyword arguments passed straight
            through to C{PropertyIndirectionMixIn.__init__}.
        """
        PropertyIndirectionMixIn.__init__(self, **property_names)
        self._feature_name = feature_name

        # Index -> value table; slot 0 is reserved for unseen values.
        self._index_to_val = ['<unknown>'] + list(values)

        # Value -> index table; known values are numbered from 1 so
        # that they line up with the table above.
        index_of = {}
        for position, value in enumerate(values):
            index_of[value] = position + 1
        self._val_to_index = index_of
예제 #22
0
    def __init__(self, feature_name, values, **property_names):
        """
        Build a feature encoder for the feature with the given name.

        @type feature_name: C{string}
        @param feature_name: The name of the feature to encode.
        @type values: C{list}
        @param values: The feature values or subvalues that the
            feature is known to take.  One additional feature vector
            index is set aside for values that were not seen.
        @type property_names: C{dict}
        @param property_names: Keyword arguments passed straight
            through to C{PropertyIndirectionMixIn.__init__}.
        """
        PropertyIndirectionMixIn.__init__(self, **property_names)
        self._feature_name = feature_name

        # Index -> value table; slot 0 is reserved for unseen values.
        self._index_to_val = ['<unknown>'] + list(values)

        # Value -> index table; known values are numbered from 1 so
        # that they line up with the table above.
        index_of = {}
        for position, value in enumerate(values):
            index_of[value] = position + 1
        self._val_to_index = index_of
예제 #23
0
 def __init__(self, initial_tagger, templates, trace=0, **property_names):
     """
     Record the trainer configuration.

     @param initial_tagger: Stored as C{self._initial_tagger}.
     @param templates: Stored as C{self._templates}.
     @param trace: Stored as C{self._trace}; defaults to C{0}
         (presumably a verbosity level; confirm against the
         code that reads it).
     @type property_names: C{dict}
     @param property_names: Kept as C{self._property_names} and also
         forwarded to C{PropertyIndirectionMixIn.__init__}.
     """
     # All attributes are assigned before the mix-in is initialised.
     self._property_names = property_names
     self._trace = trace
     self._templates = templates
     self._initial_tagger = initial_tagger

     PropertyIndirectionMixIn.__init__(self, **property_names)
 def __init__(self, initial_tagger, templates, trace=0, **property_names):
     """
     Record the trainer configuration.

     @param initial_tagger: Stored as C{self._initial_tagger}.
     @param templates: Stored as C{self._templates}.
     @param trace: Stored as C{self._trace}; defaults to C{0}
         (presumably a verbosity level; confirm against the
         code that reads it).
     @type property_names: C{dict}
     @param property_names: Kept as C{self._property_names} and also
         forwarded to C{PropertyIndirectionMixIn.__init__}.
     """
     # All attributes are assigned before the mix-in is initialised.
     self._property_names = property_names
     self._trace = trace
     self._templates = templates
     self._initial_tagger = initial_tagger

     PropertyIndirectionMixIn.__init__(self, **property_names)
예제 #25
0
 def __init__(self, chunk_types=None, **property_names):
     """
     Initialise property indirection and remember the chunk types.

     @param chunk_types: Stored unchanged as C{self._chunk_types};
         defaults to C{None}.
     @type property_names: C{dict}
     @param property_names: Keyword arguments passed straight through
         to C{PropertyIndirectionMixIn.__init__}.
     """
     PropertyIndirectionMixIn.__init__(self, **property_names)

     # No validation or copying is done here.
     self._chunk_types = chunk_types
예제 #26
0
 def __init__(self, **property_names):
     """
     Initialise property indirection for this instance.

     @type property_names: C{dict}
     @param property_names: Keyword arguments passed straight through
         to C{PropertyIndirectionMixIn.__init__}.
     """
     # The mix-in initialiser is the only set-up step needed here.
     PropertyIndirectionMixIn.__init__(self, **property_names)
 def __init__(self, chunk_types=None, **property_names):
     """
     Initialise property indirection and remember the chunk types.

     @param chunk_types: Stored unchanged as C{self._chunk_types};
         defaults to C{None}.
     @type property_names: C{dict}
     @param property_names: Keyword arguments passed straight through
         to C{PropertyIndirectionMixIn.__init__}.
     """
     PropertyIndirectionMixIn.__init__(self, **property_names)

     # No validation or copying is done here.
     self._chunk_types = chunk_types
예제 #28
0
 def __init__(self, **property_names):
     """
     Initialise property indirection for this instance.

     @type property_names: C{dict}
     @param property_names: Keyword arguments passed straight through
         to C{PropertyIndirectionMixIn.__init__}.
     """
     # The mix-in initialiser is the only set-up step needed here.
     PropertyIndirectionMixIn.__init__(self, **property_names)
예제 #29
0
    def __init__(self, **property_names):
        """
        Initialise property indirection and build the sentence reader.

        @type property_names: C{dict}
        @param property_names: Keyword arguments that are forwarded
            unchanged both to C{PropertyIndirectionMixIn.__init__} and
            to the C{ChunkedTaggedTokenReader} constructor.
        """
        PropertyIndirectionMixIn.__init__(self, **property_names)

        # Token reader used for processing sentences; it is configured
        # with top node 'S' and chunk node 'NP'.
        sentence_reader = ChunkedTaggedTokenReader(
            chunk_node='NP', top_node='S', **property_names)
        self._sent_reader = sentence_reader