Python chktype 예제들, nltk.chktype.chktype Python 예제들

예제 #1

0

파일 보기

    def cluster(self, tokens, assign_clusters=False, trace=False):
        """
        Cluster the feature vectors carried by the given tokens.

        The C{'FEATURES'} value of every token is collected, normalised
        if this clusterer was configured to do so, optionally reduced
        with SVD, and then handed to C{cluster_vectorspace}.

        @param tokens: The tokens to cluster; must not be empty.
        @type tokens: C{list} of C{Token}
        @param assign_clusters: If true, C{classify} is called on each
            token after the clustering has been computed.
        @type assign_clusters: C{bool}
        @param trace: Passed through to C{cluster_vectorspace}.
        @type trace: C{bool}
        """
        assert chktype(1, tokens, [Token])
        assert chktype(2, assign_clusters, bool)
        assert chktype(3, trace, bool)
        assert len(tokens) > 0
        vectors = [tok['FEATURES'] for tok in tokens]

        # Normalise every vector when requested at construction time.
        if self._should_normalise:
            vectors = [self._normalise(vec) for vec in vectors]

        # Reduce the dimensionality with SVD, but only when fewer
        # dimensions were requested than the vectors already have.
        k = self._svd_dimensions
        if k and k < len(vectors[0]):
            matrix = Numeric.transpose(Numeric.array(vectors))
            u, d, vt = LinearAlgebra.singular_value_decomposition(matrix)
            S = d[:k] * Numeric.identity(k, Numeric.Float64)
            T = u[:, :k]
            Dt = vt[:k, :]
            vectors = Numeric.transpose(Numeric.matrixmultiply(S, Dt))
            # Keep the transpose of T on the instance.
            self._Tt = Numeric.transpose(T)

        # Delegate the actual clustering to the subclass hook.
        self.cluster_vectorspace(vectors, trace)

        # Optionally label every input token with its cluster.
        if assign_clusters:
            for tok in tokens:
                self.classify(tok)

예제 #2

0

파일 보기

파일: __init__.py 프로젝트: ronaldahmed/robot-navigation

    def cluster(self, tokens, assign_clusters=False, trace=False):
        """
        Build a clustering from the C{'FEATURES'} vectors of C{tokens}.

        @param tokens: A non-empty list of tokens, each providing a
            C{'FEATURES'} value.
        @type tokens: C{list} of C{Token}
        @param assign_clusters: Whether to run C{classify} over all of
            the tokens once the clusters have been built.
        @type assign_clusters: C{bool}
        @param trace: Forwarded unchanged to C{cluster_vectorspace}.
        @type trace: C{bool}
        """
        assert chktype(1, tokens, [Token])
        assert chktype(2, assign_clusters, bool)
        assert chktype(3, trace, bool)
        assert len(tokens) > 0
        vectors = [token['FEATURES'] for token in tokens]

        # Optional normalisation step.
        if self._should_normalise:
            vectors = [self._normalise(vector) for vector in vectors]

        # Optional SVD step; skipped unless it would really shrink
        # the vectors.
        dims = self._svd_dimensions
        if dims and dims < len(vectors[0]):
            stacked = Numeric.transpose(Numeric.array(vectors))
            u, d, vt = LinearAlgebra.singular_value_decomposition(stacked)
            S = d[:dims] * Numeric.identity(dims, Numeric.Float64)
            T = u[:, :dims]
            Dt = vt[:dims, :]
            vectors = Numeric.transpose(Numeric.matrixmultiply(S, Dt))
            # The transposed T is stored for use outside this method.
            self._Tt = Numeric.transpose(T)

        # Hand the prepared vectors to the abstract clustering method.
        self.cluster_vectorspace(vectors, trace)

        # Assign each token to a cluster if the caller asked for it.
        if assign_clusters:
            for token in tokens:
                self.classify(token)

예제 #3

0

파일 보기

파일: tree.py 프로젝트: ronaldahmed/robot-navigation

    def pp(self, margin=70, indent=0, nodesep=':', parens='()'):
        """
        Pretty-print this tree, wrapping onto several lines when the
        one-line form does not fit.

        @return: A pretty-printed string representation of this tree.
        @rtype: C{string}
        @param margin: The right margin at which to do line-wrapping.
        @type margin: C{int}
        @param indent: The indentation level at which printing
            begins.  Children are indented two columns further.
        @type indent: C{int}
        @param nodesep: A string that is used to separate the node
            from the children.  E.g., the default value C{':'} gives
            trees like C{(S: (NP: I) (VP: (V: saw) (NP: it)))}.
        @param parens: The opening bracket (element 0) and the closing
            bracket (element 1) to put around the tree.
        """
        assert chktype(1, margin, types.IntType)
        assert chktype(2, indent, types.IntType)

        # Prefer the flat form whenever it fits inside the margin.
        flat = self._ppflat(nodesep, parens)
        if len(flat) + indent < margin:
            return flat

        # Otherwise put every child on a line of its own.
        child_indent = indent + 2
        line_start = '\n' + ' ' * child_indent
        pieces = ['%s%s%s' % (parens[0], self.node, nodesep)]
        for child in self:
            if isinstance(child, Tree):
                rendered = child.pp(margin, child_indent, nodesep, parens)
            else:
                rendered = repr(child)
            pieces.append(line_start + rendered)
        pieces.append(parens[1])
        return ''.join(pieces)

예제 #4

0

파일 보기

 def setdefault(self, property, default=None):
     """
     Type-checked version of C{setdefault}; the real work is done by
     the superclass implementation.

     @param property: The property name.
     @type property: C{str}
     @param default: The default value; checked against
         C{self._checkval}.
     @raise TypeError: If C{property} is C{'LOC'} and C{default} is
         neither C{None} nor a C{LocationI}.
     """
     assert chktype(1, property, str)
     assert chktype(2, default, self._checkval)
     # Only the 'LOC' property carries an extra constraint.
     if property == 'LOC' and default is not None:
         if not isinstance(default, LocationI):
             raise TypeError("The 'LOC' property must contain a Location")
     parent = super(SafeToken, self)
     return parent.setdefault(property, default)

예제 #5

0

파일 보기

 def __setitem__(self, property, value):
     """
     Type-checked item assignment; the real work is done by the
     superclass implementation.

     @param property: The property name.
     @type property: C{str}
     @param value: The new value; checked against C{self._checkval}.
     @raise TypeError: If C{property} is C{'LOC'} and C{value} is
         neither C{None} nor a C{LocationI}.
     """
     assert chktype(1, property, str)
     assert chktype(2, value, self._checkval)
     # Only the 'LOC' property carries an extra constraint.
     if property == 'LOC' and value is not None:
         if not isinstance(value, LocationI):
             raise TypeError("The 'LOC' property must contain a Location")
     parent = super(SafeToken, self)
     return parent.__setitem__(property, value)

예제 #6

0

파일 보기

파일: __init__.py 프로젝트: ronaldahmed/robot-navigation

    def __init__(self, states=None, symbols=None, **properties):
        """
        Creates an HMM trainer to induce an HMM with the given states and
        output symbol alphabet. A supervised and unsupervised training
        method may be used. If either of the states or symbols are not given,
        these may be derived from supervised training.

        @param states:  the set of state labels
        @type states:   sequence of any
        @param symbols: the set of observation symbols
        @type symbols:  sequence of any
        @param properties: alternative names to be used for the TEXT,
                        SUBTOKENS and TAG properties
        """
        # The first chktype argument is the parameter's position, so it
        # has to follow the signature: states is 1, symbols is 2.
        assert chktype(1, states, types.TupleType, types.ListType, types.NoneType)
        assert chktype(2, symbols, types.TupleType, types.ListType, types.NoneType)
        # A missing or empty sequence is replaced by a fresh empty list.
        self._states = states or []
        self._symbols = symbols or []
        self._properties = properties

예제 #7

0

파일 보기

파일: __init__.py 프로젝트: ronaldahmed/robot-navigation

    def __init__(self, symbols, states, transitions, outputs, priors, **properties):
        """
        Create a hidden Markov model parameterised by its states,
        transition probabilities, output probabilities and priors.

        @param  symbols:        the set of output symbols (alphabet)
        @type   symbols:        seq of any
        @param  states:         a set of states representing state space
        @type   states:         seq of any
        @param  transitions:    transition probabilities; Pr(s_i | s_j)
                                is the probability of transition to state i
                                given the model is in state j
        @type   transitions:    C{ConditionalProbDistI}
        @param  outputs:        output probabilities; Pr(o_k | s_i) is the
                                probability of emitting symbol k when entering
                                state i
        @type   outputs:        C{ConditionalProbDistI}
        @param  priors:         initial state distribution; Pr(s_i) is the
                                probability of starting in state i
        @type   priors:         C{ProbDistI}
        @param  properties:     property names: TAG, TEXT, and SUBTOKENS are
                                used and may be overridden
        @type   properties:     C{dict}
        """
        assert chktype(1, symbols, types.TupleType, types.ListType)
        assert chktype(2, states, types.TupleType, types.ListType)
        assert chktype(3, transitions, ConditionalProbDistI)
        # NOTE: the type check on outputs (argument 4) is disabled here.
        assert chktype(5, priors, ProbDistI)

        # Alphabet and state space.
        self._symbols = symbols
        self._states = states
        # Model parameters.
        self._priors = priors
        self._transitions = transitions
        self._outputs = outputs
        # Property-name overrides.
        self._properties = properties

예제 #8

0

파일 보기

    def pp(self, margin=70, indent=0, nodesep=':', parens='()'):
        """
        Return a pretty-printed string for this tree.  The tree is
        written on one line if that fits, and on multiple lines
        otherwise.

        @return: A pretty-printed string representation of this tree.
        @rtype: C{string}
        @param margin: The right margin at which to do line-wrapping.
        @type margin: C{int}
        @param indent: The indentation level at which printing
            begins.  Each nesting level adds two columns to it.
        @type indent: C{int}
        @param nodesep: A string that is used to separate the node
            from the children.  E.g., the default value C{':'} gives
            trees like C{(S: (NP: I) (VP: (V: saw) (NP: it)))}.
        @param parens: A pair holding the opening and the closing
            bracket, in that order.
        """
        assert chktype(1, margin, types.IntType)
        assert chktype(2, indent, types.IntType)

        # One-line attempt.
        one_line = self._ppflat(nodesep, parens)
        if len(one_line) + indent < margin:
            return one_line

        # Multi-line fallback: header first, then one line per child.
        nested_indent = indent + 2
        padding = '\n' + ' ' * nested_indent
        lines = ['%s%s%s' % (parens[0], self.node, nodesep)]
        for child in self:
            if isinstance(child, Tree):
                text = child.pp(margin, nested_indent, nodesep, parens)
            else:
                text = repr(child)
            lines.append(padding + text)
        return ''.join(lines) + parens[1]

예제 #9

0

파일 보기

파일: __init__.py 프로젝트: shubhampachori12110095/navigation-corpus

def tagger_accuracy(tagger, gold_standard):
    """
    Score the accuracy of the tagger against the gold standard.
    For every gold-standard document a copy without the tag property
    is made, the copy is tagged with C{tagger}, and the subtokens of
    all the copies are then compared against the gold subtokens.

    @type tagger: C{TaggerI}
    @param tagger: The tagger being evaluated.
    @type gold_standard: C{list} or C{tuple} of C{Token}
    @param gold_standard: The tagged tokens to score the tagger on;
      each must have the 'SUBTOKENS' attribute.
    @return: The value computed by C{accuracy} for the gold and the
      re-tagged subtoken lists.
    @rtype: C{float}
    """

    # NB: replace tagger._property_names with tagger.property_names()?

    assert chktype(1, tagger, TaggerI)
    assert chktype(2, gold_standard, (Token,), [Token])
    tag_name = tagger.property('TAG')
    subtokens_name = tagger.property('SUBTOKENS')

    expected = []
    produced = []
    for gold_doc in gold_standard:
        # Tag a copy that has had the gold tags removed.
        candidate = gold_doc.exclude(tag_name)
        tagger.tag(candidate)
        expected.extend(gold_doc[subtokens_name])
        produced.extend(candidate[subtokens_name])
    return accuracy(expected, produced)

예제 #10

0

파일 보기

    def __init__(self, states=None, symbols=None, **properties):
        """
        Creates an HMM trainer to induce an HMM with the given states and
        output symbol alphabet. A supervised and unsupervised training
        method may be used. If either of the states or symbols are not given,
        these may be derived from supervised training.

        @param states:  the set of state labels
        @type states:   sequence of any
        @param symbols: the set of observation symbols
        @type symbols:  sequence of any
        @param properties: alternative names to be used for the TEXT,
                        SUBTOKENS and TAG properties
        """
        # chktype's first argument names the parameter position reported
        # in the error, so it must match the signature order
        # (states first, symbols second).
        assert chktype(1, states, types.TupleType, types.ListType,
                       types.NoneType)
        assert chktype(2, symbols, types.TupleType, types.ListType,
                       types.NoneType)
        # Fall back to a new empty list when nothing usable was given.
        self._states = states or []
        self._symbols = symbols or []
        self._properties = properties

예제 #11

0

파일 보기

 def __init__(self, num_clusters=1, normalise=True, svd_dimensions=None):
     """
     @param num_clusters:    stored as C{self._num_clusters}
     @type num_clusters:     int
     @param normalise:       passed to C{VectorSpaceClusterer.__init__}
     @type normalise:        boolean
     @param svd_dimensions:  passed to C{VectorSpaceClusterer.__init__}
     @type svd_dimensions:   int or None
     """
     assert chktype(1, num_clusters, int)
     assert chktype(2, normalise, bool)
     assert chktype(3, svd_dimensions, int, types.NoneType)
     VectorSpaceClusterer.__init__(self, normalise, svd_dimensions)
     # Nothing has been clustered yet.
     self._dendogram = None
     self._groups_values = None
     self._num_clusters = num_clusters

예제 #12

0

파일 보기

파일: __init__.py 프로젝트: ronaldahmed/robot-navigation

 def __init__(self, num_clusters=1, normalise=True, svd_dimensions=None):
     """
     Initialise the base vector-space clusterer and record the
     requested number of clusters.

     @param num_clusters:    kept in C{self._num_clusters}
     @type num_clusters:     int
     @param normalise:       forwarded to the base class initialiser
     @type normalise:        boolean
     @param svd_dimensions:  forwarded to the base class initialiser
     @type svd_dimensions:   int or None
     """
     assert chktype(1, num_clusters, int)
     assert chktype(2, normalise, bool)
     assert chktype(3, svd_dimensions, int, types.NoneType)
     VectorSpaceClusterer.__init__(self, normalise, svd_dimensions)
     self._num_clusters = num_clusters
     # Both result holders start out empty.
     self._dendogram = self._groups_values = None

예제 #13

0

파일 보기

파일: __init__.py 프로젝트: ronaldahmed/robot-navigation

 def __init__(self, normalise=False, svd_dimensions=None):
     """
     @param normalise:       should vectors be normalised to length 1
     @type normalise:        boolean
     @param svd_dimensions:  number of dimensions to use in reducing vector
                             dimensionality with SVD
     @type svd_dimensions:   int or None
     """
     assert chktype(1, normalise, bool)
     assert chktype(2, svd_dimensions, int, types.NoneType)
     # Store the configuration.
     self._should_normalise = normalise
     self._svd_dimensions = svd_dimensions
     # No SVD-derived matrix is available yet.
     self._Tt = None

예제 #14

0

파일 보기

 def __init__(self, normalise=False, svd_dimensions=None):
     """
     Record the normalisation and SVD settings of this clusterer.

     @param normalise:       should vectors be normalised to length 1
     @type normalise:        boolean
     @param svd_dimensions:  number of dimensions to use in reducing vector
                             dimensionality with SVD
     @type svd_dimensions:   int or None
     """
     assert chktype(1, normalise, bool)
     assert chktype(2, svd_dimensions, int, types.NoneType)
     self._svd_dimensions = svd_dimensions
     self._should_normalise = normalise
     # Starts out unset.
     self._Tt = None

예제 #15

0

파일 보기

파일: __init__.py 프로젝트: ronaldahmed/robot-navigation

    def train_supervised(self, labelled_sequences, **kwargs):
        """
        Supervised training maximising the joint probability of the symbol and
        state sequences. This is done via collecting frequencies of
        transitions between states, symbol observations while within each
        state and which states start a sentence. These frequency distributions
        are then normalised into probability estimates, which can be
        smoothed if desired.

        States and symbols seen during training that are not yet known to
        the trainer are appended to C{self._states} and C{self._symbols}.

        @return: the trained model
        @rtype: HiddenMarkovModel
        @param labelled_sequences: the training data, a set of
            labelled sequences of observations
        @type labelled_sequences: Token
        @param kwargs: may include an 'estimator' parameter, a function taking
            a C{FreqDist} and a number of bins and returning a C{ProbDistI};
            otherwise a MLE estimate is used
        """
        assert chktype(1, labelled_sequences, Token)

        # grab the property names used
        TEXT = self._properties.get("TEXT", "TEXT")
        TAG = self._properties.get("TAG", "TAG")
        SUBTOKENS = self._properties.get("SUBTOKENS", "SUBTOKENS")

        # default to the MLE estimate
        estimator = kwargs.get("estimator")
        if estimator is None:
            estimator = lambda fdist, bins: MLEProbDist(fdist)

        # count occurrences of starting states, transitions out of each state
        # and output symbols observed in each state
        starting = FreqDist()
        transitions = ConditionalFreqDist()
        outputs = ConditionalFreqDist()
        for super_token in labelled_sequences[SUBTOKENS]:
            # None marks "no previous state"; it is tested by identity so
            # that a tag with a custom __eq__ cannot be mistaken for it.
            lasts = None
            for token in super_token[SUBTOKENS]:
                state = token[TAG]
                symbol = token[TEXT]
                if lasts is None:
                    starting.inc(state)
                else:
                    transitions[lasts].inc(state)
                outputs[state].inc(symbol)
                lasts = state

                # update the state and symbol lists; append() is used, so
                # both attributes have to be mutable lists at this point
                if state not in self._states:
                    self._states.append(state)
                if symbol not in self._symbols:
                    self._symbols.append(symbol)

        # create probability distributions (with smoothing)
        N = len(self._states)
        pi = estimator(starting, N)
        A = ConditionalProbDist(transitions, estimator, False, N)
        B = ConditionalProbDist(outputs, estimator, False, len(self._symbols))

        return HiddenMarkovModel(self._symbols, self._states, A, B, pi, **self._properties)

예제 #16

0

파일 보기

파일: __init__.py 프로젝트: ronaldahmed/robot-navigation

    def tokenize(self, token, add_locs=False, add_contexts=False):
        """
        Split the text of C{token} with this tokenizer's regular
        expression and store the resulting subtokens in its SUBTOKENS
        property.

        When C{add_locs} is false the work is delegated to
        C{_tokenize_from_raw}.  Otherwise each subtoken is given a
        C{CharSpanLocation}; offsets start at the token's own location
        when it has one, and at 0 otherwise.

        @param token: The token whose TEXT property is tokenized.
        @type token: C{Token}
        @param add_locs: Whether subtokens should carry locations.
        @param add_contexts: Only used on the delegated path.
        """
        assert chktype(1, token, Token)
        TEXT = self.property("TEXT")
        LOC = self.property("LOC")
        SUBTOKENS = self.property("SUBTOKENS")

        # Without locations, the raw tokenizer path does everything.
        if not add_locs:
            self._tokenize_from_raw(token, add_locs, add_contexts)
            return

        # The pieces are expected to alternate between the two kinds
        # of substring; self._negative selects the even-indexed pieces
        # when true and the odd-indexed ones when false.
        pieces = self._regexp.split(token[TEXT])

        # Work out the source and the offset of the first character.
        source, offset = None, 0
        if token.has(LOC):
            source = token[LOC].source()
            offset = token[LOC].start()

        # Build the located subtokens; the offset advances over every
        # piece, including the ones that are not kept.
        located = []
        for index, piece in enumerate(pieces):
            end = offset + len(piece)
            if (index % 2 == 0) == self._negative and piece != "":
                span = CharSpanLocation(offset, end, source)
                located.append(Token({TEXT: piece, LOC: span}))
            offset = end

        token[SUBTOKENS] = located

예제 #17

0

파일 보기

파일: __init__.py 프로젝트: shubhampachori12110095/navigation-corpus

    def __init__(self, n, reverse=False, cutoff=0, **property_names):
        """
        Construct a new I{n}-th order stochastic tagger.  The new
        tagger should be trained, using the L{train()} method, before
        it is used to tag data.

        @param n: The order of the new C{NthOrderTagger}.
        @type n: int
        @param reverse: If true, then assign tags to subtokens in
            reverse sequential order (i.e., from last to first).
        @type reverse: C{bool}
        @type cutoff: C{int}
        @param cutoff: A count-cutoff for the tagger's frequency
            distribution.  If the tagger saw fewer than
            C{cutoff} examples of a given context in training,
            then it will return a tag of C{None} for that context.
        @type property_names: C{dict}
        @param property_names: A dictionary that can be used to override
            the default property names.  Each entry maps from a
            default property name to a new property name.
        @raise ValueError: If C{n} is negative.
        """
        assert chktype(1, n, types.IntType)
        if n < 0:
            raise ValueError('n must be non-negative')
        SequentialTagger.__init__(self, reverse, **property_names)
        self._n = n
        self._cutoff = cutoff
        self._freqdist = ConditionalFreqDist()

        # Offsets (relative to the current index) of the window of
        # tags that forms the context: the n following tags when
        # tagging in reverse, the n preceding tags otherwise.
        if self._reverse:
            self._left, self._right = 1, 1 + n
        else:
            self._left, self._right = -n, 0

예제 #18

0

파일 보기

파일: multioutput.py 프로젝트: ronaldahmed/robot-navigation

 def __init__(self, states=None, symbols=None, **properties):
     """
     Creates an HMM trainer to induce an HMM with the given states and
     output symbol alphabet. Only a supervised training method may be used.

     @param states:  the state labels (tuple, list or None)
     @param symbols: the output symbols (tuple, list or None)
     @param properties: stored unchanged as C{self._properties}
     """
     # chktype's first argument is the parameter position, so it must
     # follow the signature: states is 1 and symbols is 2.
     assert chktype(1, states, types.TupleType, types.ListType, types.NoneType)
     assert chktype(2, symbols, types.TupleType, types.ListType, types.NoneType)
     # A missing or empty sequence is replaced by a fresh empty list.
     self._states = states or []
     self._symbols = symbols or []
     self._properties = properties

예제 #19

0

파일 보기

파일: __init__.py 프로젝트: ronaldahmed/robot-navigation

 def raw_tokenize(self, text):
     """
     Tokenize a plain string and return the texts of the subtokens.

     @param text: The string to tokenize.
     @type text: C{str}
     @return: The TEXT value of every subtoken produced by
         C{tokenize}, in order.
     @rtype: C{list}
     """
     assert chktype(1, text, str)
     text_name = self.property("TEXT")
     subtokens_name = self.property("SUBTOKENS")
     # Wrap the string in a temporary token so tokenize() can be reused.
     wrapper = Token({text_name: text})
     self.tokenize(wrapper)
     pieces = []
     for subtok in wrapper[subtokens_name]:
         pieces.append(subtok[text_name])
     return pieces

예제 #20

0

파일 보기

 def _centroid(self, cluster):
     """
     Return the mean of the vectors in C{cluster}.

     @param cluster: A non-empty list of vectors.
     @type cluster: C{list}
     @return: The sum of the vectors divided by their number.
     """
     assert chktype(1, cluster, [])
     size = len(cluster)
     assert size > 0
     # Accumulate into a copy so that cluster[0] itself is untouched.
     total = copy.copy(cluster[0])
     remaining = cluster[1:]
     for member in remaining:
         total += member
     return total / float(size)

예제 #21

0

파일 보기

    def tokenize(self, token, add_locs=False, add_contexts=False):
        """
        Tokenize the TEXT property of C{token} using this tokenizer's
        regular expression, writing the result to the SUBTOKENS
        property.

        If C{add_locs} is false, C{_tokenize_from_raw} is called and
        nothing else is done.  If it is true, every subtoken receives
        a C{CharSpanLocation} whose offsets continue from the start of
        the token's own location (or from 0 when it has none).

        @param token: The token to tokenize.
        @type token: C{Token}
        @param add_locs: Whether to attach locations to the subtokens.
        @param add_contexts: Forwarded to C{_tokenize_from_raw} only.
        """
        assert chktype(1, token, Token)
        TEXT = self.property('TEXT')
        LOC = self.property('LOC')
        SUBTOKENS = self.property('SUBTOKENS')

        # No locations wanted: the raw path handles the whole job.
        if not add_locs:
            self._tokenize_from_raw(token, add_locs, add_contexts)
            return

        # The split result is expected to alternate between two kinds
        # of substring.  A true self._negative keeps the even-indexed
        # entries, a false one keeps the odd-indexed entries.
        fragments = self._regexp.split(token[TEXT])

        # Starting offset and source come from the token's location,
        # when there is one.
        source = None
        cursor = 0
        if token.has(LOC):
            source = token[LOC].source()
            cursor = token[LOC].start()

        # Walk over all fragments, advancing the cursor past each one
        # and keeping the non-empty fragments of the wanted parity.
        kept = []
        for number, fragment in enumerate(fragments):
            stop = cursor + len(fragment)
            if (number % 2 == 0) == self._negative and fragment != '':
                where = CharSpanLocation(cursor, stop, source)
                kept.append(Token({TEXT: fragment, LOC: where}))
            cursor = stop

        token[SUBTOKENS] = kept

예제 #22

0

파일 보기

파일: treebank.py 프로젝트: ronaldahmed/robot-navigation

    def read_token(self, s, add_contexts=False, add_locs=False, 
                   source=None):
        """
        Read a token from the string C{s} by splitting it into
        sentences and reading each sentence with C{self._sent_reader}.

        @param s: The string to read.
        @type s: C{str}
        @param add_contexts: If true, every sentence token receives a
            C{SubtokenContextPointer} in its CONTEXT property; the flag
            is also forwarded to the sentence reader.
        @param add_locs: Forwarded to the sentence reader.
        @param source: The source given to each sentence's
            C{SentIndexLocation}.
        @return: A token whose SENTS property holds the list of
            sentence tokens.
        @rtype: C{Token}
        """
        assert chktype(1, s, str)

        CONTEXT = self.property('CONTEXT')
        SENTS = self.property('SENTS')

        # A sentence runs from a non-space character up to the first
        # "/." that follows.  The pattern is a raw string so that the
        # backslashes reach the regex engine as written.
        sentences = re.findall(r'(?s)\S.*?/\.', s)
        sent_toks = []
        for sent_num, sentence in enumerate(sentences):
            sent_loc = SentIndexLocation(sent_num, source)
            sent_tok = self._sent_reader.read_token(
                sentence, add_contexts=add_contexts,
                add_locs=add_locs, source=sent_loc)
            sent_toks.append(sent_tok)
        tok = Token(**{SENTS: sent_toks})

        # Add context pointers, if requested
        if add_contexts:
            for i, sent_tok in enumerate(tok[SENTS]):
                sent_tok[CONTEXT] = SubtokenContextPointer(tok, SENTS, i)

        # Return the finished token.
        return tok

예제 #23

0

파일 보기

파일: __init__.py 프로젝트: ronaldahmed/robot-navigation

    def probability(self, sequence):
        """
        Returns the probability of the given symbol sequence. If the sequence
        is labelled, then returns the joint probability of the symbol, state
        sequence. Otherwise, uses the forward algorithm to find the
        probability over all label sequences.

        The sequence counts as labelled when its first subtoken has the
        TAG property.

        @return: the probability of the sequence
        @rtype: float
        @param sequence: the sequence of symbols which must contain the TEXT
            property, and optionally the TAG property
        @type sequence:  Token
        """
        assert chktype(1, sequence, Token)

        SUBTOKENS = self._properties.get("SUBTOKENS", "SUBTOKENS")
        TEXT = self._properties.get("TEXT", "TEXT")
        TAG = self._properties.get("TAG", "TAG")

        symbols = sequence[SUBTOKENS]
        T = len(symbols)

        if T > 0 and symbols[0].has(TAG):
            # Labelled case: add up the log probabilities of the prior,
            # of every transition and of every emission.
            last_state = symbols[0][TAG]
            p = self._priors.logprob(last_state) + self._outputs[last_state].logprob(symbols[0][TEXT])
            for t in range(1, T):
                state = symbols[t][TAG]
                p += self._transitions[last_state].logprob(state) + self._outputs[state].logprob(symbols[t][TEXT])
                # Advance the previous state; without this every
                # transition would be scored from the first state.
                last_state = state
            return exp(p)
        else:
            # Unlabelled case: combine the last row of the forward
            # matrix with _log_add.
            alpha = self._forward_probability(sequence)
            p = _log_add(*alpha[T - 1, :])
            return exp(p)

예제 #24

0

파일 보기

파일: __init__.py 프로젝트: shubhampachori12110095/navigation-corpus

    def train(self, tagged_token):
        """
        Train this C{NthOrderTagger} using the given training data.
        If this method is called multiple times, then the training
        data will be combined.

        For every subtoken whose context window does not start before
        the first subtoken, the pair (window of tags, subtoken text)
        is recorded together with the subtoken's own tag.

        @param tagged_token: A tagged corpus.  Each subtoken in
            C{tagged_token} should define the C{text} and C{tag}
            properties.
        @type tagged_token: L{Token}
        """
        assert chktype(1, tagged_token, Token)
        SUBTOKENS = self.property('SUBTOKENS')
        TEXT = self.property('TEXT')
        TAG = self.property('TAG')
        left, right = self._left, self._right

        # Pull out the subtokens and, separately, all of their tags.
        subtokens = tagged_token[SUBTOKENS]
        tags = tuple([subtok[TAG] for subtok in subtokens])

        for position, subtok in enumerate(subtokens):
            window_start = position + left
            if window_start >= 0:
                # The context pairs the tags in the window with the
                # text of the current subtoken.
                window = tags[window_start:position + right]
                context = (window, subtok[TEXT])

                # Count the observed tag for this context.
                observed = subtok[TAG]
                self._freqdist[context].inc(observed)

예제 #25

0

파일 보기

    def read_token(self, s, add_contexts=False, add_locs=False, source=None):
        """
        Split C{s} into sentences, read every sentence with
        C{self._sent_reader} and collect the results in one token.

        @param s: The string to read.
        @type s: C{str}
        @param add_contexts: Forwarded to the sentence reader; if true,
            the CONTEXT property of every sentence token is also set to
            a C{SubtokenContextPointer} into the returned token.
        @param add_locs: Forwarded to the sentence reader.
        @param source: Used as the source of the C{SentIndexLocation}
            built for each sentence.
        @return: A token whose SENTS property is the list of sentence
            tokens.
        @rtype: C{Token}
        """
        assert chktype(1, s, str)

        CONTEXT = self.property('CONTEXT')
        SENTS = self.property('SENTS')

        # Raw string: "\S" and "\." are regex escapes, not string
        # escapes.  A sentence starts at a non-space character and
        # ends at the first "/." after it.
        sentences = re.findall(r'(?s)\S.*?/\.', s)
        sent_toks = []
        for sent_num, sentence in enumerate(sentences):
            sent_loc = SentIndexLocation(sent_num, source)
            sent_tok = self._sent_reader.read_token(sentence,
                                                    add_contexts=add_contexts,
                                                    add_locs=add_locs,
                                                    source=sent_loc)
            sent_toks.append(sent_tok)
        tok = Token(**{SENTS: sent_toks})

        # Add context pointers, if requested
        if add_contexts:
            for i, sent_tok in enumerate(tok[SENTS]):
                sent_tok[CONTEXT] = SubtokenContextPointer(tok, SENTS, i)

        # Return the finished token.
        return tok

예제 #26

0

파일 보기

파일: __init__.py 프로젝트: ronaldahmed/robot-navigation

    def best_path(self, unlabelled_sequence):
        """
        Returns the state sequence of the optimal (most probable) path through
        the HMM. Uses the Viterbi algorithm to calculate this part by dynamic
        programming.

        The subtokens are read from the C{"SUBTOKENS"} property and their
        symbols from the C{"TEXT"} property; these names are fixed inside
        this method.  The first subtoken is accessed unconditionally, so
        the sequence is expected to be non-empty.

        @return: the state sequence
        @rtype: sequence of any
        @param unlabelled_sequence: the sequence of unlabelled symbols 
        @type unlabelled_sequence: Token
        """
        assert chktype(1, unlabelled_sequence, Token)

        # NOTE(review): property names are hard-coded here rather than
        # looked up in self._properties -- confirm this is intended.
        SUBTOKENS = "SUBTOKENS"
        TEXT = "TEXT"

        symbols = unlabelled_sequence[SUBTOKENS]
        T = len(symbols)
        N = len(self._states)
        # V[t, i]: best log probability found for being in state i at time t.
        V = zeros((T, N), Float64)
        # B[t, state]: the predecessor state on that best path.
        B = {}

        # find the starting log probabilities for each state
        symbol = symbols[0][TEXT]
        for i in range(N):
            state = self._states[i]
            V[0, i] = self._priors.logprob(state) + self._output_logprob(state, symbol)
            B[0, state] = None

        # find the maximum log probabilities for reaching each state at time t
        for t in range(1, T):
            symbol = symbols[t][TEXT]
            for j in range(N):
                sj = self._states[j]
                best = None
                for i in range(N):
                    si = self._states[i]
                    va = V[t - 1, i] + self._transitions[si].logprob(sj)
                    # strict comparison: on a tie the earlier state is kept
                    if not best or va > best[0]:
                        best = (va, si)
                V[t, j] = best[0] + self._output_logprob(sj, symbol)
                B[t, sj] = best[1]

        # find the highest probability final state
        best = None
        for i in range(N):
            val = V[T - 1, i]
            # strict comparison: on a tie the earlier state is kept
            if not best or val > best[0]:
                best = (val, self._states[i])

        # traverse the back-pointers B to find the state sequence
        current = best[1]
        sequence = [current]
        for t in range(T - 1, 0, -1):
            last = B[t, current]
            sequence.append(last)
            current = last

        # the states were collected from last to first
        sequence.reverse()
        return sequence

예제 #27

0

파일 보기

파일: __init__.py 프로젝트: ronaldahmed/robot-navigation

 def _centroid(self, cluster):
     """
     Compute the average of the vectors in C{cluster}.

     @param cluster: The vectors to average; must not be empty.
     @type cluster: C{list}
     @return: The accumulated sum divided by C{len(cluster)}.
     """
     assert chktype(1, cluster, [])
     count = len(cluster)
     assert count > 0
     # Start from a copy of the first vector and add the others to it.
     accumulated = copy.copy(cluster[0])
     others = cluster[1:]
     for vec in others:
         accumulated += vec
     return accumulated / float(count)

예제 #28

0

파일 보기

 def raw_tokenize(self, text):
     """
     Run C{tokenize} on a plain string.

     @param text: The string to tokenize.
     @type text: C{str}
     @return: A list with the TEXT value of each resulting subtoken.
     @rtype: C{list}
     """
     assert chktype(1, text, str)
     text_key = self.property('TEXT')
     subtokens_key = self.property('SUBTOKENS')
     # tokenize() works on tokens, so wrap the string in one first.
     holder = Token({text_key: text})
     self.tokenize(holder)
     texts = []
     for piece in holder[subtokens_key]:
         texts.append(piece[text_key])
     return texts

예제 #29

0

파일 보기

 def __init__(self,
              num_means,
              distance,
              repeats=1,
              conv_test=1e-6,
              initial_means=None,
              normalise=False,
              svd_dimensions=None,
              rng=None):
     """
     @param  num_means:  the number of means to use (may use fewer)
     @type   num_means:  int
     @param  distance:   measure of distance between two vectors
     @type   distance:   function taking two vectors and returning a float
     @param  repeats:    number of randomised clustering trials to use;
                         must be at least 1, and exactly 1 when
                         initial_means is given
     @type   repeats:    int
     @param  conv_test:  maximum variation in mean differences before
                         deemed convergent
     @type   conv_test:  number
     @param  initial_means: set of k initial means; when given, its
                         length must equal num_means
     @type   initial_means: sequence of vectors
     @param  normalise:  should vectors be normalised to length 1
     @type   normalise:  boolean
     @param svd_dimensions: number of dimensions to use in reducing vector
                            dimensionality with SVD
     @type svd_dimensions: int or None
     @param  rng:        random number generator (or None, in which case
                         a new random.Random() is created)
     @type   rng:        Random
     """
     # Arguments 2 (distance) and 5 (initial_means) are not type-checked.
     assert chktype(1, num_means, int)
     assert chktype(3, repeats, int)
     assert chktype(4, conv_test, int, float)
     assert chktype(6, normalise, bool)
     assert chktype(7, svd_dimensions, int, types.NoneType)
     VectorSpaceClusterer.__init__(self, normalise, svd_dimensions)

     self._num_means = num_means
     self._distance = distance
     self._max_difference = conv_test

     # Explicit starting means must match the requested number.
     assert not initial_means or len(initial_means) == num_means
     self._means = initial_means

     # Repeated trials cannot be combined with explicit starting means.
     assert repeats >= 1
     assert not (initial_means and repeats > 1)
     self._repeats = repeats

     self._rng = rng or random.Random()

예제 #30

0

파일 보기

 def raw_xtokenize(self, text):
     """
     Generator version of raw tokenization: run C{xtokenize} on a
     plain string and yield the TEXT value of each subtoken.

     Being a generator, nothing (including the type check) runs until
     the first item is requested.

     @param text: The string to tokenize.
     @type text: C{str}
     """
     assert chktype(1, text, str)
     text_key = self.property('TEXT')
     subtokens_key = self.property('SUBTOKENS')
     # xtokenize() works on tokens, so wrap the string in one first.
     holder = Token({text_key: text})
     self.xtokenize(holder)
     for piece in holder[subtokens_key]:
         yield piece[text_key]

예제 #31

0

파일 보기

파일: __init__.py 프로젝트: ronaldahmed/robot-navigation

 def raw_xtokenize(self, text):
     """
     Lazily tokenize a plain string.

     The string is wrapped in a temporary token, C{xtokenize} is
     applied to it, and the TEXT value of every subtoken is yielded
     in turn.  Because this is a generator, the body (including the
     type check) only starts running on the first iteration.

     @param text: The string to tokenize.
     @type text: C{str}
     """
     assert chktype(1, text, str)
     text_name = self.property("TEXT")
     subtokens_name = self.property("SUBTOKENS")
     wrapper = Token({text_name: text})
     self.xtokenize(wrapper)
     for subtoken in wrapper[subtokens_name]:
         yield subtoken[text_name]

예제 #32

0

파일 보기

파일: multioutput.py 프로젝트: shubhampachori12110095/navigation-corpus

 def __init__(self, states=None, symbols=None, **properties):
     """
     Creates an HMM trainer to induce an HMM with the given states and
     output symbol alphabet. Only a supervised training method may be used.

     @param states: the states of the HMM; C{None} or an empty sequence
         is stored as an empty list
     @type states: tuple, list or None
     @param symbols: the output symbol alphabet; C{None} or an empty
         sequence is stored as an empty list
     @type symbols: tuple, list or None
     @param properties: extra keyword arguments, stored unchanged
     """
     # The first chktype argument identifies the parameter by position,
     # so states (first parameter) is 1 and symbols (second) is 2.
     assert chktype(1, states, types.TupleType, types.ListType,
                    types.NoneType)
     assert chktype(2, symbols, types.TupleType, types.ListType,
                    types.NoneType)
     # Any falsy value (None, empty tuple, empty list) becomes a new list.
     self._states = states or []
     self._symbols = symbols or []
     self._properties = properties

예제 #33

0

파일 보기

 def likelihood(self, labelled_token):
     """
     Return the result of C{likelihood_vectorspace} for the token's
     C{FEATURES} vector and its C{CLUSTER} label.

     The vector is first normalised (when normalisation is enabled) and
     then multiplied by the stored matrix C{self._Tt} (when one is set).

     @param labelled_token: token carrying C{FEATURES} and C{CLUSTER}
     @type labelled_token: Token
     """
     assert chktype(1, labelled_token, Token)
     vector = labelled_token['FEATURES']
     #assert chktype('features', vector, Numeric.array([]), SparseArray)
     if self._should_normalise:
         vector = self._normalise(vector)
     # Identity test: "!= None" may be evaluated element-wise when _Tt
     # is an array, which is not the question being asked here.
     if self._Tt is not None:
         vector = Numeric.matrixmultiply(self._Tt, vector)
     return self.likelihood_vectorspace(vector, labelled_token['CLUSTER'])

예제 #34

0

파일 보기

 def __init__(self, items=[]):
     """
     Build a dendogram whose leaves wrap the given items.

     @param  items: the items at the leaves of the dendogram
     @type   items: sequence of (any)
     """
     assert chktype(1, items, [])
     # Wrap every item in its own node; the argument list is only read.
     leaves = []
     for leaf_value in items:
         leaves.append(_DendogramNode(leaf_value))
     self._items = leaves
     # Separate shallow copy of the node list.
     self._original_items = copy.copy(leaves)
     self._merge = 1

예제 #35

0

파일 보기

파일: __init__.py 프로젝트: ronaldahmed/robot-navigation

 def xtokenize(self, token, add_locs=False, add_contexts=False):
     """
     Tokenize C{token} and expose its subtokens as an iterator.

     If the token's text has both C{__iter__} and C{next} attributes it
     is first joined into a single string.  C{tokenize} is then called
     and the token's C{SUBTOKENS} value is replaced by an iterator
     over it.

     @param token: the token to tokenize (modified in place)
     @type token: Token
     @param add_locs: passed through to C{tokenize}
     @param add_contexts: passed through to C{tokenize}
     """
     assert chktype(1, token, Token)
     text_key = self.property("TEXT")
     subtokens_key = self.property("SUBTOKENS")
     raw = token[text_key]
     # Text supplied as an iterator is collapsed to one string first.
     is_iterator = hasattr(raw, "__iter__") and hasattr(raw, "next")
     if is_iterator:
         token[text_key] = "".join(raw)
     self.tokenize(token, add_locs, add_contexts)
     token[subtokens_key] = iter(token[subtokens_key])

예제 #36

0

파일 보기

 def xtokenize(self, token, add_locs=False, add_contexts=False):
     """
     Run C{tokenize} on C{token}, leaving its subtokens as an iterator.

     Text that is itself an iterator (it has C{__iter__} and C{next})
     is concatenated into one string before tokenizing.  Afterwards
     the value stored under C{SUBTOKENS} is wrapped with C{iter}.

     @param token: the token to tokenize (modified in place)
     @type token: Token
     @param add_locs: forwarded unchanged to C{tokenize}
     @param add_contexts: forwarded unchanged to C{tokenize}
     """
     assert chktype(1, token, Token)
     text_prop = self.property('TEXT')
     subtokens_prop = self.property('SUBTOKENS')
     contents = token[text_prop]
     # Join iterator-style text into a single string.
     streamed = hasattr(contents, '__iter__') and hasattr(contents, 'next')
     if streamed:
         token[text_prop] = ''.join(contents)
     self.tokenize(token, add_locs, add_contexts)
     token[subtokens_prop] = iter(token[subtokens_prop])

예제 #37

0

파일 보기

파일: __init__.py 프로젝트: ronaldahmed/robot-navigation

 def __init__(self, items=[]):
     """
     Create a dendogram with one leaf node per item.

     @param  items: the items at the leaves of the dendogram
     @type   items: sequence of (any)
     """
     assert chktype(1, items, [])
     # One node per item; the incoming sequence itself is not modified.
     nodes = []
     for entry in items:
         nodes.append(_DendogramNode(entry))
     self._items = nodes
     # Shallow copy, so the two lists are distinct objects.
     self._original_items = copy.copy(nodes)
     self._merge = 1

예제 #38

0

파일 보기

파일: __init__.py 프로젝트: ronaldahmed/robot-navigation

 def likelihood(self, labelled_token):
     """
     Compute C{likelihood_vectorspace} for the token's C{FEATURES}
     vector paired with its C{CLUSTER} label.

     Before that call the vector is normalised if normalisation is
     enabled, and multiplied by C{self._Tt} if that matrix is set.

     @param labelled_token: token carrying C{FEATURES} and C{CLUSTER}
     @type labelled_token: Token
     """
     assert chktype(1, labelled_token, Token)
     vector = labelled_token['FEATURES']
     #assert chktype('features', vector, Numeric.array([]), SparseArray)
     if self._should_normalise:
         vector = self._normalise(vector)
     # Use an identity test; "!= None" against an array may compare
     # element by element rather than test for presence.
     if self._Tt is not None:
         vector = Numeric.matrixmultiply(self._Tt, vector)
     return self.likelihood_vectorspace(vector, labelled_token['CLUSTER'])

예제 #39

0

파일 보기

파일: eval.py 프로젝트: shubhampachori12110095/navigation-corpus

    def __init__(self, reference, test):
        """
        Construct a new confusion matrix from a list of reference
        values and a corresponding list of test values.

        @type reference: C{list}
        @param reference: An ordered list of reference values.
        @type test: C{list}
        @param test: A list of values to compare against the
            corresponding reference values.
        @raise ValueError: If C{reference} and C{test} do not have
            the same length.
        """
        assert chktype(1, reference, [])
        assert chktype(2, test, [])

        if len(reference) != len(test):
            raise ValueError('Lists must have the same length.')

        # Get a list of all values.
        values = dict([(val, 1) for val in reference + test]).keys()

        # Construct a value->index dictionary
        indices = dict([(val, i) for (i, val) in enumerate(values)])

        # Make a confusion matrix table: rows are indexed by reference
        # value, columns by test value.
        confusion = [[0 for val in values] for val in values]
        max_conf = 0  # Largest single cell count seen so far
        for ref_val, test_val in zip(reference, test):
            row = confusion[indices[ref_val]]
            col = indices[test_val]
            row[col] += 1
            max_conf = max(max_conf, row[col])

        #: A list of all values in C{reference} or C{test}.
        self._values = values
        #: A dictionary mapping values in L{self._values} to their indices.
        self._indices = indices
        #: The confusion matrix itself (as a list of lists of counts).
        self._confusion = confusion
        #: The greatest count in L{self._confusion} (used for printing).
        # Store the computed maximum, not a constant 0.
        self._max_conf = max_conf
        #: The total number of values in the confusion matrix.
        self._total = len(reference)
        #: The number of correct (on-diagonal) values in the matrix.
        self._correct = sum([confusion[i][i] for i in range(len(values))])

예제 #40

0

파일 보기

파일: eval.py 프로젝트: ronaldahmed/robot-navigation

    def __init__(self, reference, test):
        """
        Construct a new confusion matrix from a list of reference
        values and a corresponding list of test values.

        @type reference: C{list}
        @param reference: An ordered list of reference values.
        @type test: C{list}
        @param test: A list of values to compare against the
            corresponding reference values.
        @raise ValueError: If C{reference} and C{test} do not have
            the same length.
        """
        assert chktype(1, reference, [])
        assert chktype(2, test, [])

        if len(reference) != len(test):
            raise ValueError('Lists must have the same length.')

        # Get a list of all values.
        values = dict([(val,1) for val in reference+test]).keys()

        # Construct a value->index dictionary
        indices = dict([(val,i) for (i,val) in enumerate(values)])

        # Make a confusion matrix table; the first index is the
        # reference value, the second the test value.
        confusion = [[0 for val in values] for val in values]
        max_conf = 0 # Highest count held by any one cell so far
        for expected, observed in zip(reference, test):
            cells = confusion[indices[expected]]
            column = indices[observed]
            cells[column] += 1
            max_conf = max(max_conf, cells[column])

        #: A list of all values in C{reference} or C{test}.
        self._values = values
        #: A dictionary mapping values in L{self._values} to their indices.
        self._indices = indices
        #: The confusion matrix itself (as a list of lists of counts).
        self._confusion = confusion
        #: The greatest count in L{self._confusion} (used for printing).
        # Keep the maximum computed in the loop instead of resetting to 0.
        self._max_conf = max_conf
        #: The total number of values in the confusion matrix.
        self._total = len(reference)
        #: The number of correct (on-diagonal) values in the matrix.
        self._correct = sum([confusion[i][i] for i in range(len(values))])

예제 #41

0

파일 보기

파일: eval.py 프로젝트: shubhampachori12110095/navigation-corpus

def recall(reference, test):
    """
    Given a set of reference values and a set of test values, return
    the fraction of reference values that also appear in the test set,
    i.e. |C{reference}S{cap}C{test}|/|C{reference}|.
    If C{reference} is empty, return C{None}.

    @type reference: C{Set}
    @param reference: A set of reference values.
    @type test: C{Set}
    @param test: A set of values to compare against the reference set.
    @rtype: C{float} or C{None}
    """
    assert chktype(1, reference, sets.BaseSet)
    assert chktype(2, test, sets.BaseSet)
    # With no reference values the ratio is undefined.
    if len(reference) == 0:
        return None
    shared = reference.intersection(test)
    return float(len(shared)) / len(reference)

예제 #42

0

파일 보기

 def classify(self, token):
     """
     Assign a cluster to C{token} and store its name under C{CLUSTER}.

     The token's C{FEATURES} vector is normalised (when normalisation
     is enabled) and multiplied by C{self._Tt} (when that matrix is
     set) before being passed to C{classify_vectorspace}.

     @param token: the token to classify (modified in place)
     @type token: Token
     """
     assert chktype(1, token, Token)
     vector = token['FEATURES']
     #assert chktype('features', vector, Numeric.array([]), SparseArray)
     if self._should_normalise:
         vector = self._normalise(vector)
     # Identity test: "!= None" may be evaluated element-wise when _Tt
     # is an array.
     if self._Tt is not None:
         vector = Numeric.matrixmultiply(self._Tt, vector)
     cluster = self.classify_vectorspace(vector)
     token['CLUSTER'] = self.cluster_name(cluster)

예제 #43

0

파일 보기

파일: __init__.py 프로젝트: shubhampachori12110095/navigation-corpus

    def tag(self, token):
        """
        Assign a tag to every subtoken of C{token}, in sequential order.

        Each subtoken's C{TAG} property is set to the value returned by
        C{tag_subtoken} for that position.

        @param token: the token whose subtokens should be tagged
        @type token: Token
        """
        assert chktype(1, token, Token)
        subtokens_key = self.property('SUBTOKENS')
        tag_key = self.property('TAG')

        children = token[subtokens_key]
        # Each tag is stored before the next position is processed.
        for position, child in enumerate(children):
            child[tag_key] = self.tag_subtoken(children, position)

예제 #44

0

파일 보기

파일: eval.py 프로젝트: ronaldahmed/robot-navigation

def recall(reference, test):
    """
    Return the proportion of C{reference} values that are also found
    in C{test}: |C{reference}S{cap}C{test}|/|C{reference}|.
    An empty C{reference} set yields C{None}.

    @type reference: C{Set}
    @param reference: A set of reference values.
    @type test: C{Set}
    @param test: A set of values to compare against the reference set.
    @rtype: C{float} or C{None}
    """
    assert chktype(1, reference, sets.BaseSet)
    assert chktype(2, test, sets.BaseSet)
    # Nothing to recall from an empty reference set.
    if len(reference) == 0:
        return None
    common = reference.intersection(test)
    return float(len(common))/len(reference)

예제 #45

0

파일 보기

    def __init__(self, initial_means, priors=None,
                 covariance_matrices=None, conv_threshold=1e-6,
                 bias=0.1, normalise=False, svd_dimensions=None):
        """
        Creates an EM clusterer with the given starting parameters,
        convergence threshold and vector mangling parameters.

        @param  initial_means: the means of the gaussian cluster centers
        @type   initial_means: [seq of] Numeric array or seq of SparseArray
        @param  priors: the prior probability for each cluster
        @type   priors: Numeric array or seq of float
        @param  covariance_matrices: the covariance matrix for each cluster
        @type   covariance_matrices: [seq of] Numeric array
        @param  conv_threshold: maximum change in likelihood before deemed
                    convergent
        @type   conv_threshold: int or float
        @param  bias: variance bias used to ensure non-singular covariance
                      matrices
        @type   bias: float
        @param  normalise:  should vectors be normalised to length 1
        @type   normalise:  boolean
        @param  svd_dimensions: number of dimensions to use in reducing
                    vector dimensionality with SVD
        @type   svd_dimensions: int
        """
        #assert chktype(1, initial_means, [])
        #assert chktype(2, priors, [], types.NoneType)
        #assert chktype(3, covariance_matrices, [], types.NoneType)
        assert chktype(4, conv_threshold, float, int)
        assert chktype(6, normalise, bool)
        assert chktype(7, svd_dimensions, int, types.NoneType)
        VectorSpaceClusterer.__init__(self, normalise, svd_dimensions)

        # The means are stored as a Float64 array; one cluster per mean.
        self._means = Numeric.array(initial_means, Numeric.Float64)
        self._num_clusters = len(initial_means)

        # Remaining parameters are stored exactly as supplied.
        self._priors = priors
        self._covariance_matrices = covariance_matrices
        self._conv_threshold = conv_threshold
        self._bias = bias

예제 #46

0

파일 보기

파일: __init__.py 프로젝트: ronaldahmed/robot-navigation

 def classify(self, token):
     """
     Classify C{token} and record the resulting cluster name in its
     C{CLUSTER} property.

     The C{FEATURES} vector is normalised when normalisation is
     enabled, multiplied by C{self._Tt} when that matrix is set, and
     then handed to C{classify_vectorspace}.

     @param token: the token to classify (modified in place)
     @type token: Token
     """
     assert chktype(1, token, Token)
     vector = token['FEATURES']
     #assert chktype('features', vector, Numeric.array([]), SparseArray)
     if self._should_normalise:
         vector = self._normalise(vector)
     # "is not None" asks whether a matrix is present; "!= None" on an
     # array may instead compare element by element.
     if self._Tt is not None:
         vector = Numeric.matrixmultiply(self._Tt, vector)
     cluster = self.classify_vectorspace(vector)
     token['CLUSTER'] = self.cluster_name(cluster)

예제 #47

0

파일 보기

파일: __init__.py 프로젝트: ronaldahmed/robot-navigation

 def __init__(self, num_means, distance, repeats=1, conv_test=1e-6,
              initial_means=None, normalise=False, svd_dimensions=None,
              rng=None):
     """
     Set up a clusterer from the number of means, a distance function
     and optional trial, convergence and vector-handling settings.

     @param  num_means:  the number of means to use (may use fewer)
     @type   num_means:  int
     @param  distance:   measure of distance between two vectors
     @type   distance:   function taking two vectors and returning a float
     @param  repeats:    number of randomised clustering trials to use
     @type   repeats:    int
     @param  conv_test:  maximum variation in mean differences before
                         deemed convergent
     @type   conv_test:  number
     @param  initial_means: set of k initial means
     @type   initial_means: sequence of vectors
     @param  normalise:  should vectors be normalised to length 1
     @type   normalise:  boolean
     @param  svd_dimensions: number of dimensions to use in reducing
                         vector dimensionality with SVD
     @type   svd_dimensions: int
     @param  rng:        random number generator (or None)
     @type   rng:        Random
     """
     assert chktype(1, num_means, int)
     #assert chktype(2, distance, ...)
     assert chktype(3, repeats, int)
     assert chktype(4, conv_test, int, float)
     #assert chktype(5, initial_means, [Numeric.array([])], [SparseArray])
     assert chktype(6, normalise, bool)
     assert chktype(7, svd_dimensions, int, types.NoneType)
     VectorSpaceClusterer.__init__(self, normalise, svd_dimensions)

     self._num_means = num_means
     self._distance = distance
     self._max_difference = conv_test

     # When initial means are supplied there must be exactly num_means.
     assert not initial_means or len(initial_means) == num_means
     self._means = initial_means

     # At least one trial; supplied initial means allow only one trial.
     assert repeats >= 1
     assert not initial_means or repeats <= 1
     self._repeats = repeats

     # Use the caller's generator when given, otherwise a new one.
     self._rng = rng or random.Random()

예제 #48

0

파일 보기

파일: eval.py 프로젝트: ronaldahmed/robot-navigation

def accuracy(reference, test):
    """
    Given a list of reference values and a corresponding list of test
    values, return the fraction of corresponding values that are
    equal.  In particular, return the fraction of indices
    C{0<=i<len(test)} such that C{test[i] == reference[i]}.

    @type reference: C{list}
    @param reference: An ordered list of reference values.
    @type test: C{list}
    @param test: A list of values to compare against the corresponding
        reference values.
    @raise ValueError: If C{reference} and C{test} do not have the
        same length.
    """
    assert chktype(1, reference, [])
    assert chktype(2, test, [])
    if len(reference) != len(test):
        raise ValueError("Lists must have the same length.")
    # Count the positions at which the two lists agree.
    matches = 0
    for expected, observed in zip(reference, test):
        if expected == observed:
            matches += 1
    return float(matches) / len(reference)

예제 #49

0

파일 보기

 def groups(self, n):
     """
     Finds the n-groups of items (leaves) reachable from a cut at depth n.

     @param  n: number of groups
     @type   n: int
     @return: whatever the root node's C{groups(n)} returns
     """
     assert chktype(1, n, int)
     nodes = self._items
     if len(nodes) <= 1:
         # A single node already acts as the root.
         return nodes[0].groups(n)
     # Several top-level nodes: join them under a root that is built
     # for this call only and not stored.
     top = _DendogramNode(self._merge, *nodes)
     return top.groups(n)

예제 #50

0

파일 보기

파일: __init__.py 프로젝트: ronaldahmed/robot-navigation

 def groups(self, n):
     """
     Finds the n-groups of items (leaves) reachable from a cut at depth n.

     @param  n: number of groups
     @type   n: int
     @return: the value of C{groups(n)} called on the root node
     """
     assert chktype(1, n, int)
     current = self._items
     if len(current) <= 1:
         # Only one node, so it serves as the root directly.
         return current[0].groups(n)
     # More than one node: wrap them in a root node created just for
     # this query; it is not kept on the instance.
     root = _DendogramNode(self._merge, *current)
     return root.groups(n)

예제 #51

0

파일 보기

파일: eval.py 프로젝트: shubhampachori12110095/navigation-corpus

def accuracy(reference, test):
    """
    Return the fraction of positions at which C{reference} and C{test}
    hold equal values, i.e. the fraction of indices
    C{0<=i<len(test)} such that C{test[i] == reference[i]}.

    @type reference: C{list}
    @param reference: An ordered list of reference values.
    @type test: C{list}
    @param test: A list of values to compare against the corresponding
        reference values.
    @raise ValueError: If C{reference} and C{test} do not have the
        same length.
    """
    assert chktype(1, reference, [])
    assert chktype(2, test, [])
    if len(reference) != len(test):
        raise ValueError("Lists must have the same length.")
    # Tally the agreeing pairs one by one.
    agreed = 0
    for ref_item, test_item in zip(reference, test):
        if ref_item == test_item:
            agreed += 1
    return float(agreed) / len(reference)

예제 #52

0

파일 보기

 def likelihood(self, labelled_token):
     """
     Returns the likelihood (a float) of the token having the
     corresponding cluster.

     A copy of the token without C{CLUSTER} is classified; the result
     is 1.0 if that copy then equals C{labelled_token}, else 0.0.

     @param labelled_token: a token that has a C{CLUSTER} property
     @type labelled_token: Token
     """
     assert chktype(1, labelled_token, Token)
     assert labelled_token.has('CLUSTER')
     unlabelled = labelled_token.exclude('CLUSTER')
     self.classify(unlabelled)
     # Agreement with the supplied labelling is all-or-nothing.
     if unlabelled == labelled_token:
         return 1.0
     return 0.0

예제 #53

0

파일 보기

 def vector(self, token):
     """
     Returns the vector after normalisation and dimensionality reduction
     for the given token's FEATURES.

     @param token: the token whose C{FEATURES} vector is transformed
     @type token: Token
     @return: the transformed vector
     """
     assert chktype(1, token, Token)
     vector = token['FEATURES']
     #assert chktype('features', vector, Numeric.array([]), SparseArray)
     if self._should_normalise:
         vector = self._normalise(vector)
     # Identity test: "!= None" may be evaluated element-wise when _Tt
     # is an array, instead of testing whether a matrix is present.
     if self._Tt is not None:
         vector = Numeric.matrixmultiply(self._Tt, vector)
     return vector

예제 #54

0

파일 보기

 def copy(self, deep=True):
     """
     @rtype: L{Token}
     @return: A new copy of this token.
     @param deep: If false, then the new token will use the same
         objects to encode feature values that the original token
         did.  If true, then the new token will use deep copies of
         the original token's feature values.  The default value
         of C{True} is almost always the correct choice.
     """
     assert chktype(1, deep, bool)
     # Inside the method body "copy" is the module, not this method.
     if deep:
         duplicate = copy.deepcopy(self)
     else:
         duplicate = copy.copy(self)
     return duplicate