コード例 #1
0
    def _merge_terms(self, iterlist):
        # Merge-sorts terms coming from a list of term iterators.

        # Create a map so we can look up each iterator by its id() value
        itermap = {}
        for it in iterlist:
            itermap[id(it)] = it

        # Fill in the list with the head term from each iterator.

        current = []
        for it in iterlist:
            term = next(it)
            current.append((term, id(it)))
        heapify(current)

        # Number of active iterators
        active = len(current)
        while active:
            # Peek at the first term in the sorted list
            term = current[0][0]

            # Re-iterate on all items in the list that have that term
            while active and current[0][0] == term:
                it = itermap[current[0][1]]
                try:
                    nextterm = next(it)
                    heapreplace(current, (nextterm, id(it)))
                except StopIteration:
                    heappop(current)
                    active -= 1

            # Yield the term
            yield term
コード例 #2
0
ファイル: reading.py プロジェクト: intabeta/inta
    def _merge_terms(self, iterlist):
        # Merge-sorts terms coming from a list of term iterators.

        # Create a map so we can look up each iterator by its id() value
        itermap = {}
        for it in iterlist:
            itermap[id(it)] = it

        # Fill in the list with the head term from each iterator.

        current = []
        for it in iterlist:
            term = next(it)
            current.append((term, id(it)))
        heapify(current)

        # Number of active iterators
        active = len(current)
        while active:
            # Peek at the first term in the sorted list
            term = current[0][0]

            # Re-iterate on all items in the list that have that term
            while active and current[0][0] == term:
                it = itermap[current[0][1]]
                try:
                    nextterm = next(it)
                    heapreplace(current, (nextterm, id(it)))
                except StopIteration:
                    heappop(current)
                    active -= 1

            # Yield the term
            yield term
コード例 #3
0
    def __iter__(self):
        ids = iter(self.idset)
        try:
            nx = next(ids)
        except StopIteration:
            nx = -1

        for i in range(self.limit):
            if i == nx:
                try:
                    nx = next(ids)
                except StopIteration:
                    nx = -1
            else:
                yield i
コード例 #4
0
ファイル: idsets.py プロジェクト: Apophus/microblog
    def __iter__(self):
        ids = iter(self.idset)
        try:
            nx = next(ids)
        except StopIteration:
            nx = -1

        for i in xrange(self.limit):
            if i == nx:
                try:
                    nx = next(ids)
                except StopIteration:
                    nx = -1
            else:
                yield i
コード例 #5
0
ファイル: fsa.py プロジェクト: sangensong/whoosh-1
 def remap(state):
     # Return the number recorded for ``state`` in the enclosing ``mapping``;
     # a state seen for the first time is given the next value drawn from
     # the enclosing iterator ``c`` and that value is remembered.
     if state not in mapping:
         mapping[state] = next(c)
     return mapping[state]
コード例 #6
0
ファイル: fsa.py プロジェクト: BLourence/RemoteIR
 def remap(state):
     # Look up the number already assigned to ``state`` in the enclosing
     # ``mapping``; an unseen state is first assigned the next value taken
     # from the enclosing iterator ``c``.
     if state not in mapping:
         mapping[state] = next(c)
     return mapping[state]
コード例 #7
0
ファイル: reading.py プロジェクト: adamhorner/yaki-tng
    def _merge_terms(self, iterlist):
        # Merge-sorts terms coming from a list of term iterators.

        # Create a map so we can look up each iterator by its id() value
        itermap = {}
        for it in iterlist:
            itermap[id(it)] = it

        # Fill in the list with the head term from each iterator.

        current = []
        for it in iterlist:
            try:
                term = next(it)
            except StopIteration:
                continue
            current.append((term, id(it)))
        # Number of active iterators
        active = len(current)

        # If only one iterator is active, just yield from it and return
        if active == 1:
            term, itid = current[0]
            it = itermap[itid]
            yield term
            for term in it:
                yield term
            return

        # Otherwise, do a streaming heap sort of the terms from the iterators
        heapify(current)
        while active:
            # Peek at the first term in the sorted list
            term = current[0][0]

            # Re-iterate on all items in the list that have that term
            while active and current[0][0] == term:
                it = itermap[current[0][1]]
                try:
                    nextterm = next(it)
                    heapreplace(current, (nextterm, id(it)))
                except StopIteration:
                    heappop(current)
                    active -= 1

            # Yield the term
            yield term
コード例 #8
0
ファイル: reading.py プロジェクト: sudhir-12/spoken-website
    def _merge_terms(self, iterlist):
        # Merge-sorts terms coming from a list of term iterators.

        # Create a map so we can look up each iterator by its id() value
        itermap = {}
        for it in iterlist:
            itermap[id(it)] = it

        # Fill in the list with the head term from each iterator.

        current = []
        for it in iterlist:
            try:
                term = next(it)
            except StopIteration:
                continue
            current.append((term, id(it)))
        # Number of active iterators
        active = len(current)

        # If only one iterator is active, just yield from it and return
        if active == 1:
            term, itid = current[0]
            it = itermap[itid]
            yield term
            for term in it:
                yield term
            return

        # Otherwise, do a streaming heap sort of the terms from the iterators
        heapify(current)
        while active:
            # Peek at the first term in the sorted list
            term = current[0][0]

            # Re-iterate on all items in the list that have that term
            while active and current[0][0] == term:
                it = itermap[current[0][1]]
                try:
                    nextterm = next(it)
                    heapreplace(current, (nextterm, id(it)))
                except StopIteration:
                    heappop(current)
                    active -= 1

            # Yield the term
            yield term
コード例 #9
0
ファイル: enron.py プロジェクト: ws-os/oh-mainline
 def get_texts(archive):
     """Yield the raw contents of each file stored in a gzipped tar archive.

     Members for which ``extractfile()`` returns ``None`` (e.g. directories)
     are skipped. The archive is closed once the generator finishes or is
     closed.

     :param archive: path of the ``.tar.gz`` file to read.
     :returns: a generator of the members' contents as returned by
         ``read()``.
     """
     with tarfile.open(archive, "r:gz") as tar:
         while True:
             # TarFile exposes a next() *method* that returns None at the end
             # of the archive; it is not an iterator, so the builtin next()
             # cannot be used on it.
             entry = tar.next()
             # Forget the members seen so far so the list does not keep
             # growing while streaming through the archive.
             tar.members = []
             if entry is None:
                 break
             f = tar.extractfile(entry)
             if f is None:
                 continue
             try:
                 text = f.read()
             finally:
                 f.close()
             yield text
コード例 #10
0
ファイル: enron.py プロジェクト: JunjieHu/dl
 def get_texts(archive):
     """Yield the raw contents of each file stored in a gzipped tar archive.

     Members for which ``extractfile()`` returns ``None`` (e.g. directories)
     are skipped. The archive is closed once the generator finishes or is
     closed.

     :param archive: path of the ``.tar.gz`` file to read.
     :returns: a generator of the members' contents as returned by
         ``read()``.
     """
     with tarfile.open(archive, "r:gz") as tar:
         while True:
             # TarFile exposes a next() *method* that returns None at the end
             # of the archive; it is not an iterator, so the builtin next()
             # cannot be used on it.
             entry = tar.next()
             # Forget the members seen so far so the list does not keep
             # growing while streaming through the archive.
             tar.members = []
             if entry is None:
                 break
             f = tar.extractfile(entry)
             if f is None:
                 continue
             try:
                 text = f.read()
             finally:
                 f.close()
             yield text
コード例 #11
0
ファイル: filters.py プロジェクト: hbwzhsh/NLP_pro1
    def __call__(self, tokens):
        from itertools import tee

        count = len(self.filters)
        # Tee the token iterator and wrap each teed iterator with the
        # corresponding filter
        gens = [filter(t.copy() for t in gen) for filter, gen in zip(self.filters, tee(tokens, count))]
        # Keep a count of the number of running iterators
        running = count
        while running:
            for i, gen in enumerate(gens):
                if gen is not None:
                    try:
                        yield next(gen)
                    except StopIteration:
                        gens[i] = None
                        running -= 1
コード例 #12
0
ファイル: filters.py プロジェクト: AyomP/dailyfresh
    def __call__(self, tokens):
        from itertools import tee

        count = len(self.filters)
        # Tee the token iterator and wrap each teed iterator with the
        # corresponding filter
        gens = [filter(t.copy() for t in gen) for filter, gen
                in zip(self.filters, tee(tokens, count))]
        # Keep a count of the number of running iterators
        running = count
        while running:
            for i, gen in enumerate(gens):
                if gen is not None:
                    try:
                        yield next(gen)
                    except StopIteration:
                        gens[i] = None
                        running -= 1
コード例 #13
0
ファイル: fsa.py プロジェクト: sangensong/whoosh-1
def u_to_utf8(dfa, base=0):
    """Replace each text-labelled transition of ``dfa`` with a chain of
    transitions labelled by the bytes of the label's UTF-8 encoding.

    For a label that encodes to several bytes, intermediate states are
    numbered from ``itertools.count(base)`` and linked one byte at a time;
    the final byte leads to the original destination. EPSILON transitions
    are left alone.

    :param dfa: object exposing a ``transitions`` mapping of
        ``src -> {label: dest}`` and an ``add_transition(src, label, dest)``
        method.
    :param base: first number to use for newly created intermediate states.
    :raises Exception: if an ANY transition is encountered.
    """
    c = itertools.count(base)
    transitions = dfa.transitions

    # Walk a snapshot of the mapping: add_transition() is called while
    # looping and presumably inserts new source states into
    # ``dfa.transitions`` (TODO confirm), which is not allowed while
    # iterating the live dict.
    for src, trans in list(iteritems(transitions)):
        for label, dest in list(iteritems(trans)):
            if label is EPSILON:
                continue
            elif label is ANY:
                raise Exception("Can't convert an ANY transition to UTF-8")
            else:
                assert isinstance(label, text_type)
                label8 = label.encode("utf8")
                # Use a separate cursor for the byte chain so ``src`` still
                # names the original state for the remaining labels.
                cur = src
                lastindex = len(label8) - 1
                for i, byte in enumerate(label8):
                    if i < lastindex:
                        st = next(c)
                        dfa.add_transition(cur, byte, st)
                        cur = st
                    else:
                        dfa.add_transition(cur, byte, dest)
                # The text label has been replaced by its byte chain
                del trans[label]
コード例 #14
0
ファイル: fsa.py プロジェクト: BLourence/RemoteIR
def u_to_utf8(dfa, base=0):
    """Replace each text-labelled transition of ``dfa`` with a chain of
    transitions labelled by the bytes of the label's UTF-8 encoding.

    For a label that encodes to several bytes, intermediate states are
    numbered from ``itertools.count(base)`` and linked one byte at a time;
    the final byte leads to the original destination. EPSILON transitions
    are left alone.

    :param dfa: object exposing a ``transitions`` mapping of
        ``src -> {label: dest}`` and an ``add_transition(src, label, dest)``
        method.
    :param base: first number to use for newly created intermediate states.
    :raises Exception: if an ANY transition is encountered.
    """
    c = itertools.count(base)
    transitions = dfa.transitions

    # Walk a snapshot of the mapping: add_transition() is called while
    # looping and presumably inserts new source states into
    # ``dfa.transitions`` (TODO confirm), which is not allowed while
    # iterating the live dict.
    for src, trans in list(iteritems(transitions)):
        for label, dest in list(iteritems(trans)):
            if label is EPSILON:
                continue
            elif label is ANY:
                raise Exception("Can't convert an ANY transition to UTF-8")
            else:
                assert isinstance(label, text_type)
                label8 = label.encode("utf8")
                # Use a separate cursor for the byte chain so ``src`` still
                # names the original state for the remaining labels.
                cur = src
                lastindex = len(label8) - 1
                for i, byte in enumerate(label8):
                    if i < lastindex:
                        st = next(c)
                        dfa.add_transition(cur, byte, st)
                        cur = st
                    else:
                        dfa.add_transition(cur, byte, dest)
                # The text label has been replaced by its byte chain
                del trans[label]
コード例 #15
0
ファイル: fsa.py プロジェクト: sangensong/whoosh-1
def strings_dfa(strings):
    """Build a DFA from an ascending sequence of non-empty strings.

    A chain of ``DMNode`` objects is kept for the most recent string; for
    each new string the part of the chain beyond the prefix shared with the
    previous string is handed to ``add_suffix`` and replaced by fresh nodes.

    :param strings: iterable of strings in strictly ascending order.
    :returns: the ``DFA`` object created here and passed to ``add_suffix``.
    :raises Exception: if a string is not greater than its predecessor, or
        is empty.
    """
    dfa = DFA(0)
    numbers = itertools.count(1)

    previous = ""
    seen = {}
    nodes = [DMNode(0)]

    for word in strings:
        if word <= previous:
            raise Exception("Strings must be in order")
        if not word:
            raise Exception("Can't add empty string")

        # Length of the prefix shared with the previous string
        shared = 0
        for old, new in zip(previous, word):
            if old != new:
                break
            shared += 1

        # Freeze the transitions after the prefix, since they're not shared
        add_suffix(dfa, nodes, previous, shared + 1, seen)

        # Extend the node chain with one new node per remaining character
        for label in word[shared:]:
            fresh = DMNode(next(numbers))
            # Arc from the current end of the chain to the new node
            nodes[-1].arcs[label] = fresh.n
            nodes.append(fresh)
        # The node reached by the whole string is an accept state
        nodes[-1].final = True

        previous = word

    if len(nodes) > 1:
        add_suffix(dfa, nodes, previous, 0, seen)
    return dfa
コード例 #16
0
ファイル: fsa.py プロジェクト: BLourence/RemoteIR
def strings_dfa(strings):
    """Build a DFA from an ascending sequence of non-empty strings.

    A chain of ``DMNode`` objects is kept for the most recent string; for
    each new string the part of the chain beyond the prefix shared with the
    previous string is handed to ``add_suffix`` and replaced by fresh nodes.

    :param strings: iterable of strings in strictly ascending order.
    :returns: the ``DFA`` object created here and passed to ``add_suffix``.
    :raises Exception: if a string is not greater than its predecessor, or
        is empty.
    """
    dfa = DFA(0)
    numbers = itertools.count(1)

    previous = ""
    seen = {}
    nodes = [DMNode(0)]

    for word in strings:
        if word <= previous:
            raise Exception("Strings must be in order")
        if not word:
            raise Exception("Can't add empty string")

        # Length of the prefix shared with the previous string
        shared = 0
        for old, new in zip(previous, word):
            if old != new:
                break
            shared += 1

        # Freeze the transitions after the prefix, since they're not shared
        add_suffix(dfa, nodes, previous, shared + 1, seen)

        # Extend the node chain with one new node per remaining character
        for label in word[shared:]:
            fresh = DMNode(next(numbers))
            # Arc from the current end of the chain to the new node
            nodes[-1].arcs[label] = fresh.n
            nodes.append(fresh)
        # The node reached by the whole string is an accept state
        nodes[-1].final = True

        previous = word

    if len(nodes) > 1:
        add_suffix(dfa, nodes, previous, 0, seen)
    return dfa
コード例 #17
0
ファイル: filters.py プロジェクト: MNI-NIL/NIL-MNI.github.io
 def __call__(self, tokens):
     # Only selects on the first token
     t = next(tokens)
     filter = self.filters.get(t.mode, self.default_filter)
     return filter(chain([t], tokens))
コード例 #18
0
ファイル: filters.py プロジェクト: AyomP/dailyfresh
 def __call__(self, tokens):
     # Only selects on the first token
     t = next(tokens)
     filter = self.filters.get(t.mode, self.default_filter)
     return filter(chain([t], tokens))