Example #1
0
    def store_feature_counts(self, samples, chunksize=10000, processes=None):
        """
        Stores feature:count histograms for samples in Redis with the ability to increment.

        Arguments:
        samples (list) -- List of samples in the format (text, label)

        Keyword Arguments:
        chunksize (int) -- Amount of samples to process at a time.
        processes (int) -- Amount of processors to use with multiprocessing.

        """

        # BUG FIX: the original test was
        #   "positive_feature_counts" and "negative_feature_counts" in self.r.keys()
        # which, by operator precedence, only checked the second key (the first
        # string literal is always truthy). Both keys must exist before we can
        # skip the (expensive) recount.
        existing_keys = self.r.keys()
        if "positive_feature_counts" in existing_keys and "negative_feature_counts" in existing_keys:
            return

        # do this with multiprocessing
        batch_job(samples, redis_feature_consumer, chunksize=chunksize, processes=processes)
Example #2
0
    def store_feature_counts(self, samples, chunksize=10000, processes=None):
        """
        Stores feature:count histograms for samples in Redis with the ability to increment.

        Arguments:
        samples (list) -- List of samples in the format (text, label)

        Keyword Arguments:
        chunksize (int) -- Amount of samples to process at a time.
        processes (int) -- Amount of processors to use with multiprocessing.

        """

        # BUG FIX: the original condition
        #   'positive_feature_counts' and 'negative_feature_counts' in self.r.keys()
        # only checked the second key -- the first string literal is always
        # truthy. Require BOTH histograms to exist before skipping the recount.
        existing_keys = self.r.keys()
        if ('positive_feature_counts' in existing_keys
                and 'negative_feature_counts' in existing_keys):
            return

        #do this with multiprocessing
        batch_job(samples,
                  redis_feature_consumer,
                  chunksize=chunksize,
                  processes=processes)
Example #3
0
    def producer(offset, length):
        # Yields the next window of work items for batch_job; an empty list
        # signals that the input stream is exhausted (here, after item 49).
        if offset >= 50:
            return []
        return range(offset, offset + length)

    # Alternatively, producer may be a plain finite list instead of a callable:
    #producer = range(100)

    # Shared queue so worker processes can hand results back to the parent.
    # NOTE(review): this is Python 2 code (see the `print` statement below);
    # presumably `multiprocessing` is imported above this visible span -- confirm.
    queue = multiprocessing.Queue()
    def consumer(data):
        # Pushes every produced item onto the module-level queue; `global` is
        # needed because this function rebinds nothing but documents intent.
        global queue

        for i in data:
            queue.put(i)

    batch_job(producer, consumer, 10)

    out = []

    # Drain the queue into a list for display.
    # NOTE(review): multiprocessing.Queue.empty() is documented as unreliable
    # across processes -- this assumes all workers have flushed and exited
    # before the drain loop runs.
    while not queue.empty():
        out.append(queue.get())
    print out

########NEW FILE########
__FILENAME__ = text
# -*- coding: utf-8 -*-
"""Tools to deal with text processing."""
import re
import string
from nltk.tokenize import WhitespaceTokenizer
from synt import config
Example #4
0
        if offset >= 50:
            return []
        return range(offset, offset + length)

    #or producer can be a list
    #producer = range(100)

    # Shared queue so worker processes can hand results back to the parent.
    # NOTE(review): Python 2 code (see `print` statement below); assumes
    # `multiprocessing` is imported outside this visible span -- confirm.
    queue = multiprocessing.Queue()

    def consumer(data):
        # Pushes every produced item onto the module-level queue.
        global queue

        for i in data:
            queue.put(i)

    batch_job(producer, consumer, 10)

    out = []

    # Drain the queue into a list for display.
    # NOTE(review): multiprocessing.Queue.empty() is documented as unreliable
    # across processes -- this assumes all workers have flushed and exited
    # before the drain loop runs.
    while not queue.empty():
        out.append(queue.get())
    print out

########NEW FILE########
__FILENAME__ = text
# -*- coding: utf-8 -*-
"""Tools to deal with text processing."""
import re
import string
from nltk.tokenize import WhitespaceTokenizer
from synt import config