def store_feature_counts(self, samples, chunksize=10000, processes=None):
    """
    Stores feature:count histograms for samples in Redis with the ability to increment.

    The work is skipped entirely when BOTH the "positive_feature_counts" and
    "negative_feature_counts" keys are already present in ``self.r``.

    Arguments:
    samples (list) -- List of samples in the format (text, label)

    Keyword Arguments:
    chunksize (int) -- Amount of samples to process at a time.
    processes (int) -- Amount of processors to use with multiprocessing.

    Returns:
    None
    """
    required_keys = ("positive_feature_counts", "negative_feature_counts")

    # Fetch the key listing once and test each key explicitly. The previous
    # form, `"a" and "b" in keys`, only tested "b" because the non-empty
    # string "a" is always truthy, so a store holding just the negative
    # counts was wrongly treated as complete.
    existing_keys = self.r.keys()
    if all(key in existing_keys for key in required_keys):
        return

    # do this with multiprocessing
    batch_job(samples, redis_feature_consumer, chunksize=chunksize, processes=processes)
def store_feature_counts(self, samples, chunksize=10000, processes=None):
    """
    Stores feature:count histograms for samples in Redis with the ability to increment.

    Nothing is done when both 'positive_feature_counts' and
    'negative_feature_counts' already exist among the keys of ``self.r``.

    Arguments:
    samples (list) -- List of samples in the format (text, label)

    Keyword Arguments:
    chunksize (int) -- Amount of samples to process at a time.
    processes (int) -- Amount of processors to use with multiprocessing.

    Returns:
    None
    """
    stored = self.r.keys()

    # Each key needs its own membership test: in the old expression
    # `'positive_feature_counts' and 'negative_feature_counts' in keys`
    # the first operand was a truthy string literal, so only the negative
    # key was ever checked.
    have_positive = 'positive_feature_counts' in stored
    have_negative = 'negative_feature_counts' in stored
    if have_positive and have_negative:
        return

    #do this with multiprocessing
    batch_job(samples, redis_feature_consumer, chunksize=chunksize, processes=processes)
def producer(offset, length):
    """Return range(offset, offset + length), or [] once offset reaches 50."""
    if offset >= 50:
        return []
    return range(offset, offset + length)

#or producer can be a list
#producer = range(100)

# Queue shared with consumer() so consumed items can be collected afterwards.
queue = multiprocessing.Queue()

def consumer(data):
    """Put every item of ``data`` onto the module-level ``queue``."""
    # No ``global`` declaration is needed: ``queue`` is only read here,
    # never rebound.
    for i in data:
        queue.put(i)

# The third positional argument is presumably the chunk size -- confirm
# against batch_job's signature.
batch_job(producer, consumer, 10)

# Drain whatever the consumers queued.
# NOTE(review): multiprocessing.Queue.empty() is documented as not reliable,
# so this loop may stop before every item has arrived.
out = []
while not queue.empty():
    out.append(queue.get())

# Parenthesised form prints the same list on Python 2 and is valid on Python 3.
print(out)

########NEW FILE########
__FILENAME__ = text
# -*- coding: utf-8 -*-
"""Tools to deal with text processing."""

import re
import string

from nltk.tokenize import WhitespaceTokenizer

from synt import config
if offset >= 50: return [] return range(offset, offset + length) #or producer can be a list #producer = range(100) queue = multiprocessing.Queue() def consumer(data): global queue for i in data: queue.put(i) batch_job(producer, consumer, 10) out = [] while not queue.empty(): out.append(queue.get()) print out ########NEW FILE######## __FILENAME__ = text # -*- coding: utf-8 -*- """Tools to deal with text processing.""" import re import string from nltk.tokenize import WhitespaceTokenizer from synt import config