Python text_process Examples

Programming Language: Python

Namespace/Package Name: text_processor

Method/Function: text_process

Examples at hotexamples.com: 4

Python text_process - 4 examples found. These are the top-rated real-world Python examples of text_processor.text_process, extracted from open source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: timeseries_utils.py Project: gaphex/Oracle

def process_batch(cur, geo=False, fsw=False, stem=False):
    """Run ``text_process`` over every document in ``cur`` and collect the results.

    Args:
        cur: Iterable of documents that also exposes ``count()``
            (presumably a MongoDB cursor -- confirm against callers).
            Every document must provide a ``'created'`` key.
        geo: Forwarded to ``text_process`` as ``geo``.
        fsw: Forwarded to ``text_process`` as ``filter_sw``.
        stem: Forwarded to ``text_process`` as ``stem``.

    Returns:
        A tuple ``(records, first_created, last_created)``. ``records`` is a
        list with one dict per document holding ``'words'`` (the first element
        of the ``text_process`` result, split on whitespace), ``'created_at'``
        (the document's ``'created'`` value) and ``'geo'`` (the second element
        of the ``text_process`` result). ``first_created`` and
        ``last_created`` are the ``'created'`` values of the first and last
        documents iterated, or ``None`` when ``cur`` yields no documents.
    """
    st = datetime.now()
    r = []
    l = cur.count()
    # Pre-bind both timestamps so an empty cursor yields (r, None, None)
    # instead of failing on an unbound local after the loop.
    stt = end = None
    for i, doc in enumerate(cur, 1):
        if i == 1:
            stt = doc['created']
        t = text_process(doc, geo=geo, filter_sw=fsw, stem=stem)
        r.append({'words': t[0].split(), 'created_at': doc['created'], 'geo': t[1]})
        progress(i, l, skip=100)
        end = doc['created']
    # Single-argument form emits the same text whether ``print`` is the
    # statement or the function.
    print('\nretrieval and processing took %s' % (datetime.now() - st,))
    return r, stt, end

Example #2

Show file

def process_batch(cur, geo=False, fsw=False, stem=False):
    """Process each document of ``cur`` with ``text_process`` and gather the output.

    Args:
        cur: Iterable of documents that also provides ``count()``
            (looks like a MongoDB cursor -- TODO confirm). Each document
            is indexed by the ``'created'`` key.
        geo: Passed through to ``text_process`` as ``geo``.
        fsw: Passed through to ``text_process`` as ``filter_sw``.
        stem: Passed through to ``text_process`` as ``stem``.

    Returns:
        ``(records, first_created, last_created)`` where ``records`` is a list
        of dicts with the keys ``'words'``, ``'created_at'`` and ``'geo'``,
        and the two timestamps are the ``'created'`` values of the first and
        last document seen. Both timestamps are ``None`` if ``cur`` is empty.
    """
    st = datetime.now()
    r = []
    l = cur.count()
    # Defaults cover the empty-cursor case, where the loop body never runs
    # and these names would otherwise be unbound at the return statement.
    stt = None
    end = None
    for i, doc in enumerate(cur, 1):
        created = doc['created']
        if i == 1:
            stt = created
        t = text_process(doc, geo=geo, filter_sw=fsw, stem=stem)
        r.append({
            'words': t[0].split(),
            'created_at': doc['created'],
            'geo': t[1]
        })
        progress(i, l, skip=100)
        end = doc['created']
    # One pre-formatted argument keeps the printed text identical under both
    # the print statement and the print function.
    print('\nretrieval and processing took %s' % (datetime.now() - st,))
    return r, stt, end

Example #3

Show file

File: build_corpus.py Project: gaphex/Oracle

# Append the processed text of every document found in the 'tweets' database
# (minus a few excluded collections) to the corpus file, one entry per line.
p = MDB('tweets')

# Filter rather than list.remove(): remove() raises ValueError when one of
# these collections does not exist in the database.
cols = [c for c in p.client['tweets'].collection_names()
        if c not in ('SPB', 'EKB', 'Moscow')]
print(cols)
i = 0

# Per-collection document counts; their sum is the total handed to progress().
counts = [p.client['tweets'][c].find().count() for c in cols]
total = sum(counts)
print('total: %s documents' % total)

# The context manager guarantees the corpus file is flushed and closed even if
# iteration over a collection fails part-way through.
with open('assets/tw_ht_corpus_2.txt', 'a') as f:
    for c in cols:
        ml = p.client['tweets'][c].find()
        for t in ml:
            try:
                dt = text_process(t)[0]
                progress(i, total)
                if dt:
                    f.write(dt + '\n')
            except Exception as e:
                # Best effort: report the failing document and keep going.
                print(e)
            finally:
                # Count the document whether or not it was processed.
                i += 1

Example #4

Show file

# Build the corpus file: run text_process over each document of every
# non-excluded collection in the 'tweets' database and append the non-empty
# results, one per line.
p = MDB('tweets')

# Excluded collections are filtered out instead of removed in place, so a
# database lacking one of them no longer aborts the script with ValueError.
cols = [c for c in p.client['tweets'].collection_names()
        if c not in ('SPB', 'EKB', 'Moscow')]
print(cols)
i = 0

# Document count of each remaining collection, summed for progress reporting.
counts = [p.client['tweets'][c].find().count() for c in cols]
total = sum(counts)
print('total: %s documents' % total)

# Opened with a context manager so the file handle is always closed; the
# original left it open for the lifetime of the process.
with open('assets/tw_ht_corpus_2.txt', 'a') as f:
    for c in cols:
        ml = p.client['tweets'][c].find()
        for t in ml:
            try:
                dt = text_process(t)[0]
                progress(i, total)
                if dt:
                    f.write(dt + '\n')
            except Exception as e:
                # Deliberately tolerant: a document that fails to process is
                # reported and skipped rather than stopping the whole run.
                print(e)
            finally:
                # Advance the counter for every document, failed or not.
                i += 1