Python getfilelines Examples

Programming Language: Python

Namespace/Package Name: src.common.myutils

Method/Function: getfilelines

Examples at hotexamples.com: 3

Python getfilelines - 3 examples found. These are the top-rated real-world Python examples of src.common.myutils.getfilelines extracted from open source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

File: ngram_corpus.py Project: yyaghoobzadeh/figment_v2

def build_ngram_corpus(args):
    """Build a processed n-gram corpus and a vocabulary-count file.

    ``args[2]`` is the input directory, expected to contain ``all.sampled``.
    ``args[3]`` is the maximum n-gram order as a string; it is converted to
    an int and also appended verbatim to the input directory to form the
    output directory name.
    """
    input_dir = args[2]
    max_order = int(args[3])
    # The output directory is the input directory with the raw order string
    # appended (no path separator is inserted).
    output_dir = input_dir + args[3]

    raw_lines = getfilelines(input_dir + '/all.sampled', upto=-1)
    print('sampled lines are loaded')

    kept_lines = filter_sentences(raw_lines)
    print('filtering finished')

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    freq_by_ngram = calc_ngram_freq(kept_lines, max_order)
    vocab_index = build_vocab(freq_by_ngram, min_freq=5)

    corpus_path = output_dir + '/corpus,processed.txt'
    process_corpus(kept_lines, vocab_index, freq_by_ngram, max_order, corpus_path)

    vocab_path = output_dir + '/vocab-count.txt'
    write_vocab(vocab_index, freq_by_ngram, vocab_path)

Example #2

Show file

def build_ngram_corpus(args):
    """Create the n-gram corpus and vocabulary files for a sampled dataset.

    Reads ``<args[2]>/all.sampled``, filters its sentences, counts n-grams up
    to order ``int(args[3])``, builds a vocabulary with ``min_freq=5`` and
    writes the processed corpus and the vocabulary counts into the directory
    named ``args[2] + args[3]`` (created if it does not exist).
    """
    src_dir = args[2]
    ngram_limit = int(args[3])
    dest_dir = src_dir + args[3]  # plain string concatenation, no separator

    sampled = getfilelines(src_dir + '/all.sampled', upto=-1)
    print('sampled lines are loaded')
    sentences = filter_sentences(sampled)
    print('filtering finished')

    dest_exists = os.path.exists(dest_dir)
    if not dest_exists:
        os.makedirs(dest_dir)

    counts = calc_ngram_freq(sentences, ngram_limit)
    vocab = build_vocab(counts, min_freq=5)
    process_corpus(
        sentences,
        vocab,
        counts,
        ngram_limit,
        dest_dir + '/corpus,processed.txt',
    )
    write_vocab(vocab, counts, dest_dir + '/vocab-count.txt')

Example #3

Show file

File: extend_dstypes_parents.py Project: yyaghoobzadeh/figment-multi

def load_lines(lines_file, e2types, upto=-1):
    """Load tab-separated lines and group their text by entity and by type.

    Each line returned by ``getfilelines(lines_file)`` must split on tabs
    into exactly five fields. Field 1 is handed to ``parseents`` and the
    first returned item is used as the entity; field 4, stripped, is the text.

    Args:
        lines_file: Path passed to ``getfilelines``.
        e2types: Mapping from entity to a sequence of types; the first type
            of each entity is used as the grouping key for ``t2lines``.
        upto: Maximum number of lines to process. The default ``-1`` (or any
            negative value) means no limit.

    Returns:
        A tuple ``(e2lines, t2lines, e2freq)`` where ``e2lines`` maps entity
        to its list of texts, ``t2lines`` maps a type to a list of
        ``(text, entity)`` pairs, and ``e2freq`` maps entity to the number
        of lines seen for it.

    Raises:
        AssertionError: If a line does not have exactly five fields.
    """
    logger.info('loading lines from %s ...', lines_file)
    e2lines = defaultdict(list)
    e2freq = defaultdict(int)
    t2lines = defaultdict(list)
    for c, line in enumerate(getfilelines(lines_file)):
        # Test the limit before touching the line so that exactly `upto`
        # lines are consumed (and upto=0 consumes none).
        if c == upto:
            break
        parts = line.split('\t')
        if len(parts) != 5:
            # Dump the offending line before the assertion fires.
            print(len(parts))
            print(line)
        assert len(parts) == 5
        mye = parseents(parts[1])[0]
        text = parts[4].strip()
        e2lines[mye].append(text)
        e2freq[mye] += 1
        # Add text to the notable (first listed) type of mye.
        t2lines[e2types[mye][0]].append((text, mye))
    logger.info('... lines loaded')

    return (e2lines, t2lines, e2freq)