Python process Examples, dataparser.process Python Examples

Example #1

0

Show file

File: pipeline.py Project: Tomaat/grammarCorrector

def main(history=1, tiny='.tiny', tbank=None):
    """Load the data, train the perceptron, and report validation results.

    Args:
        history: forwarded to dp.process, flaws and sp.train_perceptron;
            must be at least 1.
        tiny: suffix placed before '.npy' in the saved weights file name
            ('weights' + str(history) + tiny + '.npy').
        tbank: tree bank object; built with dts.tbankparser() when None.

    Returns:
        The tuple (feature_dict, weights).

    Raises:
        AssertionError: if history is smaller than 1.
    """
    assert history >= 1, "use at least some history"
    started = time()
    TRAIN_FILE = '../release3.2/final_data/train-data.pre'
    VAL_FILE = '../release3.2/final_data/validate-data.pre'

    print('loading tree bank')
    if tbank is None:
        tbank = dts.tbankparser()
    # Read the clock only once the tree bank is available, so that t2 covers
    # the tree bank phase and t3 below covers sentence loading alone.
    t2 = time() - started

    print('loading sentences')
    dp._init_(tbank)
    all_sentences, feature_dict = dp.process(TRAIN_FILE, history)
    val_sentences, _val_feat = dp.process(VAL_FILE, history)
    t3 = time() - started - t2

    print("features has been made")
    print("init perceptron")
    sp._init_(len(feature_dict), dts, False)
    print("end init")

    # Evaluation before any training has happened.
    # NOTE(review): the labels suggest flaws() yields an SSE score -- confirm.
    out(('SSE random weights, only Ne-tags',
         flaws(dts, val_sentences, feature_dict, tbank, history,
               with_tags=False)))
    print("SSE random weights, only Ne-tags")
    out(('SSE random weights',
         flaws(dts, val_sentences, feature_dict, tbank, history)))
    print("SSE random weight")

    # t4 spans both training and writing the weights file.
    t4 = time()
    print("learning")
    weights = sp.train_perceptron(all_sentences, feature_dict, tbank, history)
    np.save('weights' + str(history) + tiny + '.npy', weights)
    t4 = time() - t4
    print(weights.shape)

    # The total is taken here, so it does not include the validation below.
    t1 = time() - started
    print("validating")
    out(('after %d sentences, only Ne-tags' % (len(all_sentences)),
         flaws(dts, val_sentences, feature_dict, tbank, history, weights,
               False)))
    out(('after %d sentences' % (len(all_sentences)),
         flaws(dts, val_sentences, feature_dict, tbank, history, weights)))
    out('total %f sec (loading: %f, %f; training: %f' % (t1, t2, t3, t4))
    return feature_dict, weights

Example #2

0

Show file

File: pipeline.py Project: Tomaat/grammarCorrector

def main(history=1, tiny='.tiny', tbank=None):
    """Run the whole process: load data, train, save weights, validate.

    Args:
        history: forwarded to dp.process, flaws and sp.train_perceptron;
            must be at least 1.
        tiny: suffix placed before '.npy' in the saved weights file name
            ('weights' + str(history) + tiny + '.npy').
        tbank: tree bank object; built with dts.tbankparser() when None.

    Returns:
        The tuple (feature_dict, weights).

    Raises:
        AssertionError: if history is smaller than 1.
    """
    # The message is a plain string; a fourth opening quote would make a
    # literal '"' the first character of the assertion text.
    assert history >= 1, "use at least some history"
    t_start = time()

    TRAIN_FILE = '../release3.2/final_data/train-data.pre'
    VAL_FILE = '../release3.2/final_data/validate-data.pre'

    print('loading tree bank')
    if tbank is None:
        tbank = dts.tbankparser()
    # Measured after the tree bank is in hand so this figure reflects the
    # tree bank phase instead of being taken before any work was done.
    t2 = time() - t_start

    print('loading sentences')
    dp._init_(tbank)
    all_sentences, feature_dict = dp.process(TRAIN_FILE, history)
    val_sentences, _val_feat = dp.process(VAL_FILE, history)
    # Sentence loading only: elapsed so far minus the tree bank phase.
    t3 = time() - t_start - t2

    print("features has been made")
    print("init perceptron")
    sp._init_(len(feature_dict), dts, False)

    print("end init")
    # Scores reported before training.
    # NOTE(review): presumably computed with the initial weights set up by
    # sp._init_ -- confirm against flaws().
    out(('SSE random weights, only Ne-tags',
         flaws(dts,
               val_sentences,
               feature_dict,
               tbank,
               history,
               with_tags=False)))
    print("SSE random weights, only Ne-tags")
    out(('SSE random weights',
         flaws(dts, val_sentences, feature_dict, tbank, history)))

    print("SSE random weight")
    # t4 covers training plus saving the weights to disk.
    t4 = time()
    print("learning")
    weights = sp.train_perceptron(all_sentences, feature_dict, tbank, history)
    np.save('weights' + str(history) + tiny + '.npy', weights)
    t4 = time() - t4
    print(weights.shape)

    # Total run time up to this point; validation below is not included.
    t1 = time() - t_start
    print("validating")
    out(('after %d sentences, only Ne-tags' % (len(all_sentences)),
         flaws(dts, val_sentences, feature_dict, tbank, history, weights,
               False)))
    out(('after %d sentences' % (len(all_sentences)),
         flaws(dts, val_sentences, feature_dict, tbank, history, weights)))
    out('total %f sec (loading: %f, %f; training: %f' % (t1, t2, t3, t4))
    return feature_dict, weights

Example #3

0

Show file

File: pipeline.py Project: Tomaat/grammarCorrector

def test():
    """Time dp.process and dp.process_multi on the tiny training file.

    Prints both elapsed durations on one line, separated by a space.
    """
    train_path = '../release3.2/data/train.data.tiny'
    print('loading sentences')

    # First run: dp.process.
    began = time()
    _sentences, _features = dp.process(train_path)
    process_elapsed = time() - began

    # Second run: dp.process_multi.
    # NOTE(review): the 6 is presumably a worker count -- confirm in dp.
    began = time()
    _sentences, _features = dp.process_multi(train_path, 6)
    multi_elapsed = time() - began

    print('%s %s' % (process_elapsed, multi_elapsed))

Example #4

0

Show file

File: pipeline.py Project: Tomaat/grammarCorrector

def test():
    """Compare the run time of dp.process with that of dp.process_multi.

    Both are run on the tiny training file; the two durations are printed
    on a single line with a space between them.
    """
    TRAIN_FILE = '../release3.2/data/train.data.tiny'

    def _elapsed(run):
        # Time one call, including unpacking its two-part result.
        began = time()
        sentences, features = run()
        return time() - began

    print('loading sentences')
    t1 = _elapsed(lambda: dp.process(TRAIN_FILE))
    # NOTE(review): meaning of the second argument (6) is not visible here.
    t2 = _elapsed(lambda: dp.process_multi(TRAIN_FILE, 6))
    print(' '.join([str(t1), str(t2)]))

Example #5

0

Show file

File: pipeline.py Project: Tomaat/grammarCorrector

def test():
    """Time one dp.process run on the tiny training file.

    Prints the elapsed time as measured with time().
    """
    TRAIN_FILE = '../release3.2/data/train.data.tiny'
    print('loading sentences')
    t1 = time()
    _sentences, _features = dp.process(TRAIN_FILE)
    t1 = time() - t1
    # Only one run is timed, so only one duration is reported. Computing a
    # second duration from a never-assigned t2 raises UnboundLocalError.
    print(t1)

Example #6

0

Show file

File: pipeline.py Project: Tomaat/grammarCorrector

def test():
    """Measure how long dp.process takes on the tiny training file.

    The elapsed time, as measured with time(), is printed.
    """
    TRAIN_FILE = '../release3.2/data/train.data.tiny'
    print('loading sentences')
    started = time()
    _sentences, _features = dp.process(TRAIN_FILE)
    elapsed = time() - started
    # A second timer without its own start reading cannot work: subtracting
    # from an unassigned local raises UnboundLocalError, so only the one
    # measured duration is printed.
    print(elapsed)

Example #7

0

Show file

File: preprocessNgrams.py Project: Tomaat/grammarCorrector

		parw = '-START-'
		sentence.insert(0,parw)
		sentence.insert(0,parw)
		sentence.insert(0,parw)
		return sentence
	else:
		parw = parent.orth_
		current_word = parent
		sentence.insert(0,parw)
		return recursive_tree_climb(current_word, sentence)

if __name__ == '__main__':
	
	print 'start'
	TRAIN_FILE = 'test_data/test_linear.txt' #'../release3.2/data/test.txt'
	all_sentences, feature_dict = dp.process(TRAIN_FILE,1)
	tbank = dts.tbankparser()
	text_file = open("preprocessed-4gram-sentences2.txt", "w")
	print "start looping through sentece"
	
	for sentence in all_sentences:
		try:
			seen_mistakes = []
			parsed_sentence = tbank.parse(sentence.raw_sentence)
			context_tags = [word_tag[1] for word_tag in sentence.words_tags]
			for i in range(0,len(sentence.raw_sentence.split(' '))):
				if context_tags[i] != "Ne":
					cur = parsed_sentence[i]
					sentence_array = []
					sentence_array.insert(0,cur.orth_)
					result = recursive_tree_climb(cur, sentence_array)

Example #8

0

Show file

File: preprocessNgrams.py Project: Tomaat/grammarCorrector

        sentence.insert(0, parw)
        sentence.insert(0, parw)
        sentence.insert(0, parw)
        return sentence
    else:
        parw = parent.orth_
        current_word = parent
        sentence.insert(0, parw)
        return recursive_tree_climb(current_word, sentence)


if __name__ == '__main__':

    print 'start'
    TRAIN_FILE = 'test_data/test_linear.txt'  #'../release3.2/data/test.txt'
    all_sentences, feature_dict = dp.process(TRAIN_FILE, 1)
    tbank = dts.tbankparser()
    text_file = open("preprocessed-4gram-sentences2.txt", "w")
    print "start looping through sentece"

    for sentence in all_sentences:
        try:
            seen_mistakes = []
            parsed_sentence = tbank.parse(sentence.raw_sentence)
            context_tags = [word_tag[1] for word_tag in sentence.words_tags]
            for i in range(0, len(sentence.raw_sentence.split(' '))):
                if context_tags[i] != "Ne":
                    cur = parsed_sentence[i]
                    sentence_array = []
                    sentence_array.insert(0, cur.orth_)
                    result = recursive_tree_climb(cur, sentence_array)