Example #1
import time
import numpy as np

# Project-specific modules assumed by this example: MLP, eval (the evaluator
# module), VocabManager, NBestList, U (windowing utilities), and L (logger).

def augment(model_path, input_nbest_path, vocab_path, output_nbest_path):
    """Append a neural n-gram log-probability feature to each hypothesis."""
    classifier = MLP(model_path=model_path)
    evaluator = eval.Evaluator(None, classifier)

    vocab = VocabManager(vocab_path)

    ngram_size = classifier.ngram_size

    def get_ngrams(tokens):
        # Pad with start symbols so the first word has a full left context;
        # optionally close the hypothesis with an end symbol.
        for _ in range(ngram_size - 1):
            tokens.insert(0, '<s>')
        if vocab.has_end_padding:
            tokens.append('</s>')
        indices = vocab.get_ids_given_word_list(tokens)
        return U.get_all_windows(indices, ngram_size)

    input_nbest = NBestList(input_nbest_path, mode='r')
    output_nbest = NBestList(output_nbest_path, mode='w')

    L.info('Augmenting: ' + input_nbest_path)

    start_time = time.time()

    cache = dict()
    for group in input_nbest:
        # Collect every n-gram in this group that has not been scored yet.
        ngram_list = []
        for item in group:
            tokens = item.hyp.split()
            ngrams = get_ngrams(tokens)
            for ngram in ngrams:
                if str(ngram) not in cache:  # dict.has_key() is Python 2 only
                    ngram_list.append(ngram)
                    cache[str(ngram)] = 1000  # placeholder until scored below
        if len(ngram_list) > 0:
            # Score all new n-grams in one batch: the first n-1 columns are
            # the context words, the last column is the predicted word.
            ngram_array = np.asarray(ngram_list, dtype='int32')
            ngram_log_prob_list = evaluator.get_ngram_log_prob(
                ngram_array[:, 0:-1], ngram_array[:, -1])
            for i in range(len(ngram_list)):
                cache[str(ngram_list[i])] = ngram_log_prob_list[i]
        for item in group:
            tokens = item.hyp.split()
            ngrams = get_ngrams(tokens)
            # Sum the cached log-probabilities and append them as a feature.
            sum_ngram_log_prob = 0
            for ngram in ngrams:
                sum_ngram_log_prob += cache[str(ngram)]
            item.append_feature(sum_ngram_log_prob)
            output_nbest.write(item)
    output_nbest.close()

    L.info("Ran for %.2fs" % (time.time() - start_time))
Example #2
	# Read up to `threads` n-best groups; clear `flag` to end the outer loop
	# once the input is exhausted.
	group_list = []
	for i in range(args.threads):
		try:
			group_list.append(next(input_nbest))  # .next() is Python 2 only
		except StopIteration:
			flag = False
	if len(group_list) > 0:
		# Score the groups in parallel, then emit results in input order.
		outputs = pool.map(process_group, group_list)
		for i in range(len(group_list)):
			scores = outputs[i]
			group = group_list[i]
			# Hypothesis indices, best score first.
			sorted_indices = sorted(scores, key=scores.get, reverse=True)
			if args.out_scores_path:
				for idx in scores:
					output_scores.write(str(group.group_index) + ' ' +
						str(idx) + ' ' + str(scores[idx]) + "\n")
			if args.out_nbest_path:
				for idx in sorted_indices:
					output_nbest.write(group[idx])
			# The highest-scoring hypothesis becomes the 1-best output.
			output_1best.write(group[sorted_indices[0]].hyp + "\n")
		counter += 1
		group_counter += len(group_list)
		if counter % 5 == 0:
			L.info("%i groups processed" % group_counter)
L.info("Finished processing %i groups" % (group_counter))

if args.out_scores_path:
	output_scores.close()
if args.out_nbest_path:
	output_nbest.close()
output_1best.close()
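
Example #2 is an excerpt from the middle of a rescoring script: args, pool,
process_group, flag, the counters, and the output handles are all defined
earlier in the file. A minimal sketch of the setup the excerpt appears to
assume, reconstructed only from the names it uses (every default, path, and
flag spelling below is an assumption):

# Sketch of the assumed surrounding scaffolding for Example #2.
import argparse
import multiprocessing

parser = argparse.ArgumentParser()
parser.add_argument('--threads', type=int, default=4)
parser.add_argument('--out-scores-path', dest='out_scores_path')
parser.add_argument('--out-nbest-path', dest='out_nbest_path')
args = parser.parse_args()

# Worker pool that runs process_group (defined elsewhere) on each group.
pool = multiprocessing.Pool(args.threads)

input_nbest_path = 'data/dev.nbest'    # assumed placeholder path
out_1best_path = 'data/dev.1best'      # assumed placeholder path
input_nbest = NBestList(input_nbest_path, mode='r')
output_1best = open(out_1best_path, 'w')
if args.out_scores_path:
    output_scores = open(args.out_scores_path, 'w')
if args.out_nbest_path:
    output_nbest = NBestList(args.out_nbest_path, mode='w')

counter = 0
group_counter = 0
flag = True
while flag:
    # ... the body shown in Example #2 runs here ...
    pass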