Python tokenize Examples

Programming Language: Python

Namespace/Package Name: kaggle

Method/Function: tokenize

Examples at hotexamples.com: 3

Python tokenize - 3 examples found. These are the top-rated real-world Python examples of kaggle.tokenize, extracted from open-source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: knn.py Project: ageek/kaggle-1

def word_vectors(csv_file, vector_length, validation, word_to_skus=None, generated_sku_vectors=None):
	"""Give every distinct token in the query column its own random vector.

	Rows are loaded with ``kaggle.file_to_array(csv_file, validation)`` and
	index 3 is pulled out of them with ``kaggle.slice``. Each extracted query
	goes through ``kaggle.format_string`` and then ``kaggle.tokenize``; the
	first time a token is encountered it is assigned
	``vector.random_vector(vector_length)``.

	``word_to_skus`` and ``generated_sku_vectors`` are accepted but are not
	read anywhere in this function.

	Returns:
		A dict mapping each token to the vector generated for it.
	"""
	query_column = 3
	rows = kaggle.file_to_array(csv_file, validation)
	vectors_by_word = {}
	for query in kaggle.slice(rows, query_column):
		tokens = kaggle.tokenize(kaggle.format_string(query))
		for token in tokens:
			if token in vectors_by_word:
				continue
			# Only unseen tokens draw a vector, so a token keeps the first
			# vector generated for it.
			vectors_by_word[token] = vector.random_vector(vector_length)

	return vectors_by_word

Example #2

Show file

File: tf_idf.py Project: ageek/kaggle-1

def test_data(csv_file, class_labels_index, input_data_index, validation, items_count, ngram=1):
	"""Load labels and tokenized inputs from a CSV file.

	Rows come from ``kaggle.file_to_array(csv_file, validation)``. The labels
	are whatever ``kaggle.slice`` yields for ``class_labels_index``; the inputs
	are whatever it yields for ``input_data_index``, each passed through
	``kaggle.format_string`` and ``kaggle.tokenize(..., ngram)``.

	Args:
		items_count: the string ``'All'`` to keep everything, otherwise the
			upper bound used to slice both results (``[0:items_count]``).
		ngram: forwarded as the second argument to ``kaggle.tokenize``.

	Returns:
		A ``(class_labels, tokenized_inputs)`` pair.
	"""
	rows = kaggle.file_to_array(csv_file, validation)
	labels = kaggle.slice(rows, class_labels_index)
	raw_inputs = kaggle.slice(rows, input_data_index)
	tokenized = [
		kaggle.tokenize(kaggle.format_string(item), ngram)
		for item in raw_inputs
	]
	if items_count == 'All':
		return labels, tokenized
	# Truncate both sequences to the same leading window.
	return labels[0:items_count], tokenized[0:items_count]

Example #3

Show file

File: best_buy.py Project: ageek/kaggle-1

def sku_to_searches():
	"""Group normalized search strings by the SKU found on the same row.

	Rows are read with ``kaggle.file_to_array(training, "all")``, where
	``training`` is a module-level name defined outside this function. For
	every row, element 1 is used as the SKU and element 3 as the search text.
	The search text is lower-cased, tokenized with ``kaggle.tokenize``,
	re-joined with single spaces, and stripped of double-quote characters.

	Returns:
		A dict mapping each SKU to the list of its cleaned search strings,
		in row order.
	"""
	rows = kaggle.file_to_array(training, "all")
	# Seed an empty bucket per distinct SKU before filling them in.
	searches_by_sku = {sku: [] for sku in set(kaggle.slice(rows, 1))}

	for row in rows:
		sku = row[1]
		tokens = kaggle.tokenize(row[3].lower())
		cleaned = re.sub("\"", '', " ".join(tokens))
		searches_by_sku[sku].append(cleaned)
	return searches_by_sku