# Shared PorterStemmer instance.
p_stemmer = PorterStemmer()

# Run parameters.
# NOTE(review): num_topics, passes and topn are not used in the visible code;
# they look like topic-model settings -- confirm at their use sites.
num_topics = 50
passes = 20
topn = 10

# k and N are passed to random_idx.generate_letter_id_vectors below as
# (N, k, alphabet).
# NOTE(review): presumably N is the vector dimensionality and k the number of
# non-zero entries -- confirm against random_idx.
k = 5000
N = 10000

# cluster_sizes maps to the n-gram sizes; a `cluster_sz` in random_idx refers
# to one specific (int) element of this list.
cluster_sizes = [1, 2, 3, 4, 5, 6, 7, 8]
ordered = 1

# Assumed to be the alphabet because of the precedent in generate_text.py.
# string.ascii_lowercase is used instead of string.lowercase: the latter is
# locale-dependent on Python 2 and no longer exists on Python 3, so the
# alphabet (and therefore the generated letter vectors) could silently vary.
alphabet = string.ascii_lowercase + ' '
RI_letters = random_idx.generate_letter_id_vectors(N, k, alphabet)


def create_doc_set(path, files):
    """Read each file named in *files* and return the contents as a list.

    Args:
        path: Prefix prepended verbatim to every filename (plain string
            concatenation, so it must already end with a separator if one
            is needed).
        files: Iterable of filenames, read in iteration order.

    Returns:
        A list holding one string per file, in the same order as *files*.
    """
    doc_set = []
    for filename in files:
        # The context manager closes the handle even if read() raises,
        # which the previous manual open()/close() pairing did not.
        with open(path + filename, "r") as f:
            doc_set.append(f.read())
    return doc_set


def tokenize(doc_set):
    # list for tokenized documents in loop
    texts = []
    # loop through document list
# Shared PorterStemmer instance.
p_stemmer = PorterStemmer()

# Run parameters.
# NOTE(review): num_topics, passes and topn are not used in the visible code;
# they look like topic-model settings -- confirm at their use sites.
num_topics = 50
passes = 20
topn = 10

# k and N are passed to random_idx.generate_letter_id_vectors below as
# (N, k, alphabet).
# NOTE(review): presumably N is the vector dimensionality and k the number of
# non-zero entries -- confirm against random_idx.
k = 5000
N = 10000

# cluster_sizes maps to the n-gram sizes; a `cluster_sz` in random_idx refers
# to one specific (int) element of this list.
cluster_sizes = [1, 2, 3, 4, 5, 6, 7, 8]
ordered = 1

# Assumed to be the alphabet because of the precedent in generate_text.py.
# string.ascii_lowercase is used instead of string.lowercase: the latter is
# locale-dependent on Python 2 and no longer exists on Python 3, so the
# alphabet (and therefore the generated letter vectors) could silently vary.
alphabet = string.ascii_lowercase + " "
RI_letters = random_idx.generate_letter_id_vectors(N, k, alphabet)


def create_doc_set(path, files):
    """Read each file named in *files* and return the contents as a list.

    Args:
        path: Prefix prepended verbatim to every filename (plain string
            concatenation, so it must already end with a separator if one
            is needed).
        files: Iterable of filenames, read in iteration order.

    Returns:
        A list holding one string per file, in the same order as *files*.
    """
    doc_set = []
    for filename in files:
        # The context manager closes the handle even if read() raises,
        # which the previous manual open()/close() pairing did not.
        with open(path + filename, "r") as f:
            doc_set.append(f.read())
    return doc_set


def tokenize(doc_set):
    # list for tokenized documents in loop
    texts = []
    # loop through document list
Ejemplo n.º 3
0
	return trellis


"""
The interface that you should interact with
"""
def wrapper_garden(s, n, training=0):
	"""Public entry point: forward to garden_path with the module-level arrays.

	Passes `s`, `n` and `training` through unchanged, supplies the two
	module-level arrays `postprocessed_array1` and `postprocessed_array2`,
	and gives garden_path an empty string for its fifth argument.

	Returns whatever garden_path returns.
	"""
	empty_fifth_arg = ""
	result = garden_path(
		s,
		postprocessed_array1,
		postprocessed_array2,
		n,
		empty_fifth_arg,
		training,
	)
	return result



def _load_or_build_ngram_cache(ngram_path, lv_path):
	"""Load a cached (n-gram array, letter vectors) pair, rebuilding on failure.

	Tries to np.load both files. If either load raises, new letter vectors
	are generated, the n-gram array is rebuilt from the module-level `text`,
	and both are dumped back to the given paths.

	Args:
		ngram_path: File holding the dumped n-gram array.
		lv_path: File holding the dumped letter vectors.

	Returns:
		Tuple `(ngram_array, letter_vectors)`.
	"""
	try:
		ngram_array = np.load(ngram_path)
		letter_vectors = np.load(lv_path)
	except Exception:
		# Deliberately broad: any failure to read the cache (missing or
		# unreadable file) falls back to regenerating it from scratch.
		# NOTE(review): ndarray.dump writes a pickle, and newer NumPy
		# releases refuse pickled input in np.load unless allow_pickle=True
		# is passed -- on such versions this branch would run every time.
		letter_vectors = ri.generate_letter_id_vectors(N, N/2)
		letter_vectors.dump(lv_path)
		# Original note: "creates the array up to 5 grams" -- presumably the
		# 6 is an exclusive upper bound; TODO confirm in load_ngram_vector.
		ngram_array = np.array(llv.load_ngram_vector(text, letter_vectors, 6))
		ngram_array.dump(ngram_path)
		print("finished generating")
	return ngram_array, letter_vectors


# Two independently generated caches, built the same way and differing only
# in the files they are stored in.
postprocessed_array1, lv1 = _load_or_build_ngram_cache(
	'../output/log/preprocessedngrams_100k.p',
	'../output/log/lv_100k.p',
)
postprocessed_array2, lv2 = _load_or_build_ngram_cache(
	'../output/log/preprocessedngrams_2_100k.p',
	'../output/log/lv_2_100k.p',
)