def log_generate_RI_text(N, RI_letters, cluster_sz, ordered, text_name,
                         n_gram_frequencies, alph=alphabet):
    """Build a frequency-damped RI vector for the text named ``text_name``.

    Each ``cluster_sz``-character window of the loaded text is counted in
    ``n_gram_frequencies[cluster_sz]`` and its id vector is added to the
    result, scaled by ``exp(-count)``, where ``count`` is the running total
    for that window *including* the current sighting.

    NOTE(review): a second definition of ``log_generate_RI_text`` appears
    later in this file; the later one is the one bound at import time.

    Args:
        N: Width of the accumulator, which starts as a ``(1, N)`` zero
            array; also forwarded to ``id_vector``.
        RI_letters: Forwarded unchanged to ``id_vector``.
        cluster_sz: Window length in characters, and the key used to select
            the count table inside ``n_gram_frequencies``.
        ordered: Forwarded unchanged to ``id_vector``.
        text_name: Passed to ``utils.load_text_spaces``; per the original
            comment it is expected to include the ``.txt`` extension.
        n_gram_frequencies: Mapping of cluster size to a ``{cluster: count}``
            dict. The dict for ``cluster_sz`` is updated in place.
        alph: Forwarded unchanged to ``id_vector``.

    Returns:
        The accumulated text vector.
    """
    text_vector = np.zeros((1, N))
    text = utils.load_text_spaces(text_name)

    for char_num in xrange(len(text)):
        # NOTE(review): this guard also skips the window ending at index
        # cluster_sz - 1 (i.e. text[0:cluster_sz]); kept as-is so results
        # do not change -- confirm whether that is intended.
        if char_num < cluster_sz:
            continue

        # The cluster is the cluster_sz characters ending at char_num.
        cluster = text[char_num - cluster_sz + 1:char_num + 1]

        # Record the sighting with one dict operation; testing membership
        # against .keys() would build a list on every character in Python 2.
        counts = n_gram_frequencies[cluster_sz]
        seen = counts.get(cluster, 0) + 1
        counts[cluster] = seen

        text_vector += math.exp(-seen) * id_vector(
            N, cluster, alph, RI_letters, ordered
        )
    return text_vector
def generate_RI_text_words(N, RI_letters, text_name, alph=alphabet):
    """Sum the id vectors of the space-delimited words of a text.

    The text is loaded with ``utils.load_text_spaces`` (per the original
    comment, ``text_name`` is expected to include the ``.txt`` extension).
    A word is emitted each time a space is reached, so:

    * consecutive spaces emit an empty-string cluster, and
    * whatever follows the final space is never emitted.

    Unlike the n-gram generators in this file, ``id_vector`` is called here
    without an ``ordered`` argument.

    Args:
        N: Width of the ``(1, N)`` zero accumulator; also forwarded to
            ``id_vector``.
        RI_letters: Forwarded unchanged to ``id_vector``.
        text_name: Name handed to ``utils.load_text_spaces``.
        alph: Forwarded unchanged to ``id_vector``.

    Returns:
        The accumulated text vector.
    """
    text_vector = np.zeros((1, N))
    text = utils.load_text_spaces(text_name)

    # Splitting on a single space yields one piece per space plus a trailing
    # remainder; dropping that remainder mirrors "emit only on a space".
    space_terminated_words = text.split(' ')[:-1]
    for word in space_terminated_words:
        text_vector += id_vector(N, word, alph, RI_letters)
    return text_vector
def generate_RI_text(N, RI_letters, cluster_sz, ordered, text_name, alph=alphabet):
    """Accumulate the id vectors of every character n-gram of a text.

    The text is loaded with ``utils.load_text_spaces`` (per the original
    comment, ``text_name`` is expected to include the ``.txt`` extension).
    For each position at or beyond ``cluster_sz``, the ``cluster_sz``
    characters ending at that position form a cluster whose id vector is
    added to the running total.

    Args:
        N: Passed to ``Vector`` as ``N`` and forwarded to ``id_vector``.
        RI_letters: Forwarded unchanged to ``id_vector``.
        cluster_sz: Window length in characters.
        ordered: Forwarded unchanged to ``id_vector``.
        text_name: Name handed to ``utils.load_text_spaces``.
        alph: Forwarded unchanged to ``id_vector``.

    Returns:
        The ``Vector`` accumulator after all clusters have been added.
    """
    text_vector = Vector(N=N, beta=np.exp(1), empty=0)
    text = utils.load_text_spaces(text_name)

    for end in xrange(len(text)):
        # NOTE(review): positions below cluster_sz are skipped, which also
        # leaves out the very first window text[0:cluster_sz] -- confirm
        # whether that is intended.
        if end < cluster_sz:
            continue
        start = end - cluster_sz + 1
        window = text[start:end + 1]
        text_vector += id_vector(N, window, alph, RI_letters, ordered)
    return text_vector
def log_generate_RI_text(N, RI_letters, cluster_sz, ordered, text_name,
                         n_gram_frequencies, alph=alphabet):
    """Build a frequency-damped RI vector for the text named ``text_name``.

    Each ``cluster_sz``-character window of the loaded text is tallied in
    ``n_gram_frequencies[cluster_sz]``; the window's id vector is then added
    to the result with weight ``exp(-count)``, ``count`` being the tally
    after the current sighting has been recorded.

    NOTE(review): an earlier definition with this same name exists in this
    file; this later one replaces it at import time.

    Args:
        N: Width of the accumulator, which starts as a ``(1, N)`` zero
            array; also forwarded to ``id_vector``.
        RI_letters: Forwarded unchanged to ``id_vector``.
        cluster_sz: Window length in characters, and the key used to select
            the tally dict inside ``n_gram_frequencies``.
        ordered: Forwarded unchanged to ``id_vector``.
        text_name: Passed to ``utils.load_text_spaces``; per the original
            comment it is expected to include the ``.txt`` extension.
        n_gram_frequencies: Mapping of cluster size to a ``{cluster: count}``
            dict. The dict for ``cluster_sz`` is modified in place.
        alph: Forwarded unchanged to ``id_vector``.

    Returns:
        The accumulated text vector.
    """
    text_vector = np.zeros((1, N))
    text = utils.load_text_spaces(text_name)

    for char_num in xrange(len(text)):
        # NOTE(review): the window ending at index cluster_sz - 1 (the first
        # full window) is skipped by this guard; behaviour is preserved here
        # -- confirm whether that is intended.
        if char_num < cluster_sz:
            continue

        # Take the cluster_sz characters that end at char_num.
        cluster = text[char_num - cluster_sz + 1:char_num + 1]

        # Update the tally with a single lookup; "in d.keys()" materialises
        # a list of every key on each character under Python 2.
        tally = n_gram_frequencies[cluster_sz]
        sightings = tally.get(cluster, 0) + 1
        tally[cluster] = sightings

        weight = math.exp(-sightings)
        text_vector += weight * id_vector(N, cluster, alph, RI_letters, ordered)
    return text_vector