コード例 #1
0
def log_generate_RI_text(N, RI_letters, cluster_sz, ordered, text_name, n_gram_frequencies, alph=alphabet):
    """Build a frequency-damped random-indexing vector for one text.

    The text is scanned with a sliding window of ``cluster_sz`` characters.
    Each window (cluster) is counted in ``n_gram_frequencies[cluster_sz]`` and
    its ``id_vector`` is added to the running sum, scaled by ``exp(-count)``
    where ``count`` is the cluster's tally *after* the current sighting.

    Args:
        N: Length of the accumulated vector; also forwarded to ``id_vector``.
        RI_letters: Forwarded unchanged to ``id_vector``.
        cluster_sz: Number of characters per cluster (n-gram size).
        ordered: Forwarded unchanged to ``id_vector``.
        text_name: Passed to ``utils.load_text_spaces``; the original note
            says it is expected to include the ``.txt`` extension.
        n_gram_frequencies: Mapping indexed by cluster size whose values are
            dicts from cluster to count. It is updated in place, so counts
            carry over between calls that share the same mapping.
        alph: Forwarded unchanged to ``id_vector``.

    Returns:
        A NumPy array of shape ``(1, N)`` holding the weighted sum.
    """
    text_vector = np.zeros((1, N))
    # Assumes the loaded text is a string, so slicing yields the cluster.
    text = utils.load_text_spaces(text_name)

    # The first full window ends at index cluster_sz - 1. The previous guard
    # (char_num < cluster_sz) started one position late and silently dropped
    # the very first n-gram of every text.
    first_end = max(cluster_sz - 1, 0)
    for end in xrange(first_end, len(text)):
        cluster = text[end - cluster_sz + 1:end + 1]

        # Record the sighting. dict.get avoids the list that .keys() builds
        # on Python 2, which made every membership test linear.
        counts = n_gram_frequencies[cluster_sz]
        sightings = counts.get(cluster, 0) + 1
        counts[cluster] = sightings

        # Clusters seen more often contribute exponentially less.
        text_vector += math.exp(-sightings) * id_vector(
            N, cluster, alph, RI_letters, ordered
        )
    return text_vector
コード例 #2
0
def generate_RI_text_words(N, RI_letters, text_name, alph=alphabet):
    """Build a random-indexing vector for a text from its space-separated words.

    Characters are accumulated into a word until a single space is seen; the
    word's ``id_vector`` is then added to the running sum. Consecutive spaces
    therefore produce an empty word that is still passed to ``id_vector``.

    Args:
        N: Length of the accumulated vector; also forwarded to ``id_vector``.
        RI_letters: Forwarded unchanged to ``id_vector``.
        text_name: Passed to ``utils.load_text_spaces``; the original note
            says it is expected to include the ``.txt`` extension.
        alph: Forwarded unchanged to ``id_vector``.

    Returns:
        A NumPy array of shape ``(1, N)`` holding the sum of the word vectors.
    """
    text_vector = np.zeros((1, N))
    text = utils.load_text_spaces(text_name)

    word = ''
    for char in text:
        if char == ' ':
            text_vector += id_vector(N, word, alph, RI_letters)
            word = ''
        else:
            word += char

    # A text that does not end with a space leaves its last word pending.
    # It used to be dropped; flush it so every word is counted.
    if word:
        text_vector += id_vector(N, word, alph, RI_letters)
    return text_vector
コード例 #3
0
def generate_RI_text_words(N, RI_letters, text_name, alph=alphabet):
    """Sum the ``id_vector`` of every space-delimited word in a text.

    The text is read character by character. A space closes the current word,
    whose ``id_vector`` is added to the result. Back-to-back spaces close an
    empty word, which is still handed to ``id_vector``.

    Args:
        N: Length of the accumulated vector; also forwarded to ``id_vector``.
        RI_letters: Forwarded unchanged to ``id_vector``.
        text_name: Passed to ``utils.load_text_spaces``; the original note
            says it is expected to include the ``.txt`` extension.
        alph: Forwarded unchanged to ``id_vector``.

    Returns:
        A NumPy array of shape ``(1, N)`` holding the sum of the word vectors.
    """
    text_vector = np.zeros((1, N))
    text = utils.load_text_spaces(text_name)

    pending = ''
    for char in text:
        if char != ' ':
            pending += char
            continue
        text_vector += id_vector(N, pending, alph, RI_letters)
        pending = ''

    # Without a trailing space the final word was never emitted; add it here
    # so the last word of the text is not lost.
    if pending:
        text_vector += id_vector(N, pending, alph, RI_letters)
    return text_vector
コード例 #4
0
def generate_RI_text(N, RI_letters, cluster_sz, ordered, text_name, alph=alphabet):
    """Build a random-indexing vector for a text from its character n-grams.

    The text is scanned with a sliding window of ``cluster_sz`` characters and
    the ``id_vector`` of every window is accumulated with ``+=`` into a
    ``Vector`` created as ``Vector(N=N, beta=np.exp(1), empty=0)``.

    Args:
        N: Passed to ``Vector`` as ``N`` and forwarded to ``id_vector``.
        RI_letters: Forwarded unchanged to ``id_vector``.
        cluster_sz: Number of characters per cluster (n-gram size).
        ordered: Forwarded unchanged to ``id_vector``.
        text_name: Passed to ``utils.load_text_spaces``; the original note
            says it is expected to include the ``.txt`` extension.
        alph: Forwarded unchanged to ``id_vector``.

    Returns:
        The accumulated ``Vector`` after all clusters have been added.
    """
    text_vector = Vector(N=N, beta=np.exp(1), empty=0)
    # Assumes the loaded text is a string, so slicing yields the cluster.
    text = utils.load_text_spaces(text_name)

    # The first full window ends at index cluster_sz - 1. The previous guard
    # (char_num < cluster_sz) began one position late and therefore never
    # included the first n-gram of the text.
    first_end = max(cluster_sz - 1, 0)
    for end in xrange(first_end, len(text)):
        cluster = text[end - cluster_sz + 1:end + 1]
        text_vector += id_vector(N, cluster, alph, RI_letters, ordered)
    return text_vector
コード例 #5
0
def generate_RI_text(N, RI_letters, cluster_sz, ordered, text_name, alph=alphabet):
    """Accumulate the ``id_vector`` of every character n-gram in a text.

    A window of ``cluster_sz`` characters slides over the loaded text one
    position at a time. Each window's ``id_vector`` is added with ``+=`` to a
    ``Vector`` initialised as ``Vector(N=N, beta=np.exp(1), empty=0)``.

    Args:
        N: Passed to ``Vector`` as ``N`` and forwarded to ``id_vector``.
        RI_letters: Forwarded unchanged to ``id_vector``.
        cluster_sz: Number of characters per cluster (n-gram size).
        ordered: Forwarded unchanged to ``id_vector``.
        text_name: Passed to ``utils.load_text_spaces``; the original note
            says it is expected to include the ``.txt`` extension.
        alph: Forwarded unchanged to ``id_vector``.

    Returns:
        The accumulated ``Vector`` after all clusters have been added.
    """
    text_vector = Vector(N=N, beta=np.exp(1), empty=0)
    # Assumes the loaded text is a string, so slicing yields the cluster.
    text = utils.load_text_spaces(text_name)

    # Windows are identified by the index of their last character. Starting
    # at cluster_sz - 1 (not cluster_sz, as before) keeps the n-gram that
    # begins at the very first character of the text.
    start = max(cluster_sz - 1, 0)
    for last in xrange(start, len(text)):
        window = text[last - cluster_sz + 1:last + 1]
        text_vector += id_vector(N, window, alph, RI_letters, ordered)
    return text_vector
コード例 #6
0
def log_generate_RI_text(N, RI_letters, cluster_sz, ordered, text_name,
                         n_gram_frequencies, alph=alphabet):
    """Build a random-indexing text vector that damps frequent n-grams.

    A window of ``cluster_sz`` characters slides over the loaded text. For
    each window the tally in ``n_gram_frequencies[cluster_sz]`` is incremented
    and the window's ``id_vector`` is added to the result multiplied by
    ``exp(-tally)``, using the tally *after* the increment.

    Args:
        N: Length of the accumulated vector; also forwarded to ``id_vector``.
        RI_letters: Forwarded unchanged to ``id_vector``.
        cluster_sz: Number of characters per cluster (n-gram size).
        ordered: Forwarded unchanged to ``id_vector``.
        text_name: Passed to ``utils.load_text_spaces``; the original note
            says it is expected to include the ``.txt`` extension.
        n_gram_frequencies: Mapping indexed by cluster size whose values are
            dicts from cluster to count. It is mutated in place, so tallies
            persist across calls that share the same mapping.
        alph: Forwarded unchanged to ``id_vector``.

    Returns:
        A NumPy array of shape ``(1, N)`` holding the weighted sum.
    """
    text_vector = np.zeros((1, N))
    # Assumes the loaded text is a string, so slicing yields the cluster.
    text = utils.load_text_spaces(text_name)

    # Windows are identified by the index of their last character. Starting
    # at cluster_sz - 1 (not cluster_sz, as before) keeps the n-gram that
    # begins at the very first character of the text.
    start = max(cluster_sz - 1, 0)
    for last in xrange(start, len(text)):
        window = text[last - cluster_sz + 1:last + 1]

        # Update the tally with a single dict lookup; testing membership
        # against .keys() scans a list on Python 2.
        tallies = n_gram_frequencies[cluster_sz]
        tally = tallies.get(window, 0) + 1
        tallies[window] = tally

        # The more often a window has been seen, the smaller its weight.
        weight = math.exp(-tally)
        text_vector += weight * id_vector(N, window, alph, RI_letters, ordered)
    return text_vector