Example no. 1
0
def main():
    """Build trace embeddings with three techniques (Trace2Vec, Node2Vec,
    NGrams), cluster each embedding with every configured algorithm,
    report the clustering scores, and plot 2-D embeddings when applicable.

    Relies on module-level configuration defined elsewhere in this file:
    ``logName``, ``vectorsize``, ``clustering``, ``embed``, plus the helper
    modules (prepareInput, Trace2Vec, Node2Vec, node2vec, NGrams, myPlot).
    """
    prepareInput.createInput(logName)

    scores = []

    def _cluster_and_save(method_name, end_cluster, vecs, labels, corpus):
        # Run every configured clustering algorithm over one embedding and
        # persist the resulting assignments per algorithm.
        for algorithm in clustering:
            assignments = cluster(algorithm, vecs, labels)
            printVector(assignments, method_name, "clusters", algorithm)
            end_cluster(logName, assignments, vectorsize, algorithm, corpus)

    # ---------- Trace2Vec ----------
    Trace2Vec.learn(logName, vectorsize)
    t2v_labels = Trace2Vec.getY(logName)
    t2v_vectors, t2v_corpus = Trace2Vec.startCluster(logName, vectorsize)
    printMatrix(t2v_vectors, "Trace2Vec", "vectors")
    _cluster_and_save("Trace2Vec", Trace2Vec.endCluster,
                      t2v_vectors, t2v_labels, t2v_corpus)

    # ---------- Node2Vec ----------
    args = Node2Vec.parse_args()
    args.input = "input/" + logName + ".graph"
    args.output = "output/" + logName + "N2VVS" + str(vectorsize) + ".node2vec"
    nx_graph = Node2Vec.read_graph(args)
    walk_graph = node2vec.Graph(nx_graph, True, args.p, args.q)
    walk_graph.preprocess_transition_probs()
    walks = walk_graph.simulate_walks(args.num_walks, args.walk_length)
    Node2Vec.learn_embeddings(args, logName, vectorsize, walks)
    Node2Vec.extract(logName, vectorsize)

    n2v_labels = Node2Vec.getY(logName)
    n2v_vectors, n2v_corpus = Node2Vec.startCluster(logName, vectorsize)
    printMatrix(n2v_vectors, "Node2Vec", "vectors")
    _cluster_and_save("Node2Vec", Node2Vec.endCluster,
                      n2v_vectors, n2v_labels, n2v_corpus)

    # ---------- NGrams ----------
    ng_vectors, ng_labels = NGrams.ngrams_BPI_2015(logName, vectorsize)
    printMatrix(ng_vectors, "NGrams", "vectors")
    # NGrams has no corpus; pass a zero placeholder per vector.
    _cluster_and_save("NGrams", NGrams.endCluster,
                      ng_vectors, ng_labels, [0] * len(ng_vectors))

    for method_name in ("Trace2Vec", "Node2Vec", "NGrams"):
        scores.append(get_scores(method_name))

    for score in scores:
        print_scores(score)

    # Plotting only makes sense for two-dimensional embeddings.
    if vectorsize == 2:
        for emb in embed:
            myPlot.plot(emb)
Example no. 2
0
 def get_salt_file_character_n_grams():
     """Return the distinct character n-grams of the salt file as a list.

     Order of the returned list is unspecified (set iteration order).
     """
     char_n_grams = NGrams.get_character_n_grams(NGramHash.SALT_FILE_NAME,
                                                 NGramHash.N_GRAMS_SIZE)
     # set() deduplicates in one pass; replaces the manual add-loop.
     return list(set(char_n_grams))
Example no. 3
0
def get_character_n_gram_set(file_name, n):
    """Return the set of distinct character n-grams found in *file_name*.

    Parameters:
        file_name: path of the text file to read.
        n: size of each character n-gram.
    """
    char_n_grams = NGrams.get_character_n_grams(
        FileIo.get_text_file_contents(file_name), n)
    # set() deduplicates directly; no need for an explicit add-loop.
    return set(char_n_grams)
Example no. 4
0
def get_word_n_gram_set(file_name, n):
    """Return the set of distinct word n-grams found in *file_name*.

    Parameters:
        file_name: path of the text file to read.
        n: size of each word n-gram.
    """
    word_n_grams = NGrams.get_word_n_grams(
        FileIo.get_text_file_contents(file_name), n)
    # set() deduplicates directly; no need for an explicit add-loop.
    return set(word_n_grams)
Example no. 5
0
	def evaluateLine(self, line):
		"""Return True if every n-gram transition probability along *line*
		stays above the -3.75 threshold (presumably a log-probability —
		TODO confirm with NGrams.getProbabilities), False otherwise.

		Scans the line with a growing n-gram order: the context phrase
		expands one word per iteration up to 4 words, after which the
		window slides forward one word at a time. Python 2 code (print
		statement, xrange).
		"""
		print "Evaluating", line
		words = line.split(' ')
		currentOrder = 1
		index = 0
		while (index + currentOrder) < len(words):				
			phrase = ""
			# Build the context phrase from words[index : index+currentOrder].
			for i in xrange(index, currentOrder + index):
				phrase = phrase + " " + words[i]
			phrase = phrase.strip()
			# NOTE(review): relies on the for-loop variable `i` leaking out of
			# the loop, so possibleWord is the word right after the phrase.
			possibleWord = words[i+1]
			#print phrase, possibleWord, currentOrder+1
			nGramMap = NGrams.getProbabilities(phrase, [possibleWord], currentOrder+1) #returns an array of maps -- need to convert
			nGramProbability = nGramMap[0]["probability"]
			#print phrase, possibleWord, nGramProbability
			# Reject the whole line as soon as one transition is too unlikely.
			if nGramProbability < -3.75:
				return False
			# Grow the context up to 4 words, then slide the window instead.
			if currentOrder == 4:
				index += 1
			if currentOrder < 4:
				currentOrder += 1
		return True
 def __get_salt_file_character_n_grams(self):
     """Return the distinct character n-grams of the salt file as a list.

     Order of the returned list is unspecified (set iteration order).
     """
     char_n_grams = NGrams.get_character_n_grams(FileIo.get_text_file_contents(SALT_FILE_NAME), N_GRAMS_SIZE)
     # set() removes duplicates in one pass instead of a manual add-loop.
     return list(set(char_n_grams))
Example no. 7
0
	def getNewLine(self, PoS, editedLine, transformedText, rhymeScheme, meter, newTheme, oldTheme, currentLineNumber): #magic happens
		"""Rebuild one line of text, replacing each "_" placeholder with a
		generated word that matches part of speech, meter, theme similarity
		and — for the closing word of a rhyming line — the rhyme scheme.

		Parameters (as used here; confirm against callers):
			PoS: token/POS-tag pairs for the original line.
			editedLine: line with "_" marking words to regenerate.
			transformedText: lines generated so far (rhyme/context source).
			rhymeScheme: per-line index of the first line it rhymes with.
			meter: per-word emphasis pattern for this line.
			newTheme: theme word used for cosine-similarity scoring.
			oldTheme: unused in this method.
			currentLineNumber: index of the line being generated.

		Returns the rebuilt line as a stripped string. Python 2 code
		(dict.has_key at the rhymable-word filter).
		"""
		newLine = ""
		allwords = self.ctr.getAllWords()
		# Tag names are prefixed with "_" to match the corpus' POS naming.
		originalPoS = ["_"+PoS[i][1] for i in range(len(PoS))]
		newLine = ""
		i = 0
		for word, part in zip(editedLine.split(), originalPoS):
			newWord = ""
			if word == "_":
				# Proper-noun tags fall back to their common-noun equivalents.
				if part == "_NNP":
					part = "_NN"
				elif part == "_NNPS":
					part = "_NNS"
				# Candidate pool constrained by the required emphasis/meter.
				allwords = self.ctr.getWordsWithEmphasis(meter[i])
				tempWords = [word for word in allwords if self.robotBrain.get_popularity(word) > 175000 and word not in self.nameList.names and self.robotBrain.get_most_likely_POS_tag(word) == part]
				if len(tempWords) != 0:
					allwords = tempWords
				else:
					# No candidate passes the popularity/POS filters: pick any
					# meter-matching word at random and move to the next slot.
					print("failed", meter[i], part)
					newLine = newLine + allwords[random.randint(0, len(allwords)-1)]
					i = i + 1
					continue
				# if we are at the last word and the current line is not the first rhyming line in the series. ie [0,0,2,2] index != 1 || index != 3
				if i == len(editedLine.split())-1 and rhymeScheme[currentLineNumber] != currentLineNumber:
					# we retrieve the first line in the current ryhme series. ie [['hi', 'guys'],['burgers', 'fries'], ['spies', 'lies']], we would retrieve ['hi', 'guys']
					transformedLineBefore = transformedText[rhymeScheme[currentLineNumber]]
					# retrieve the last word from transformedLineBefore
					wordToRhyme = transformedLineBefore.split()[len(transformedLineBefore.split())-1]
					# retrieve all the rhyming words
					rhymes = self.rhymeDictionary.getRhymes(wordToRhyme)
					rhymes.append(wordToRhyme)
					if len(set(allwords) & set(rhymes)) > 0:
						# combine the rhyming words with the words that have the proper meter
						rhymes = list(set(allwords) & set(rhymes))
					# randomly select a word from this concatenated list; may not necessarily rhyme
					similarity = []
					for word in rhymes:
						similarity.append(self.get_cosine_similarity(word, newTheme))
					similarity = np.asarray(similarity)
					# Shift/normalize similarities into a probability vector.
					if similarity.min() < 0:
						similarity = similarity - similarity.min()
					if sum(similarity) == 0:
						similarity = np.ones(similarity.shape)
					similarity = similarity / sum(similarity)
					# Condition n-gram probabilities on the line built so far,
					# or on the previous line when this one is still empty.
					if newLine != "" or currentLineNumber == 0:
						probabilities = NGrams.getNGramProbabilities(newLine, rhymes)
					else:
						probabilities = NGrams.getNGramProbabilities(transformedText[currentLineNumber-1], rhymes)
					# Blend: 1 part theme similarity, 4 parts n-gram probability.
					similarity = (similarity + 4 * probabilities) / 5.
					index = np.argmax(np.random.multinomial(1, similarity))
					# Resample until the draw lands on an above-uniform weight.
					while similarity[index] < 1./len(similarity):
						index = np.argmax(np.random.multinomial(1, similarity))
					newWord = rhymes[index]
				elif i == len(editedLine.split()) - 1:
					# Last word of a line that STARTS a rhyme series: choose a
					# rhymable word (present in the rhyme dictionary) by n-gram score.
					newWord = ""
					possibleWords = [w for w in allwords if self.rhymeDictionary.wordList.has_key(w)]
					probabilities = np.asarray(NGrams.getNGramProbabilities(newLine, possibleWords))
					index = np.argmax(np.random.multinomial(1, probabilities))
					while probabilities[index] < 1./len(probabilities):
						index = np.argmax(np.random.multinomial(1, probabilities))
					newWord = possibleWords[index]
				else:
					# Interior word: same similarity/probability blend as the
					# rhyming branch, drawn from all meter-matching candidates.
					similarity = []
					for word in allwords:
						similarity.append(self.get_cosine_similarity(word, newTheme))
					similarity = np.asarray(similarity)
					if similarity.min() < 0:
						similarity = similarity - similarity.min()
					if sum(similarity) == 0:
						similarity = np.ones(similarity.shape)
					similarity = similarity / sum(similarity)
					if newLine != "" or currentLineNumber == 0:
						probabilities = NGrams.getNGramProbabilities(newLine, allwords)
					else:
						probabilities = NGrams.getNGramProbabilities(transformedText[currentLineNumber-1], allwords)
					similarity = (similarity + 4 * probabilities) / 5.
					index = np.argmax(np.random.multinomial(1, similarity))
					while similarity[index] < 1./len(similarity):
						index = np.argmax(np.random.multinomial(1, similarity))
					newWord = allwords[index]
				newLine = newLine + " " + newWord
			else:
				# Word was not a placeholder: keep it verbatim.
				newLine = newLine + " " + word
			i = i + 1
		return newLine.strip()