Ejemplo n.º 1
0
	def find_clust_similar_sent(self):
		"""Record, for every cluster, the sentence that scores highest against it.

		Calls ``helper.similarity(self.clust_weight, self.sent_weight)`` and
		walks over the result.  Each item is treated as a mapping whose values
		are similarity scores; the key holding the largest value is collected.
		A key is stored only the first time it is seen, so the result keeps
		iteration order and contains no duplicates.  The list is written to
		``self.clust_sentences``; nothing is returned.

		Raises ``ValueError`` (from ``max``) if one of the mappings is empty.

		NOTE(review): the keys are presumably sentence indices -- confirm
		against ``helper.similarity``.
		"""
		clust_sent_sim = helper.similarity(self.clust_weight,self.sent_weight)
		clust_sent = []
		for sim in clust_sent_sim:
			# ``items()`` is available on Python 2 and Python 3 mappings alike,
			# whereas ``iteritems()`` exists only on Python 2; the key that is
			# selected is the same either way.
			max_index = max(sim.items(), key=operator.itemgetter(1))[0]
			# Several clusters may point at the same sentence; keep it once.
			if max_index not in clust_sent:
				clust_sent.append(max_index)
		self.clust_sentences = clust_sent
Ejemplo n.º 2
0
	def __init__(self,docs,num_clu):
		"""Run the whole pipeline from raw documents to sentence clusters.

		docs is handed unchanged to ``preprocessing.load_duc_xml``.
		num_clu is stored as ``self.no_clusters`` and later passed to
		``cluster.kmedoid`` together with the sentence similarity data.

		Every intermediate result is kept on the instance: ``sentences``,
		``sent_no_swords``, ``unique_terms``, ``sent_weight``,
		``sent_similarity`` and ``clusters``.
		"""
		self.no_clusters = num_clu

		# Loading and stop-word removal.
		loaded = preprocessing.load_duc_xml(docs)
		self.sentences = loaded
		stripped = preprocessing.remove_stopwords(loaded)
		self.sent_no_swords = stripped

		# Vocabulary and per-sentence weights (tfisf over that vocabulary).
		vocabulary = helper.uniqueterms(stripped)
		self.unique_terms = vocabulary
		weights = helper.tfisf(stripped, vocabulary)
		self.sent_weight = weights

		# Sentence-vs-sentence similarity feeds the k-medoid clustering.
		pairwise = helper.similarity(weights, weights)
		self.sent_similarity = pairwise
		self.clusters = cluster.kmedoid(pairwise, self.no_clusters)
Ejemplo n.º 3
0
 def find_clust_similar_sent(self):
     """Store the best-scoring sentence key of each cluster.

     The result of ``helper.similarity(self.clust_weight,
     self.sent_weight)`` is iterated; every item is used as a mapping from
     key to similarity score.  For each mapping the key with the highest
     score is picked, and keys are appended to the result only on first
     occurrence.  The resulting list is assigned to
     ``self.clust_sentences``; the method returns nothing.

     Raises ``ValueError`` (from ``max``) when a mapping has no entries.

     NOTE(review): keys look like sentence indices -- verify against
     ``helper.similarity``.
     """
     clust_sent_sim = helper.similarity(self.clust_weight, self.sent_weight)
     clust_sent = []
     for sim in clust_sent_sim:
         # ``iteritems()`` only exists on Python 2 dicts; ``items()`` works
         # on both major versions and yields the same winning key.
         max_index = max(sim.items(), key=operator.itemgetter(1))[0]
         # The same sentence can win for more than one cluster.
         if max_index not in clust_sent:
             clust_sent.append(max_index)
     self.clust_sentences = clust_sent
def get_most_similar(fv1, fv2):
    """Find the pair of entries, one from each input, with the top similarity.

    Every ``fv1[i]`` is compared with every ``fv2[j]`` through
    ``helper.similarity``.  A pair only replaces the current best when its
    score is strictly greater, so the earliest pair wins ties and scores
    that never exceed 0 are ignored.

    Returns a tuple ``(i, j, score)``.  When no score is greater than 0
    (including when either input is empty) the result is ``(None, None, 0)``.
    """
    best_i, best_j, best_score = None, None, 0
    rows, cols = len(fv1), len(fv2)

    for i in range(rows):
        left = fv1[i]
        for j in range(cols):
            score = helper.similarity(left, fv2[j])
            if score > best_score:
                best_i, best_j, best_score = i, j, score

    return best_i, best_j, best_score
Ejemplo n.º 5
0
	def __init__(self,docs,num_clu):
		"""Load sentences, weight them and cluster them, reporting progress.

		docs is passed unchanged to ``preprocessing.load_sentences``.
		num_clu is stored as ``self.no_clusters`` and handed to
		``cluster.kmedoid`` along with the sentence similarity data.

		A progress line is printed before each stage.  The intermediate
		results stay on the instance as ``sentences``, ``sent_no_swords``,
		``unique_terms``, ``sent_weight``, ``sent_similarity`` and
		``clusters``.
		"""
		# print(...) with one string argument produces the same output on
		# Python 2 and is valid syntax on Python 3, unlike the print statement.
		self.no_clusters = num_clu
		print("Loading Sentences...")
		self.sentences =  preprocessing.load_sentences(docs)
		print("Preprocessing...")
		self.sent_no_swords = preprocessing.remove_stopwords(self.sentences)
		self.unique_terms = helper.uniqueterms(self.sent_no_swords)
		self.sent_weight = helper.tfisf(self.sent_no_swords,self.unique_terms)
		print("Finding Similarity Graph...")
		# Sentences are compared against themselves (same weights twice).
		self.sent_similarity = helper.similarity(self.sent_weight,self.sent_weight)
		print("Clustering...")
		self.clusters = cluster.kmedoid(self.sent_similarity,self.no_clusters)
		'''
Ejemplo n.º 6
0
 def __init__(self, docs, num_clu):
     """Build sentence clusters from ``docs``, printing each stage.

     docs goes unchanged into ``preprocessing.load_sentences``; num_clu is
     kept as ``self.no_clusters`` and given to ``cluster.kmedoid``.

     Results of every stage are stored on the instance: ``sentences``,
     ``sent_no_swords``, ``unique_terms``, ``sent_weight``,
     ``sent_similarity`` and ``clusters``.
     """
     # The parenthesised print form emits identical text on Python 2 and
     # also parses on Python 3, where the print statement is a syntax error.
     self.no_clusters = num_clu
     print("Loading Sentences...")
     self.sentences = preprocessing.load_sentences(docs)
     print("Preprocessing...")
     self.sent_no_swords = preprocessing.remove_stopwords(self.sentences)
     self.unique_terms = helper.uniqueterms(self.sent_no_swords)
     self.sent_weight = helper.tfisf(self.sent_no_swords, self.unique_terms)
     print("Finding Similarity Graph...")
     # The weights are compared with themselves.
     self.sent_similarity = helper.similarity(self.sent_weight,
                                              self.sent_weight)
     print("Clustering...")
     self.clusters = cluster.kmedoid(self.sent_similarity, self.no_clusters)
     '''
def match_level(pv1, pv2, fv1, fv2):
    """Return the average score of a greedy one-to-one pairing.

    A score matrix with ``len(pv1)`` rows and ``len(pv2)`` columns is
    filled first.  Cell ``(i, j)`` stays 0 when
    ``np.all(np.abs(pv1[i] - pv2[j]) > const.BG)`` holds; otherwise it is
    set to ``0.5 + 0.5 * helper.similarity(fv1[i], fv2[j])``.

    Pairs are then taken greedily: the largest remaining cell is picked and
    its whole row and column are cleared, so each row and column is used
    at most once.  Cells equal to exactly 0.5 count as a pick but add
    nothing to the running total.

    Returns the total divided by the number of picks, or ``0.0`` when
    nothing could be picked (empty inputs or every pair rejected) -- the
    earlier code raised ``ZeroDivisionError`` in that situation.
    """
    n_rows, n_cols = len(pv1), len(pv2)
    ml = np.zeros((n_rows, n_cols))

    for i in range(n_rows):
        for j in range(n_cols):
            if np.all(np.abs(pv1[i] - pv2[j]) > const.BG):
                continue

            ml[i, j] = 0.5 + (0.5 * helper.similarity(fv1[i], fv2[j]))

    total = 0  # named ``total`` so the builtin ``sum`` is not shadowed
    count = 0
    while ml.any():
        # argmax works on the flattened matrix; turn it back into (row, col).
        x, y = divmod(np.argmax(ml), n_cols)
        if ml[x, y] != 0.5:
            total = total + ml[x, y]
        # Retire the chosen row and column so they cannot be paired again.
        ml[x] = 0
        ml[:, y] = 0
        count = count + 1

    if count == 0:
        return 0.0
    return total / count
Ejemplo n.º 8
0
def nivel_similitud(pv1, pv2, fv1, fv2):
    """Compute the mean score of greedily matched pairs.

    Step 1 builds a ``len(pv1)`` x ``len(pv2)`` matrix.  A cell is left at
    0 when ``np.all(np.abs(pv1[i] - pv2[j]) > const.BG)`` is true and is
    otherwise ``0.5 + 0.5 * helper.similarity(fv1[i], fv2[j])``.

    Step 2 repeatedly takes the largest remaining cell, then zeroes its
    row and column so neither index is matched twice.  A cell whose value
    is exactly 0.5 increments the match counter without contributing to
    the accumulated score.

    Returns accumulated score / number of matches.  If no match was made
    at all (empty inputs or an all-zero matrix) ``0.0`` is returned
    instead of dividing by zero.
    """
    n_rows, n_cols = len(pv1), len(pv2)
    ml = np.zeros((n_rows, n_cols))

    for i in range(n_rows):
        for j in range(n_cols):
            if np.all(np.abs(pv1[i] - pv2[j]) > const.BG):
                continue

            ml[i, j] = 0.5 + (0.5 * helper.similarity(fv1[i], fv2[j]))

    total = 0  # avoids shadowing the builtin ``sum``
    count = 0
    while ml.any():
        # Flat argmax index -> (row, column).
        x, y = divmod(np.argmax(ml), n_cols)
        if ml[x, y] != 0.5:
            total = total + ml[x, y]

        # Remove the matched row and column from further consideration.
        ml[x] = 0
        ml[:, y] = 0
        count = count + 1

    if count == 0:
        return 0.0
    return total / count