def calculate_rankings(sketches):
    n_genomes = len(sketches)
    jaccard_matrix = np.zeros((n_genomes, n_genomes))
    sd_matrix = np.zeros((n_genomes, n_genomes))
    forbes_matrix = np.zeros((n_genomes, n_genomes))
    print('Calculating pairwise similarities...')
    for i in range(n_genomes):
        for j in range(i, n_genomes):
            h1 = sketches[i]
            h2 = sketches[j]
            union = Similarity.union(h1, h2).cardinality()
            a_excl, b_excl, intersection = Similarity.getJointEstimators(h1, h2)
            a = h1.cardinality()
            b = h2.cardinality()
            jaccard = intersection / union
            sd = 2 * intersection / (a + b)
            forbes = (intersection * union) / (intersection * union + 1.5 * a_excl * b_excl)
            jaccard_matrix[i, j] = jaccard
            sd_matrix[i, j] = sd
            forbes_matrix[i, j] = forbes
    jaccard_rankings = GenomeRankings.rank_genomes(jaccard_matrix)
    sd_rankings = GenomeRankings.rank_genomes(sd_matrix)
    forbes_rankings = GenomeRankings.rank_genomes(forbes_matrix)
    return jaccard_rankings, forbes_rankings, sd_rankings
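The three measures computed above reduce to simple set formulas. A minimal sketch (not part of the original code; the helper name exact_similarities is hypothetical) computes them on exact Python sets, which can be useful for sanity-checking the HLL-estimator-based values:

# Hypothetical helper: the same three measures on exact sets rather than sketches.
def exact_similarities(A, B):
    inter = len(A & B)
    union = len(A | B)
    a_excl = len(A - B)          # elements only in A
    b_excl = len(B - A)          # elements only in B
    jaccard = inter / union
    sorensen_dice = 2 * inter / (len(A) + len(B))
    forbes = (inter * union) / (inter * union + 1.5 * a_excl * b_excl)
    return jaccard, sorensen_dice, forbes

# Example: exact_similarities({'AAC', 'ACG', 'CGT'}, {'ACG', 'CGT', 'GTT'})
# -> (0.5, 0.666..., 0.842...)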
def davies_bouldin(self):
    """
    This method computes the Davies-Bouldin (DB) value of a given clustering.
    :return: the Davies-Bouldin value of the clustering
    """
    # get the average internal cluster distances
    cluster_averages = self.cluster_averages()
    # create variable for db
    davies_bouldin = 0.0
    s = Similarity(self.e)
    # for each cluster / centroid i
    for i in range(self.solution.num_clusters):
        # for each cluster / centroid j
        for j in range(self.solution.num_clusters):
            # when i and j are not the same cluster / centroid
            if j != i:
                # calculate the distance between the two centroids of i and j
                d_ij = s.fractional_distance(self.solution.centroids[i], self.solution.centroids[j])
                # sum of the internal cluster distances of clusters i and j divided by
                # the previously computed value, i.e. the distance between centroid i and centroid j
                d_ij = (cluster_averages[i] + cluster_averages[j]) / d_ij
                # update db if this is larger than any value seen before
                davies_bouldin = max(d_ij, davies_bouldin)
    return davies_bouldin
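For reference, the textbook Davies-Bouldin index averages the worst-case ratio over clusters,

DB = \frac{1}{k} \sum_{i=1}^{k} \max_{j \neq i} \frac{s_i + s_j}{d(c_i, c_j)}

where s_i is the average internal distance of cluster i and d(c_i, c_j) the distance between centroids. The method above instead returns the single largest ratio over all pairs, which serves the same "lower is better" purpose of penalizing loose, poorly separated clusters.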
def averageSimilarityFeatureExtractor(self, originalDocuments, machineSummary, humanSummaries):
    simWithOD = s.calculateAverageSimilarity(machineSummary, originalDocuments)
    simWithHS = s.calculateAverageSimilarity(machineSummary, humanSummaries)
    return [simWithOD, simWithHS]
def computeSimilarity(self):
    """
    compute the similarity between nodes using the distances computed earlier
    :return: none
    """
    if self.verbose:
        print("Computing Similarities...", end=" ", flush=True)
    # 2d arrays to hold all distances and relation ids
    distances = []
    ids = []
    # get list of patient ids
    nodes = self.conn.getSortedIDList()
    for i in range(len(nodes)):
        # compute average distance to neighbors for node i
        xi_N = self.conn.getPatientRelations(nodes[i][0])
        distances.append([x[2] for x in xi_N])
        ids.append([x[1] for x in xi_N])
    # compute the similarities
    buffer = Similarity.measure(ids, distances)
    # write the similarities to disk
    if self.verbose:
        print("Done.")
        print("Writing Similarities...", end=" ", flush=True)
    self.conn.updateRelationsFromBuffer(buffer)
    if self.verbose:
        print(" Done.")
def parseCreatorFile(self, fileName, delimiter):
    """
    Parse first data type to create the nodes for the graph
    :param fileName: file from which to read (.csv)
    :param delimiter: delimiter of file
    :return: none
    """
    # allocate nodes in the background; pass the callable and its arguments
    # separately so the Thread runs it concurrently instead of calling it here
    task = threading.Thread(target=self.allocateNodes, args=(fileName, delimiter))
    task.start()
    # new reader to measure the distances
    with open(fileName, 'r') as f_in:
        if self.verbose:
            print("Calculating Distances...", end=" ", flush=True)
        reader = csv.reader(f_in, delimiter=delimiter)
        # skip headers
        next(reader)
        patients = [row for row in reader if row != []]
        ids = [row[0] for row in patients]
        buffer = Similarity.initialDistance(ids, patients)
    # wait until all new nodes are allocated
    task.join()
    if self.verbose:
        print(" Done.")
        print("Writing Distances...", end=" ", flush=True)
    self.conn.addRelationsFromBuffer(buffer)
    self.files_read += 1
    if self.verbose:
        print(" Done.")
def sequenceSimilarities(subtrees):
    # print(subtrees)
    length = len(subtrees)
    maxSimilarity = -1
    maxTreeX = ""
    maxNodeX = ""
    maxTreeY = ""
    maxNodeY = ""
    for fileA, sequencesA in sorted(subtrees.items()):
        for fileB, sequencesB in sorted(subtrees.items()):
            # print(fileA)
            # print(fileB)
            if fileA != fileB:
                root = subtrees[fileA]['ROOT']
                for sequence in subtrees[fileB]:
                    similarity = Similarity.domainSim(root, subtrees[fileB][sequence])
                    if similarity > maxSimilarity:
                        maxSimilarity = similarity
                        maxTreeX = fileA
                        maxNodeX = 'ROOT'
                        maxTreeY = fileB
                        maxNodeY = sequence
    # indices = sorted([(maxTreeX, maxNodeX), (maxTreeY, maxNodeY)], key=lambda x: x[0])
    return maxTreeX, maxTreeY, maxNodeY, maxSimilarity
def SimilarityIndexes(expList, simList, indextype, indexname):
    index = 0
    index_sum = 0
    number = 0
    for i in range(len(expList)):
        for j in range(len(simList)):
            index = Similarity.SimilarityIndex(expList[i], simList[j], indextype)
            if index != 0:
                index_sum += index
                number += 1
    outdir = './ResultData'
    if not os.path.exists(outdir):
        os.mkdir(outdir)
    pd.DataFrame(expList).to_csv(os.path.join(outdir, 'explist-' + indextype + indexname + '.txt'), mode='a', sep=' ')
    pd.DataFrame(simList).to_csv(os.path.join(outdir, 'simlist-' + indextype + indexname + '.txt'), mode='a', sep=' ')
    # pd.DataFrame(simList).to_csv(outdir + '/explist-' + indextype + indexname + '.txt', mode='a', sep=' ')
    if number == 0:
        number = 1
    return index_sum / number
def SimilarityIndexes(expList, simList, indextype, indexname):
    index = 0
    index_sum = 0
    number = 0
    index_max = 0
    for i in range(len(expList)):
        for j in range(len(simList)):
            index = Similarity.SimilarityIndex(expList[i], simList[j], indextype)
            if index != 0:
                index_sum += index
                number += 1
                index_max = max(index, index_max)
    pd.DataFrame(expList).to_csv(r'ResultData//explist' + indextype + indexname + '.txt', mode='a', sep=' ')
    pd.DataFrame(simList).to_csv(r'ResultData//simlist' + indextype + indexname + '.txt', mode='a', sep=' ')
    if number == 0:
        number = 1
    index = index_sum / number
    return index
def estSeries(M, ts):
    # numlags and wgtfunc are resolved from the enclosing scope
    est = np.zeros(ts.shape)
    for k in range(M.shape[0]):
        poi = M[k, :]
        dists, inds = Similarity.findClosestInclusive(poi, M, numlags + 1)
        w = wgtfunc(np.array(dists))
        est[k] = (w * ts[list(inds)]).sum()
    return est
def __init__(self, indirizzo, doc):
    # Takes as input the address of the feedback file ("TIME.REL") and a VSM.
    # Fills a dictionary: the key is the query ID and the value is the list of
    # relevant document IDs for that query.
    self.VSM = doc
    self.SIM = SI.Similarity(doc)
    self.relevance = {}  # query ID -> list of relevant document IDs
    lines = [line.strip() for line in open(indirizzo)]
    lines = list(filter(None, lines))
    for i in range(0, doc.numquery):
        l = [int(s) for s in " ".join(lines[i].split()).split(' ')]
        self.relevance[l[0]] = l[1:]
def plotSerials(plotlist, methods):
    testFile = join("Test", "pattern_Test.csv")
    kpi = loadTestKPI(testFile)
    targetMap, serialMatrix = loadTest(testFile)
    s1 = serialMatrix[plotlist[0]]
    s2 = serialMatrix[plotlist[1]]
    SL = [serialMatrix[i] for i in plotlist]
    NL = ['serial{0}'.format(i) for i in plotlist]
    TL = [targetMap[i] for i in plotlist]
    offsets = {}
    dtw_ss = [e for e in methods if "dtw_s" in e]
    for m in dtw_ss:
        s = Similarity.Similarity(s1, s2)
        NL.append('serial{0} shifted by {1}({2})'.format(plotlist[-1], m, s.use_method(m)))
        TL.append(targetMap[plotlist[-1]])
        SL.append(s2 + s.bestShiftY)
    dtw_ss = [e for e in methods if e.startswith('m')]
    for m in dtw_ss:
        s = Similarity.Similarity(s1, s2)
        TL.append(targetMap[plotlist[-1]])
        NL.append('serial{0} shifted by {1} :({2})'.format(plotlist[-1], m, s.use_method(m)))
        if (s.bestShiftX > 0):
            offsets[len(SL)] = s.bestShiftX
            SL.append(s2[:-s.bestShiftX])
        else:
            SL.append(s2[-s.bestShiftX:])
    CommonOperation.plotSerials(SL, NL, TL, plotList=range(len(SL)), lowerbound=0, offsets=offsets)
def params(self):
    self.tb_defalt = "new pose name or filename to load and save"
    self.Min = 0.95
    self.Posenames = []
    self.finger_state = []
    self.max_power = []
    self.sim = Similarity.Similarity()
    self.st_flg = False
    self.file_name = ""
    self.portname = "/dev/ttyACM1"
    self.baudrate = 115200
    self.connected = False
    self.rpy = {"roll": 0, "pitch": 1, "yaw": 2}
def estManifold(Mx, My, wgtfunc):
    '''
    Estimate My from Mx.
    '''
    Mest = np.zeros(My.shape)
    for k in range(Mx.shape[0]):
        poi = Mx[k, :]
        dists, inds = Similarity.findClosestInclusive(poi, Mx, Mx.shape[1] + 1)
        w = wgtfunc(np.array(dists))
        pts = [My[j, :] for j in inds]
        Mest[k, :] = sum([w[j] * pts[j] for j in range(len(w))])
    return Mest
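Both estSeries and estManifold above take a wgtfunc that maps an array of neighbor distances to weights. The actual weight function is not shown in this section; a minimal, hypothetical example is normalized inverse-distance weighting:

import numpy as np

# Hypothetical weight function for estSeries / estManifold: normalized
# inverse-distance weights (the wgtfunc used by the original code is not shown here).
def inverse_distance_weights(dists, eps=1e-12):
    inv = 1.0 / (np.asarray(dists, dtype=float) + eps)  # avoid division by zero
    return inv / inv.sum()                               # weights sum to 1

# e.g. Mest = estManifold(Mx, My, inverse_distance_weights)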
def forbes(num_cards, base_start, base_stop, num_trials, exp_forbes, read_lengths):
    # num_cards = 50
    # Generates a range of cardinalities for set generation
    cardinalities = np.logspace(base_start, base_stop, num_cards)
    plot = np.zeros(num_cards)
    for i in range(num_cards):
        print('Starting ' + str(i + 1) + ' out of ' + str(num_cards))
        card = int(cardinalities[i])
        # do 10 trials for each cardinality
        results = np.zeros(num_trials)
        for j in range(num_trials):
            h1 = HLL(12)
            h2 = HLL(12)
            # Generate reads of length 40 based on the expected Forbes, 0.1
            a, b, exp_forbes = Rangen_forbes.generate_reads(exp_forbes, card, card, read_lengths)
            for s in a:
                h1.insert(s)
            for s in b:
                h2.insert(s)
            union = Similarity.union(h1, h2).cardinality()
            intersection = Similarity.intersection(h1, h2)
            b_size, c_size, a_size = Similarity.getJointEstimators(h1, h2)
            obs_forbes = (intersection * union) / ((intersection * union) + 3 / 2 * (b_size * c_size))
            error = 100 * (obs_forbes - exp_forbes) / exp_forbes
            results[j] = error
        plot[i] = np.mean(results)
    print(plot)
    plt.xscale('log')
    plt.title('Forbes (set at 0.1) Accuracy for reads of length 40')
    plt.xlabel('Cardinality')
    plt.ylabel('% Error (mean of 10)')
    plt.ylim(-100, 100)
    plt.scatter(cardinalities, plot)
    plt.show()
def jaccard(num_cards, base_start, base_stop, num_trials, exp_jaccard, read_lengths):
    # Generates a range of cardinalities for set generation
    cardinalities = np.logspace(base_start, base_stop, num_cards)
    plot = np.zeros(num_cards)
    for i in range(num_cards):
        print('Starting ' + str(i + 1) + ' out of ' + str(num_cards))
        card = int(cardinalities[i])
        # do 10 trials for each cardinality
        results = np.zeros(num_trials)
        for j in range(num_trials):
            h1 = HLL(12)
            h2 = HLL(12)
            # Generate reads of length 40 based on the expected Jaccard, 0.02
            a, b, exp_jaccard, forbes, sd = Rangen_jaccard.generate_reads(exp_jaccard, card, card, read_lengths)
            for s in a:
                h1.insert(s)
            for s in b:
                h2.insert(s)
            # Union and intersection calculations for the Jaccard estimate
            union = Similarity.union(h1, h2).cardinality()
            intersection = Similarity.intersection(h1, h2)
            obs_jaccard = intersection / union
            # Expected Jaccard is 0.02, error calculation
            error = 100 * (obs_jaccard - exp_jaccard) / exp_jaccard
            results[j] = error
        plot[i] = np.mean(results)
    # Percent errors being displayed
    print(plot)
    plt.xscale('log')
    plt.title('Jaccard (set at 0.02) Accuracy for reads of length 40')
    plt.xlabel('Cardinality')
    plt.ylabel('% Error (mean of 10)')
    plt.ylim(-100, 100)
    plt.scatter(cardinalities, plot)
    plt.show()
def scoreHandler(event, context):
    file1Url = event["file1"]
    file2Url = event["file2"]
    file1 = requests.get(file1Url)
    file2 = requests.get(file2Url)
    if file1.status_code == 200 and file2.status_code == 200:
        sim = Similarity.Similarity()
        pdf = ConvertPdf.ConvertPDF()
        fileStream1 = io.BytesIO(file1.content)
        fileStream2 = io.BytesIO(file2.content)
        text1 = pdf.convertPDF(fileStream1)
        text2 = pdf.convertPDF(fileStream2)
        returnVal = {}
        returnVal["sim"] = sim.similarity(text1, text2)
        return json.dumps(returnVal)
    return 0
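A sketch of how this handler might be invoked locally; the URLs are placeholders, the context argument is unused by the handler above, and the returned value is illustrative only:

# Hypothetical local invocation of scoreHandler with placeholder PDF URLs.
event = {
    "file1": "https://example.com/paper_a.pdf",
    "file2": "https://example.com/paper_b.pdf",
}
print(scoreHandler(event, context=None))  # e.g. '{"sim": 0.87}' (illustrative value)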
def test_main(filename_s, filename_e, filename_a):
    wordlist_s = TextPreprocess.Tokens(filename_s)
    wordlist_e = TextPreprocess.Tokens(filename_e)
    dict_s = Process.DictBiulder(wordlist_s)
    dict_e = Process.DictBiulder(wordlist_e)
    Fin_dict = Process.MergeDict(dict_s, dict_e)
    V1 = Features.GetVector(dict_s, Fin_dict)
    V2 = Features.GetVector(dict_e, Fin_dict)
    ans = Similarity.CosineSimilarity(V1, V2)
    with open(filename_a, 'w') as f_obj:
        temp = str(ans)
        contents = ''
        for i in range(0, 4):
            contents = contents + temp[i]
        f_obj.write(contents)
        print(contents)
def intersection(num_cards, base_start, base_stop, num_trials, exp_jaccard, read_lengths):
    # Creates a range of cardinalities for set generation
    cardinalities = np.logspace(base_start, base_stop, num_cards)
    plot = np.zeros(num_cards)
    for i in range(num_cards):
        print('Starting ' + str(i + 1) + ' out of ' + str(num_cards))
        card = int(cardinalities[i])
        # do 10 trials for each cardinality
        results = np.zeros(num_trials)
        for j in range(num_trials):
            h1 = HLL(12)
            h2 = HLL(12)
            # Random read (length 40) generator based on the cardinalities and the expected Jaccard value
            a, b, exp_jaccard, forbes, sd = Rangen_jaccard.generate_reads(exp_jaccard, card, card, read_lengths)
            for s in a:
                h1.insert(s)
            for s in b:
                h2.insert(s)
            # Calculation of the intersection between 2 HLLs
            intersection = Similarity.intersection(h1, h2)
            # Expected intersection based on the Jaccard formula (expected Jaccard of 0.02)
            num_overlapped = math.ceil(0.02 * (card * 2) / (0.02 + 1))
            # Percent error calculation
            error = 100 * (intersection - num_overlapped) / num_overlapped
            results[j] = error
        plot[i] = np.mean(results)
    # Print out percent errors
    print(plot)
    plt.xscale('log')
    plt.title('Intersection Accuracy for reads of length 40')
    plt.xlabel('Cardinality')
    plt.ylabel('% Error (mean of 10)')
    plt.ylim(-100, 100)
    plt.scatter(cardinalities, plot)
    plt.show()
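The hard-coded expected overlap above follows from inverting the Jaccard definition when both sets have the same cardinality; a small hypothetical helper makes the formula explicit:

import math

# From J = I / (|A| + |B| - I) it follows that I = J * (|A| + |B|) / (1 + J),
# which for J = 0.02 and |A| = |B| = card reduces to 0.02 * (card * 2) / 1.02.
def expected_overlap(jaccard, card_a, card_b):
    return math.ceil(jaccard * (card_a + card_b) / (1 + jaccard))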
def sd(num_cards, base_start, base_stop, num_trials, exp_sd, read_lengths):
    # Generates a range of cardinalities for set generation
    cardinalities = np.logspace(base_start, base_stop, num_cards)
    plot = np.zeros(num_cards)
    for i in range(num_cards):
        print('Starting ' + str(i + 1) + ' out of ' + str(num_cards))
        card = int(cardinalities[i])
        # do 10 trials for each cardinality
        results = np.zeros(num_trials)
        for j in range(num_trials):
            h1 = HLL(12)
            h2 = HLL(12)
            # Generate reads of length 40 based on the expected Sorensen-Dice, 0.04
            a, b, exp_sd = Rangen_sorensen_dice.generate_reads(exp_sd, card, card, read_lengths)
            for s in a:
                h1.insert(s)
            for s in b:
                h2.insert(s)
            # Calculation of intersection between two HLLs, necessary for SD calculation
            intersection = Similarity.intersection(h1, h2)
            # Calculation of the observed Sorensen-Dice
            obs_sd = 2 * intersection / (h1.cardinality() + h2.cardinality())
            # Percent error calculation for SD
            error = 100 * (obs_sd - exp_sd) / exp_sd
            results[j] = error
        plot[i] = np.mean(results)
    # Print out all percent errors
    print(plot)
    plt.xscale('log')
    plt.title('Sorensen-Dice (set at 0.04) Accuracy for reads of length 40')
    plt.xlabel('Cardinality')
    plt.ylabel('% Error (mean of 10)')
    plt.ylim(-100, 100)
    plt.scatter(cardinalities, plot)
    plt.show()
def parseNewDataType(self, fileName, delimiter):
    """
    Parse additional data types for existing nodes in the graph
    :param fileName: file from which to read (.csv)
    :param delimiter: delimiter of file
    :return: none
    """
    # add attributes in the background; pass the callable and its arguments
    # separately so the Thread runs it concurrently instead of calling it here
    task = threading.Thread(target=self.addAttributes, args=(fileName, delimiter))
    task.start()
    with open(fileName, 'r') as f_in:
        if self.verbose:
            print("Calculating Distances for Data Type " + str(self.files_read) + " ...", end=" ", flush=True)
        reader = csv.reader(f_in, delimiter=delimiter)
        # skip headers
        next(reader)
        patients = [row for row in reader if row != []]
        distances = Similarity.initialDistance(patients)
        ids = [row[0] for row in patients]
    task.join()
    # write initial distances
    self.conn.addRelationsFromBuffer(ids, distances, "Similarity " + str(self.files_read))
    self.files_read += 1
    if self.verbose:
        print("Done.")
def calcErrs(summary, M1est, M2est, M1ref=M1[corr:, :], M2ref=M2[corr:, :],
             name1='M{0}'.format(names[compind1]), name2='M{0}'.format(names[compind2])):
    print('############################################################################')
    print(summary)
    sys.stdout.flush()
    err1 = Similarity.RootMeanSquaredErrorManifold(M1ref, M1est)
    err2 = Similarity.RootMeanSquaredErrorManifold(M2ref, M2est)
    printMe('RMSE', name1, name2, err1, err2)
    err1 = Similarity.MeanErrorManifold(M1ref, M1est)
    err2 = Similarity.MeanErrorManifold(M2ref, M2est)
    printMe('Mean error per point', name1, name2, err1, err2)
    err1 = Similarity.HausdorffDistance(M1ref, M1est)
    err2 = Similarity.HausdorffDistance(M2ref, M2est)
    printMe('Hausdorff distance', name1, name2, err1, err2)
def reveal(self):
    # handle blank entries; weight default values are 1
    if len(self.Holiday_Type_cb.get()) == 0:
        self.Holiday_Type_cb.set("Arbitrary")
    if len(self.Holiday_Type_Weight_cb.get()) == 0:
        self.Holiday_Type_Weight_cb.set(1)
    if len(self.Price_Entry.get()) == 0:
        self.Price_Entry.insert(0, 10000)  # maximum price
    # handle non-integer input for price
    try:
        x = int(self.Price_Entry.get())
    except ValueError:
        output = "Value for Price must be an integer"
        self.text.delete(0.0, END)
        self.text.insert(0.0, output)
    if len(self.Price_Weight_cb.get()) == 0:
        self.Price_Weight_cb.set(1)
    if len(self.Number_Of_Persons_cb.get()) == 0:
        self.Number_Of_Persons_cb.set(1)  # minimum number of people
    if len(self.Number_Of_Persons_Weight_cb.get()) == 0:
        self.Number_Of_Persons_Weight_cb.set(1)
    if len(self.Region_cb.get()) == 0:
        self.Region_cb.set("Arbitrary")
    if len(self.Region_Weight_cb.get()) == 0:
        self.Region_Weight_cb.set(1)
    if len(self.Transportation_cb.get()) == 0:
        self.Transportation_cb.set("Arbitrary")
    if len(self.Transportation_Weight_cb.get()) == 0:
        self.Transportation_Weight_cb.set(1)
    if len(self.Duration_cb.get()) == 0:
        self.Duration_cb.set(1)
    if len(self.Duration_Weight_cb.get()) == 0:
        self.Duration_Weight_cb.set(1)
    if len(self.Season_cb.get()) == 0:
        self.Season_cb.set("Arbitrary")
    if len(self.Season_Weight_cb.get()) == 0:
        self.Season_Weight_cb.set(1)
    if len(self.Accommodation_Type_cb.get()) == 0:
        self.Accommodation_Type_cb.set("Arbitrary")
    if len(self.Accommodation_Type_Weight_cb.get()) == 0:
        self.Accommodation_Type_Weight_cb.set(1)

    query_case = Case('Query Journey', '0', self.Holiday_Type_cb.get(), self.Price_Entry.get(),
                      self.Number_Of_Persons_cb.get(), self.Region_cb.get(), self.Transportation_cb.get(),
                      self.Duration_cb.get(), self.Season_cb.get(), self.Accommodation_Type_cb.get(), "Hotel")

    Total_Weight = int(self.Holiday_Type_Weight_cb.get()) + int(self.Price_Weight_cb.get()) + \
        int(self.Number_Of_Persons_Weight_cb.get()) + int(self.Region_Weight_cb.get()) + \
        int(self.Transportation_Weight_cb.get()) + int(self.Duration_Weight_cb.get()) + \
        int(self.Season_Weight_cb.get()) + int(self.Accommodation_Type_Weight_cb.get())

    Score_list = []
    output = ""
    index_list = []
    for i in range(len(cases)):
        # Calculate local similarity scores
        holiday_type_similarity = Similarity.holiday_type(query_case, cases[i])
        price_similarity = Similarity.price(query_case, cases[i])
        number_of_persons_similarity = Similarity.number_of_persons(query_case, cases[i])
        region_similarity = Similarity.region(query_case, cases[i])
        transportation_similarity = Similarity.transportation(query_case, cases[i])
        duration_similarity = Similarity.duration(query_case, cases[i])
        season_similarity = Similarity.season(query_case, cases[i])
        accommodation_similarity = Similarity.accommodation(query_case, cases[i])
        # Calculate weighted global score
        global_holiday_type = holiday_type_similarity * int(self.Holiday_Type_Weight_cb.get())
        global_price = price_similarity * int(self.Price_Weight_cb.get())
        global_number_of_persons = number_of_persons_similarity * int(self.Number_Of_Persons_Weight_cb.get())
        global_region = region_similarity * int(self.Region_Weight_cb.get())
        global_transportation = transportation_similarity * int(self.Transportation_Weight_cb.get())
        global_duration = duration_similarity * int(self.Duration_Weight_cb.get())
        global_season = season_similarity * int(self.Season_Weight_cb.get())
        global_accommodation = accommodation_similarity * int(self.Accommodation_Type_Weight_cb.get())
        total_similarity = global_holiday_type + global_price + global_number_of_persons + \
            global_region + global_transportation + global_duration + \
            global_season + global_accommodation
        global_similarity = total_similarity / Total_Weight
        index_list.append([i, global_similarity,
                           [holiday_type_similarity, price_similarity, number_of_persons_similarity,
                            region_similarity, transportation_similarity, duration_similarity,
                            season_similarity, accommodation_similarity]])

    # Rank and RETRIEVE the 10 most similar cases
    index_list.sort(key=lambda x: x[1], reverse=True)
    k = 10
    for i in range(k):
        cases_index = index_list[i][0]
        case = cases[cases_index]
        output += "Similarity score: " + str(round(index_list[i][1], 3)) + "\n" + \
                  "Holiday Type: " + case.holiday_type + "\n" + \
                  "Price: " + str(case.price) + "\n" + \
                  "Number of Persons: " + str(case.number_of_persons) + "\n" + \
                  "Region: " + case.region + "\n" + \
                  "Transportation: " + case.transportation + "\n" + \
                  "Duration: " + str(case.duration) + "\n" + \
                  "Season: " + case.season + "\n" + \
                  "Accommodation: " + case.accommodation + "\n" + \
                  "Hotel: " + case.hotel + "\n \n"
        # An alternative output format also appends the individual local similarity
        # scores (index_list[i][2][0..7]) after each attribute line.

    # Display output
    self.text.delete(0.0, END)
    self.text.insert(0.0, output)
    # [ser.write(i.to_bytes(1, byteorder='little')) for i in buf]
    if connected:
        ser.flushInput()
        ser.flushOutput()
        [ser.write(chr(i)) for i in buf]


def sum_str(str_arr):
    string = ""
    for i in str_arr:
        string += i
    return string


# ----------------------------------- Variables ----------------------------------- #
sub = Subscribers()
init_pose_pub = rospy.Publisher("/init_pose", UInt8, queue_size=1)
sim = Similarity.Similarity()
Posenames = []
finger_state = []
max_power = []
root = Tk()
Min = 0.95
tb_defalt = "new pose name or filename to load and save"
th1 = th.Thread(target=find_proc)
st_flg = False
file_path = "/home/fumyia/"
portname = "/dev/ttyACM1"
baudrate = 115200
connected = False
try:
    ser = serial.Serial(portname, baudrate)
    connected = True
#             words[key[0]] = scores[scores_index[i]] * key[1]
# words = sorted(words.items(), key=lambda i: i[1], reverse=True)[:10]
# print('dot_att_', words)
if len(udata) > 0:
    ldaModel = LDA.LDAModel(data, n_topics=num_topics)
    data_vec = []
    for i in range(num_data_final):
        data_vec.append(ldaModel.get_doc_vec(data[i]))
    user_doc_vec = []
    for text in udata:
        user_doc_vec.append(ldaModel.get_doc_vec(text))
    scores = Similarity.avg_sim(user_doc_vec, data_vec)
    scores_index = np.argsort(scores)[::-1]
    words = {}
    for i in range(num_cand):
        # print(data[scores_index[i]])
        keys = jieba.analyse.extract_tags(ldaModel.preprocess.deltag(data[scores_index[i]]),
                                          num_word, withWeight=True)
        # print(keys)
        for key in keys:
            if key[0] not in key_set:
                if key[0] in words:
                    words[key[0]] += scores[scores_index[i]] * key[1]
                else:
                    words[key[0]] = scores[scores_index[i]] * key[1]
    words = sorted(words.items(), key=lambda i: i[1], reverse=True)[:10]
    raise ValueError("Test file list path '{}' does not exist!".format(args.test_file_list))
else:
    with open(args.test_file_list, mode='r') as f:
        all_test_file = f.readlines()
    for file_path in all_test_file:
        if not os.path.exists(file_path.strip()):
            # print(file_path)
            raise ValueError('Please move all test files into the current folder, '
                             'or use absolute paths in the list!')

if args.flag:
    print('Loading...')
    wv_from_text = KeyedVectors.load_word2vec_format(args.pre_train_txt, binary=False)
    print('Done\n')
    S = Similarity(wv_from_text)
else:
    print('Training...')
    model = word2vec.Word2Vec(whole_file_sens, hs=1, min_count=1, window=3, size=200)
    print('Done\n')
    wv_simple = model.wv
    del model
    S = Similarity(wv_simple)

for file_path in all_test_file:
    f = codecs.open(file_path.strip())
    txt = f.read()
import TextPreprocess
import Process
import Features
import Similarity
import sys

wordlist_s = TextPreprocess.Tokens(sys.argv[1])
wordlist_e = TextPreprocess.Tokens(sys.argv[2])
dict_s = Process.DictBiulder(wordlist_s)
dict_e = Process.DictBiulder(wordlist_e)
Fin_dict = Process.MergeDict(dict_s, dict_e)
V1 = Features.GetVector(dict_s, Fin_dict)
V2 = Features.GetVector(dict_e, Fin_dict)
ans = Similarity.CosineSimilarity(V1, V2)
with open(sys.argv[3], 'w') as f_obj:
    if ans == 0:
        f_obj.write("0.00")
    else:
        temp = str(ans)
        contents = ''
        for i in range(0, 4):
            contents = contents + temp[i]
        f_obj.write(contents)
def testMethods(testFile, methodList, output_dir):
    CommonOperation.checkDirectory(output_dir)
    index2Class, serialMatrix = loadTest(testFile)
    N = len(index2Class)
    assert N == serialMatrix.shape[0]
    clsSizes = dict([[cls, len([e for e in index2Class if e == cls])] for cls in set(index2Class)])
    clsMaxScores = dict([[cls, np.sum([N - e for e in range(clsSizes[cls])])] for cls in set(index2Class)])
    clsMinScores = dict([[cls, np.sum([e + 1 for e in range(clsSizes[cls] - 1)]) + N] for cls in set(index2Class)])
    method2ScoreRate = {}
    method2ErrorRate = {}
    for method in methodList:
        print("method:{0}".format(method))
        scoreValues = []
        scoreRates = []
        errorCounts = []
        errorRates = []
        matrix_dump_file = join(output_dir, "similarity_matrix", "{0}".format(method))
        if (os.path.exists(matrix_dump_file)):
            simiarityMatrix = CommonOperation.pickleLoad(matrix_dump_file, {})
        else:
            simiarityMatrix = np.zeros([N, N], dtype=float)
            for i in range(N):
                cls = index2Class[i]
                print("{0}(id:\033[1;{2}m{1}\033[0m)".format(cls, i, cls + 31), end=",")
                for j in range(i, N):
                    s = Similarity.Similarity(serialMatrix[i], serialMatrix[j])
                    simiarity_ij = s.use_method(method)
                    simiarityMatrix[i][j] = simiarity_ij
                    simiarityMatrix[j][i] = simiarity_ij
            print()
            CommonOperation.pickleDump(matrix_dump_file, simiarityMatrix)
        for i in range(N):
            cls = index2Class[i]
            similarRank = list(np.argsort(simiarityMatrix[i]))
            similarRank.reverse()
            scoreValues.append(np.sum([(N - j) for j, r in enumerate(similarRank) if index2Class[r] == cls]))
            scoreRates.append(100.0 * (scoreValues[-1] - clsMinScores[cls]) / (clsMaxScores[cls] - clsMinScores[cls]))
            errorCounts.append(clsSizes[cls] - len([r for r in similarRank[:clsSizes[cls]] if index2Class[r] == cls]))
            errorRates.append(100.0 * errorCounts[-1] / clsSizes[cls])
            # for j in range(N):
            #     print("{0}(id:\033[1;{2}m{1}\033[0m,sim:{3})".format(index2Class[similarRank[j]], similarRank[j], index2Class[similarRank[j]] + 31, simiarityMatrix[i][similarRank[j]]), end=",")
            print("statistic:score:{0}({1}%),errorCount:{2}/({3}%)".format(
                scoreValues[-1], scoreRates[-1], errorCounts[-1], errorRates[-1]))
        # print("final statistic(average):\n\tscoreRate:{0}\n\terrorRate:{1}".format(np.average(scoreRates), np.average(errorRates)))
        method2ScoreRate[method] = scoreRates
        method2ErrorRate[method] = errorRates
        plt.figure(figsize=(40, 10))
        plt.bar(range(N), scoreRates)
        plt.xlabel("index")  # set the x-axis label
        plt.ylabel("score rate")  # set the y-axis label
        plt.title("{0} score rate ".format(method))
        plt.ylim(50, 100)
        plt.savefig(join(output_dir, "{0}_score_rate.png".format(method)))
        plt.close()
        plt.figure(figsize=(40, 10))
        plt.bar(range(N), errorRates)
        plt.xlabel("index")  # set the x-axis label
        plt.ylabel("error rate")  # set the y-axis label
        plt.title("{0} error rate ".format(method))
        plt.ylim(0, 50)
        plt.savefig(join(output_dir, "{0}_error_rate.png".format(method)))
        plt.close()
        print("statistic(average):\n|algorithm|scoreRate|errorRate|\n|:-:| :-: | :-: |")
        print("|{0}|{1}|{2}|".format(method, "%.3f" % np.average(scoreRates), "%.3f" % np.average(errorRates)))
    methodList = sorted(methodList, key=lambda method: np.average(method2ErrorRate[method]))
    print(methodList)
    print("final statistic(average):\n|algorithm|scoreRate(%)| errorRate(%)|\n|:-:| :-: | :-: |")
    for method in methodList:
        print("|{0}|{1}|{2}|".format(method, "%.2f" % np.average(method2ScoreRate[method]),
                                     "%.2f" % np.average(method2ErrorRate[method])))
    CommonOperation.pickleDump(join(output_dir, "method2ScoreRate"), method2ScoreRate)
    CommonOperation.pickleDump(join(output_dir, "method2ErrorRate"), method2ErrorRate)
class LexC:
    d = s.Similarity()
    sim_dict = {}

    def calculate_centroid(self, c, features, weights, prev_dict, next_dict):
        clusters = c
        centroid = []
        print("Calculating Centroids------")
        for wordi in clusters:
            r = []
            for wordj in clusters[wordi]:
                self.sim_dict.setdefault(wordj, {})
                rj = 0
                for word in clusters[wordi]:
                    di = self.d.calculate_distance(wordj, word, features, weights, prev_dict, next_dict)
                    self.sim_dict[wordj][word] = di
                    rj = rj + di
                r.append(rj)
            m = np.argmax(r)
            centroid.append(clusters[wordi][m])
        print("Centroids Calculated------")
        return centroid

    def make_clusters(self, initial_clusters, max_iteration, threshold, features, weights, prev_dict, next_dict):
        words = list(prev_dict.keys())
        c = initial_clusters
        for it in range(max_iteration):
            print("Iteration Number:" + str(it) + "-----")
            centroid = self.calculate_centroid(c, features, weights, prev_dict, next_dict)
            cluster = {}
            for i in centroid:
                cluster.setdefault(i, [])
            print("Making Clusters----")
            for w in words:
                print(w)
                closest = 0
                maxSim = 0
                x = list(self.sim_dict[w].keys())
                inter = list(set(x).intersection(set(centroid)))
                for ci in inter:
                    try:
                        if (w[0] == ci[0] and w[1] == ci[1]):
                            if self.sim_dict[w][ci] > threshold and self.sim_dict[w][ci] > maxSim:
                                maxSim = self.sim_dict[w][ci]
                                closest = ci
                    except:
                        if (w[0] == ci[0]):
                            if self.sim_dict[w][ci] > threshold and self.sim_dict[w][ci] > maxSim:
                                maxSim = self.sim_dict[w][ci]
                                closest = ci
                if closest != 0:
                    cluster[closest].append(w)
                else:
                    key = w
                    cluster.setdefault(key, [])
                    centroid.append(key)
            clusters = {}
            x = 0
            for i in cluster:
                clusters[x] = cluster[i]
                if (i not in clusters[x]):
                    clusters[x].append(i)
                x += 1
            c = clusters
        print("Writing Clusters to file-----")
        self.write_results(clusters, features, weights)
        return clusters

    def write_results(self, clusters, features, weights):
        filename = features + str(weights[0]) + "_" + str(weights[1]) + "_" + str(weights[2]) + "_" + str(weights[3])
        file = open("Result/" + filename + " Cluster.txt", "w")
        for i in clusters:
            file.write(str(i) + " ")
            for j in clusters[i]:
                file.write(j + ",")
            file.write("\n")
def compute_similarity(patient_list):
    global patients_pointer, tanimoto_edges_output, tanimoto_nodes_output, tanimoto_weighted_output, \
        tanimoto_bigrams_output, jaccard_output
    # calculate the similarity and output it to the files
    od = OrderedDict(sorted(patient_list.items()))
    patients_pointer = od
    # create a list of the other patients
    other_patients = copy.deepcopy(patients_pointer)
    # iterate over the patients and compute the similarity
    for keyA, patientA in patients_pointer.items():
        # if keyA != 10013:
        #     continue
        # store the scores of the test
        tanimoto_edges_scores = []
        tanimoto_nodes_scores = []
        tanimoto_bigrams_scores = []
        jaccard_scores = []
        # iterate over all of the other patients
        for keyB, patientB in other_patients.items():
            # if keyB != 10023:
            #     continue
            # Find all of the common edges between patient A and patient B
            common_list = sorted(list(set(patientA.get_all_edges()) | set(patientB.get_all_edges())))
            # Find all of the common node/node pairs between patient A and patient B
            common_combined_nodes = sorted(
                list(set(patientA.get_all_combined_nodes()) | set(patientB.get_all_combined_nodes())))
            bigrams_a = patientA.get_all_combined_nodes_bigrams()
            bigrams_b = patientB.get_all_combined_nodes_bigrams()
            common_nodes_bigrams = sorted(list(set(bigrams_a) | set(bigrams_b)))
            # Create the edge vector for each patient
            vector_a_edges = patientA.get_edges_vector(common_list)
            vector_b_edges = patientB.get_edges_vector(common_list)
            # Create the node vector for each patient
            vector_a_nodes = patientA.get_nodes_vector(common_combined_nodes, False)
            vector_b_nodes = patientB.get_nodes_vector(common_combined_nodes, False)
            vector_a_nodes_bigrams = patientA.get_nodes_vector(common_nodes_bigrams, True)
            vector_b_nodes_bigrams = patientB.get_nodes_vector(common_nodes_bigrams, True)
            # Compute the scores
            tanimoto_nodes = sim.compute_tanimoto_coeff(vector_a_nodes, vector_b_nodes)
            tanimoto_edges = sim.compute_tanimoto_coeff(vector_a_edges, vector_b_edges)
            tanimoto_bigrams = sim.compute_tanimoto_coeff(vector_a_nodes_bigrams, vector_b_nodes_bigrams)
            jaccard = sim.compute_jaccard_coeff(patientA.get_all_unique_nodes(), patientB.get_all_unique_nodes())
            # Save the scores to their respective arrays
            tanimoto_edges_scores.append(tanimoto_edges)
            tanimoto_nodes_scores.append(tanimoto_nodes)
            tanimoto_bigrams_scores.append(tanimoto_bigrams)
            jaccard_scores.append(jaccard)
        # Find the maximum score (necessary because there may be no edges)
        max_edge_score = max(tanimoto_edges_scores)
        if max_edge_score == 0:
            tanimoto_edges_scores = [0 for i in tanimoto_edges_scores]
        else:
            tanimoto_edges_scores = [float(i) / max(tanimoto_edges_scores) for i in tanimoto_edges_scores]
        # Normalize all of the scores (tanimoto nodes, jaccard, and weighted tanimoto)
        tanimoto_nodes_scores = [float(i) / max(tanimoto_nodes_scores) for i in tanimoto_nodes_scores]
        tanimoto_bigrams_scores = [float(i) / max(tanimoto_bigrams_scores) for i in tanimoto_bigrams_scores]
        jaccard_scores = [float(i) / max(jaccard_scores) for i in jaccard_scores]
        tanimoto_weighted = [tanimoto_edges_scores[i] * 0.5 + tanimoto_bigrams_scores[i] * 0.5
                             for i in range(len(tanimoto_edges_scores))]
        tanimoto_edges_output[keyA] = sort_by_scores(tanimoto_edges_scores, other_patients)
        tanimoto_nodes_output[keyA] = sort_by_scores(tanimoto_nodes_scores, other_patients)
        tanimoto_bigrams_output[keyA] = sort_by_scores(tanimoto_bigrams_scores, other_patients)
        tanimoto_weighted_output[keyA] = sort_by_scores(tanimoto_weighted, other_patients)
        jaccard_output[keyA] = sort_by_scores(jaccard_scores, other_patients)
    query_users = 'select UserID from users'
    cursor.execute(query_users)
    for userID in cursor:
        users.append(userID[0])
except mysql.connector.Error as e:
    print('connect failed!{}'.format(e))

# print(movies[-1][0], users[-1][0])
# initialize the user-item matrix
print(users[-1], movies[-1])
dataMat = zeros((users[-1] + 1, movies[-1] + 1))
try:
    query_ratings = "select UserID, MovieID,Rating from ratings"
    cursor.execute(query_ratings)
    for userid, movieid, rating in cursor:
        # print(userid, movieid, rating)
        dataMat[userid, movieid] = rating
finally:
    cursor.close()
    conn.close()

# the result is a list of (index, rating) pairs
# print(Similarity.userSimiliar(mat(dataMat), 1, Similarity.cosSim))
result = Similarity.simBetweenUsers(mat(dataMat), users, Similarity.cosSim)
for user1 in users:
    for user2 in users:
        print(user1, user2, result[user1, user2])