Exemple #1
0
    def print_region_info(self):
        print "REGION INFO"
        print
        print "For each feature set, the average sim score per region by cluster and the fixed weights"
        print

        for x, cluster in enumerate(self.clusters):
            print "%d:\t%s\t%d\n" % (x, cluster.label, len(cluster.members))
            for name in cluster.center.get_feature_set_names():
                mats = map(lambda _doc: cluster.center.region_sim(_doc, name),
                           cluster.members)
                avg_mat = utils.avg_mats(mats)
                weight_mat = cluster.center.region_weights(name)
                print name
                utils.print_mat(utils.apply_mat(avg_mat, lambda x: "%.3f" % x))
                print
                utils.print_mat(
                    utils.apply_mat(weight_mat, lambda x: "%.3f" % x))
                print

            #list_of_sim_mats = map(lambda _doc: cluster.center.similarity_mats_by_name(_doc), cluster.members)
            #list_of_weight_mats = cluster.center.similarity_weights_by_name(cluster.members[0])
            #for name in cluster.center.similarity_function_names():
            #	mats = map(lambda x: x[name], list_of_sim_mats)
            #	avg_mat = utils.avg_mats(mats)
            #	weight_mat = list_of_weight_mats[name]
            #	print name
            #	utils.print_mat(utils.apply_mat(avg_mat, lambda x: "%.3f" % x))
            #	print
            #	utils.print_mat(utils.apply_mat(weight_mat, lambda x: "%.3f" % x))
            #	print
        print
        print
Exemple #2
0
def main(args):
    if(len(args) != 1):
        print "Usage: mds.py C clustering.pkl"
        print "     C is the cluster in clustering.pkl to display"
        sys.exit(0)

    #C = int(args[1])
    #path = args[2]
    
    print "Loading"
    #clustering = utils.load_obj(path)

    #docs = clustering[C].members
    docs = doc.get_docs_nested(driver.get_data_dir("small"))

    print "Calculating Pairwise Similarities"
    similarities = utils.pairwise(docs, lambda x,y: x.similarity(y))

    #print "INITIAL SIMILARITIES:"
    #utils.print_mat(similarities)

    #similarities = [[0,93,82,133],[93,0,52,60],[82,52,0,111],[133,60,111,0]]

    print "Starting MDS"
    #pos = reduction(similarities)
    pos = classicMDS(similarities)

    print "MDS:"
    utils.print_mat(pos)
Exemple #3
0
	def print_region_info(self):
		print "REGION INFO"
		print
		print "For each feature set, the average sim score per region by cluster and the fixed weights"
		print

		for x, cluster in enumerate(self.clusters):
			print "%d:\t%s\t%d\n" % (x, cluster.label, len(cluster.members))
			for name in cluster.center.get_feature_set_names():
				mats = map(lambda _doc: cluster.center.region_sim(_doc, name), cluster.members)
				avg_mat = utils.avg_mats(mats)
				weight_mat = cluster.center.region_weights(name)
				print name
				utils.print_mat(utils.apply_mat(avg_mat, lambda x: "%.3f" % x))
				print
				utils.print_mat(utils.apply_mat(weight_mat, lambda x: "%.3f" % x))
				print



			#list_of_sim_mats = map(lambda _doc: cluster.center.similarity_mats_by_name(_doc), cluster.members)
			#list_of_weight_mats = cluster.center.similarity_weights_by_name(cluster.members[0])
			#for name in cluster.center.similarity_function_names():
			#	mats = map(lambda x: x[name], list_of_sim_mats)
			#	avg_mat = utils.avg_mats(mats)
			#	weight_mat = list_of_weight_mats[name]
			#	print name
			#	utils.print_mat(utils.apply_mat(avg_mat, lambda x: "%.3f" % x))
			#	print
			#	utils.print_mat(utils.apply_mat(weight_mat, lambda x: "%.3f" % x))
			#	print
		print
		print
Exemple #4
0
def check_init():
	docs = doc.get_docs_nested(get_data_dir("test"))
	random.shuffle(docs)
	confirm = cluster.MaxCliqueInitCONFIRM(docs, 2, 10)
	confirm._init_clusters()

	print
	print "Cluster Sim Mat"
	sim_mat = confirm.get_cluster_sim_mat()
	utils.print_mat(utils.apply_mat(sim_mat, lambda x: "%3.2f" % x))
Exemple #5
0
	def display(self):
		'''
		prints the cost_mat and the op_mat
		'''
		print "Cost Matrix"
		utils.print_mat(self.cost_mat)
		print
		print "Operations Matrix"
		utils.print_mat(self.op_mat)
		print
Exemple #6
0
 def print_label_conf_mat(self):
     print "LABEL CONFUSION MATRIX:"
     print "\tRows are actual predictions.  Columns are true labels"
     mat = self.calc_conf_mat()
     labels = sorted(mat.keys())
     mat = utils.format_as_mat(mat)
     utils.insert_indices(mat)
     print "\tMat index\tLabel Name"
     for x, label in enumerate(labels):
         print "\t%d:\t%s" % (x, label)
     print
     utils.print_mat(mat)
     print
     print
Exemple #7
0
	def print_label_conf_mat(self):
		print "LABEL CONFUSION MATRIX:"
		print "\tRows are actual predictions.  Columns are true labels"
		mat = self.calc_conf_mat()
		labels = sorted(mat.keys())
		mat = utils.format_as_mat(mat)
		utils.insert_indices(mat)
		print "\tMat index\tLabel Name"
		for x,label in enumerate(labels):
			print "\t%d:\t%s" % (x, label)
		print
		utils.print_mat(mat)
		print
		print
Exemple #8
0
	def _init_clusters(self):
		super(MaxCliqueInitCONFIRM, self)._init_clusters()
		sub_docs = self.docs[:self.num_instances]
		sim_mat = utils.pairwise(sub_docs, 
			lambda x, y: max(self.doc_similarity(x, y), self.doc_similarity(y, x)))

		print
		print "Doc Sim Mat"
		utils.print_mat(utils.apply_mat(sim_mat, lambda x: "%3.2f" % x))

		idxs = utils.find_best_clique(sim_mat, self.num_clust)

		print 
		print "Cluster Labels:"
		for idx in idxs:
			self._add_cluster(self.docs[idx], member=False)
			print idx, self.docs[idx].label
Exemple #9
0
    def print_label_cluster_mat(self):
        print "LABEL-CLUSTER MATRIX:"
        print "\tSeries of matricies.  Labels are rows.  Clusters are columns."
        mat = self.label_cluster_mat
        labels = sorted(mat.keys())
        mat = utils.format_as_mat(mat)
        clusters_per_mat = 20
        mats = utils.split_mat(mat, clusters_per_mat)  # 10 clusters per matrix

        print "\tMat index\tLabel Name"
        for x, label in enumerate(labels):
            print "\t%d:\t%s" % (x, label)
        print
        for x, mat in enumerate(mats):
            utils.insert_indices(mat, col_start=(clusters_per_mat * x))
            utils.print_mat(mat)
            print
        print
Exemple #10
0
	def print_label_cluster_mat(self):
		print "LABEL-CLUSTER MATRIX:"
		print "\tSeries of matricies.  Labels are rows.  Clusters are columns."
		mat = self.label_cluster_mat
		labels = sorted(mat.keys())
		mat = utils.format_as_mat(mat)
		clusters_per_mat = 20
		mats = utils.split_mat(mat, clusters_per_mat)  # 10 clusters per matrix

		print "\tMat index\tLabel Name"
		for x,label in enumerate(labels):
			print "\t%d:\t%s" % (x, label)
		print
		for x, mat in enumerate(mats):
			utils.insert_indices(mat, col_start=(clusters_per_mat * x))
			utils.print_mat(mat)
			print
		print 
Exemple #11
0
    def _init_clusters(self):
        super(MaxCliqueInitCONFIRM, self)._init_clusters()
        sub_docs = self.docs[:self.num_instances]
        sim_mat = utils.pairwise(
            sub_docs, lambda x, y: max(self.doc_similarity(x, y),
                                       self.doc_similarity(y, x)))

        print
        print "Doc Sim Mat"
        utils.print_mat(utils.apply_mat(sim_mat, lambda x: "%3.2f" % x))

        idxs = utils.find_best_clique(sim_mat, self.num_clust)

        print
        print "Cluster Labels:"
        for idx in idxs:
            self._add_cluster(self.docs[idx], member=False)
            print idx, self.docs[idx].label
Exemple #12
0
	def print_cluster_sim_mat(self):
		print "CLUSTER SIM MATRICES:"

		centers = map(lambda cluster: cluster.center, self.clusters)
		feature_set_names = self.docs[0].get_feature_set_names()
		for name in feature_set_names:
			print
			print "Similarity Type: %s" % name
			mat = utils.pairwise(centers, lambda doc1, doc2: doc1.global_sim(doc2, name))
			mat = utils.apply_mat(mat, lambda x: "%3.2f" % x)
			utils.insert_indices(mat)
			utils.print_mat(mat)

		print
		print "Similarity Type: Cluster sim by CONFIRM"
		sub_mat = utils.pairwise(self.clusters, lambda c1, c2: self.confirm.cluster_similarity(c1, c2))
		sub_mat = utils.apply_mat(sub_mat, lambda x: "%3.2f" % x)
		utils.insert_indices(sub_mat)
		utils.print_mat(sub_mat)
		print
		print
Exemple #13
0
def cmp_test():
	doc1 = doc.get_doc(single_dir, single_file)
	doc2 = doc.get_doc(second_dir, second_file)
	doc1._load_check()
	doc2._load_check()

	doc1.display()
	doc2.display()

	global_region_sims = doc1.global_region_sim(doc2)
	global_region_weights = doc1.global_region_weights()
	global_sims = doc1.global_sim(doc2)
	region_sims = doc1.region_sim(doc2)
	region_weights1 = doc1.region_weights()
	region_weights2 = doc2.region_weights()


	for x, name in enumerate(doc1.feature_set_names):
		print
		print name
		print "Global Sim:", global_sims[x]
		print "Region Sims:"
		print
		utils.print_mat(utils.apply_mat(region_sims[x], lambda x: "%.3f" % x))
		print
		print "Region Weights doc1:"
		print
		utils.print_mat(utils.apply_mat(region_weights1[x], lambda x: "%.3f" % x))
		print
		print "Region Weights doc2:"
		print
		utils.print_mat(utils.apply_mat(region_weights2[x], lambda x: "%.3f" % x))
	print
	print "Match Vec"
	print
	match_vec = doc1.match_vector(doc2)
	for x in xrange(len(match_vec) / 10):
		print match_vec[10 * x: 10 * (x + 1)]
	print

	print "Sim Vector:"
	print " ".join(map(lambda x: "%.2f" % x, global_region_sims))
	print "Sim Weights:"
	print " ".join(map(lambda x: "%.2f" % x, global_region_weights))

	#doc1.draw().save("output/doc1.png")
	#doc2.draw().save("output/doc2.png")

	#doc1.push_away(doc2)
	#doc1.draw().save("output/doc1_pushed.png")
	#doc2.draw().save("output/doc2_pushed.png")

	#doc1.push_away(doc2)

	doc1.aggregate(doc2)
	doc1.display()
Exemple #14
0
    def print_cluster_sim_mat(self):
        print "CLUSTER SIM MATRICES:"

        centers = map(lambda cluster: cluster.center, self.clusters)
        feature_set_names = self.docs[0].get_feature_set_names()
        for name in feature_set_names:
            print
            print "Similarity Type: %s" % name
            mat = utils.pairwise(
                centers, lambda doc1, doc2: doc1.global_sim(doc2, name))
            mat = utils.apply_mat(mat, lambda x: "%3.2f" % x)
            utils.insert_indices(mat)
            utils.print_mat(mat)

        print
        print "Similarity Type: Cluster sim by CONFIRM"
        sub_mat = utils.pairwise(
            self.clusters,
            lambda c1, c2: self.confirm.cluster_similarity(c1, c2))
        sub_mat = utils.apply_mat(sub_mat, lambda x: "%3.2f" % x)
        utils.insert_indices(sub_mat)
        utils.print_mat(sub_mat)
        print
        print
Exemple #15
0
            break
        x = R[idx:, idx]
        if np.linalg.norm(x) == 0.:
            continue
        e = np.zeros_like(x)
        e[0] = np.linalg.norm(x)
        u = x - e
        v = u / np.linalg.norm(u)
        Q_cnt = np.identity(m)
        Q_cnt[idx:, idx:] -= 2.0 * np.outer(v, v)  #
        R = np.dot(Q_cnt, R)  #R=P1P2..PnA
        Q = np.dot(Q_cnt, Q)  #Q=P1P2P3...Pn
    return np.round(Q.T, 3), np.round(R, 3)  #保留三位
    # return Q.T,R


if __name__ == "__main__":
    # Script entry: factor the matrix stored in data.txt with
    # Householder_Reduction, print the product Q.R (rounded to two decimals)
    # and then both factors.
    path = r'data.txt'
    matrix = load_mat(path, "HR")
    if matrix.size == 0:
        print("input Error!")
        # A bare sys.exit() reports success; signal the failure to the caller.
        sys.exit(1)
    Q, R = Householder_Reduction(matrix)
    # Q is printed with R's row count for both dimensions, as before.
    m, n = R.shape
    print(np.round(np.dot(Q, R), 2))
    print("Q=")
    print_mat(Q, m, m)
    print("R=")
    print_mat(R, m, n)
Exemple #16
0
def main(args):
    """Load a matrix and print the factorization selected by ``args.mode``.

    Args:
        args: object exposing ``files`` (path handed to ``load_mat``) and
            ``mode``.  The modes handled here are "LU", "QR", "Householder",
            "Givens" and "URV"; any other mode prints only the input matrix
            and the progress banner.

    The input matrix is echoed first, then every factor returned by the
    selected routine is printed under its label.  For "LU" all three factors
    are printed with the row count of ``P`` for both dimensions; the other
    modes print each factor with its own shape.

    Exits with status 1 when the loaded matrix is empty (previously a bare
    ``sys.exit()`` reported success after printing the error).

    NOTE(review): in "QR" mode a rank-deficient input only triggers a
    warning message; the factorization is still attempted.
    """
    mode = args.mode
    matrix = load_mat(args.files, mode)
    if matrix.size == 0:
        print("input Matrix Error!")
        sys.exit(1)

    m, n = matrix.shape
    if mode == "LU":
        print("LU Factorization, the input should be a square matrix.\n")
    elif mode == "QR":
        if np.linalg.matrix_rank(matrix) < n:
            print(
                "Error!\n QR Factorization, The matrix has linearly dependent columns can not be uniquely factored as A=QR!\n"
            )
    print("=" * 50, "\norigin matrix type: {m} * {n}".format(m=m, n=n),
          "\nOrigin Matrix A = ")
    print_mat(matrix, m, n)
    print("\nThe factorization is processing!\n ")

    if mode == "LU":
        P, L, U = LU_factorization(matrix)
        size = P.shape[0]
        for label, factor in (("L=", L), ("U=", U), ("P=", P)):
            print(label)
            print_mat(factor, size, size)
        return

    # The remaining modes all print each factor with its own shape.
    if mode == "QR":
        Q, R = QR(matrix)
        labelled = (("Q=", Q), ("R=", R))
    elif mode == "Householder":
        Q, R = Householder_Reduction(matrix)
        labelled = (("Q=", Q), ("R=", R))
    elif mode == "Givens":
        Q, R = Givens_Reduction(matrix)
        labelled = (("Q=", Q), ("R=", R))
    elif mode == "URV":
        U, R, V = URV(matrix)
        labelled = (("U=", U), ("R=", R), ("V=", V))
    else:
        labelled = ()

    for label, factor in labelled:
        rows, cols = factor.shape
        print(label)
        print_mat(factor, rows, cols)
Exemple #17
0
def show_pixel(mat):
    """Pass ``mat`` to ``print_mat`` with non-zero entries mapped to 'M' and zeros to ' '."""
    def to_glyph(value):
        if value != 0:
            return 'M'
        return ' '

    print_mat(mat, replace=to_glyph)
Exemple #18
0
            mat[i, row] = factor
            plus = -1 * factor * mat[row, row + 1:-1]
            mat[i, row + 1:-1] += plus
        temp = mat.copy()
        # print_mat(mat,m,n)
    U = np.triu(mat[:, :-1], 0)
    for i in range(m):
        temp[i, i] = 1
    L = np.tril(temp[:, :-1], 0)
    P = np.zeros(shape=(m, n - 1))
    for j in range(m):
        row_idx = int(mat[j, -1])
        P[j, row_idx - 1] = 1
    return P, L, U


if __name__ == "__main__":
    # Script entry: LU-factor the matrix stored in data.txt and print the
    # L, U and P factors.
    path = r"data.txt"
    matrix = load_mat(path, "LU")
    if matrix.size == 0:
        print("input Error!")
        # A bare sys.exit() reports success; signal the failure to the caller.
        sys.exit(1)
    P, L, U = LU_factorization(matrix)
    # All three factors are printed with P's row count for both dimensions.
    m, n = P.shape
    print("L=")
    print_mat(L, m, m)
    print("U=")
    print_mat(U, m, m)
    print("P=")
    print_mat(P, m, m)
Exemple #19
0
    输出U为mxm正交矩阵,R为nxn正交矩阵,R为分块矩阵mxn
    """
    Q, R = Givens_Reduction(mat)
    # print(R)
    r = np.linalg.matrix_rank(mat)
    mat1 = R[0:r, :]
    Q1, R1 = Givens_Reduction(mat1.T)
    # Q1, R1 = Givens_Reduction(R.T)
    U = Q
    V = Q1
    R1 = np.dot(np.dot(U.T, mat), V)
    return U, R1, V


if __name__ == "__main__":
    # Script entry: run URV on the matrix stored in data.txt, print the three
    # factors and finally the product U.R.V^T.
    path = r'data.txt'
    matrix = load_mat(path, "HR")
    if matrix.size == 0:
        print("input Error!")
        # A bare sys.exit() reports success; signal the failure to the caller.
        sys.exit(1)
    U, R, V = URV(matrix)
    m, n = U.shape
    m1, n1 = R.shape
    m2, n2 = V.shape
    print("U=")
    print_mat(U, m, n)
    print("R=")
    print_mat(R, m1, n1)
    print("V=")
    print_mat(V, m2, n2)
    print(np.dot(np.dot(U, R), V.T))
Exemple #20
0
            norm = math.sqrt(R[i, i] ** 2 + cur_col[j] ** 2)
            P_cnt = np.identity(m)
            P_cnt[i, i] = R[i, i] / norm                #计算旋转矩阵P_cnt
            P_cnt[i, i + j + 1] = cur_col[j] / norm
            P_cnt[i + j + 1, i] = -cur_col[j] / norm
            P_cnt[i + j + 1, i + j + 1] = R[i, i] / norm

            Q = np.dot(P_cnt, Q)    #Q=P1P2P3...Pn
            R = np.dot(P_cnt, R)    #R=P1P2..PnA
            # Q = np.round(np.dot(P_cnt, Q), 4)
            # R = np.round(np.dot(P_cnt, R), 4)
    return np.round(Q.T, 3), np.round(R, 3)  #保留三位
    # return Q.T,R


if __name__ == "__main__":
    # Script entry: factor the matrix stored in data.txt with
    # Givens_Reduction, print the product Q.R (rounded to two decimals) and
    # then both factors.
    path = r'data.txt'
    matrix = load_mat(path, "HR")
    if matrix.size == 0:
        print("input Error!")
        # A bare sys.exit() reports success; signal the failure to the caller.
        sys.exit(1)
    Q, R = Givens_Reduction(matrix)
    a = np.round(np.dot(Q, R), 2)
    print(a)
    m1, _ = Q.shape
    m, n = R.shape
    print("Q=")
    # Q is printed with its own row count for both dimensions.
    print_mat(Q, m1, m1)
    print("R=")
    print_mat(R, m, n)
Exemple #21
0
def show_pixel(mat):
    """Pass ``mat`` to ``print_mat`` with entries equal to 255 mapped to 'M' and all others to ' '."""
    def to_glyph(value):
        if value == 255:
            return 'M'
        return ' '

    print_mat(mat, replace=to_glyph)