def MainFunction():
    #CRIEA Start!
    #Clear any previous output directory
    if os.path.exists("Output/"):
        if Init.SystemJudge() == 0:
            os.system("rm -r Output")
        else:
            os.system("rmdir /s /q Output")
    NameArr = Pretreatment.FigureInput(1)
    try:
        if NameArr == -1:
            return
    except:
        pass
    #Figure traversal
    for kase in range(0, len(NameArr)):
        img = np.array(Image.open(NameArr[kase]).convert("L"))
        img = Pretreatment.BFSmooth(img)
        [Tobimg, NodeInfo] = Algorithm.Toboggan(img)
        [Upground, Background] = Algorithm.HandSeed(Tobimg, img, Surround)
        Seeds = Upground | Background
        ProbBlock = []
        VarL = 0
        if Method == "Lap":
            NodeInfo, VarL = Functions.SeedFirst(NodeInfo, Seeds)
            LapEqu = Algorithm.Laplacian(NodeInfo, VarL)
            ProbBlock = Functions.LinearEquation(LapEqu, len(NodeInfo) - VarL, VarL)
def NEW_PTM_WKNN(data_set):
    train, test = Pretreatment.partitioningTPMKNN(data_set)
    normalized_train_matrix = Pretreatment.normalization(train)
    normalized_test_matrix = Pretreatment.normalization(test)
    klist = np.zeros((normalized_train_matrix.shape[0], 1))
    predictions = np.zeros((normalized_test_matrix.shape[0], 1))
    for unknowid in range(0, len(normalized_train_matrix)):
        # Attach a local k label to each training sample
        klist[unknowid] = PTM_KNN.trainlocal_k(normalized_train_matrix, unknowid)
    new_normalized_train_matrix = np.column_stack(
        (normalized_train_matrix, klist))
    for unknowid_text in range(0, len(normalized_test_matrix)):
        # Take the largest local k among the test sample's three nearest
        # points as the best k
        best_c = bestc(new_normalized_train_matrix, normalized_test_matrix,
                       unknowid_text, data_set)  # search for the c value
        new_k1 = PTM_KNN.testlocal_k(new_normalized_train_matrix,
                                     normalized_test_matrix, unknowid_text,
                                     best_c)  # choose k through the c value
        predictions[unknowid_text] = WKNN.weighted_knn(
            unknowid_text, normalized_train_matrix, int(new_k1),
            normalized_test_matrix)  # predict the class with the highest weight among the k neighbours
    TP, FP, FN, TN = analysis.TPFPFNTN(normalized_test_matrix, predictions)
    return (analysis.recall(TP, FN), analysis.F_score(TP, FP, FN),
            analysis.G_mean(TP, FN, TN, FP), TP, FN, TN, FP)
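# Usage sketch for NEW_PTM_WKNN (hypothetical: a loader named load_data_set
# is assumed here for illustration; substitute whatever loader this repo
# actually uses to obtain the raw data matrix):
#
# data_set = Pretreatment.load_data_set("ecoli.csv")   # hypothetical loader
# recall, f_score, g_mean, TP, FN, TN, FP = NEW_PTM_WKNN(data_set)
# print("recall=%.3f  F=%.3f  G-mean=%.3f" % (recall, f_score, g_mean))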
def BWError():
    if os.path.exists("Output/"):
        if Init.SystemJudge() == 0:
            os.system("rm -r Output")
        else:
            os.system("rmdir /s /q Output")
    NameArr = Pretreatment.FigureInput(1)
    try:
        if NameArr == -1:
            return
    except:
        pass
    #Figure traversal
    for kase in range(0, len(NameArr)):
        img = np.array(Image.open(NameArr[kase]).convert("L"))
        """
        #Disabled: sprinkle Gaussian noise over ~2% of the pixels
        for i in range(0, len(img)):
            for j in range(0, len(img[i])):
                if random.randint(1, 50) == 1:
                    img[i][j] += np.random.normal(img[i][j], 64)
                    img[i][j] = max(0, img[i][j])
                    img[i][j] = min(255, img[i][j])
        """
        Name = "Figure_"
        Name += str(Init.GetTime())
        Name += ".png"
        Pretreatment.Output(img, Name, 2)
def bestc(normalized_train_matrix, normalized_test_matrix, unknowid, data_set):
    unknow_instance = normalized_test_matrix[unknowid, :-1]  # the unknown sample
    unknow_distance = distance.e_distance_calculation(
        unknow_instance, normalized_train_matrix[:, :-2])
    label_vector = normalized_train_matrix[:, -1]
    # Pair each distance with its local-k label, then sort by distance
    labeled_distance = np.column_stack((unknow_distance, label_vector))
    sorted_labeled_distance = labeled_distance[
        labeled_distance[:, 0].argsort()]
    max_k = -1  # renamed from 'max' to avoid shadowing the builtin
    real_accuracy = 0
    best_c = 1
    for cnumber in range(1, 3):  # candidate c values
        for c in range(0, cnumber):  # largest local k among the c nearest neighbours
            if sorted_labeled_distance[c][1] > max_k:
                max_k = sorted_labeled_distance[c][1]
        a = 0
        for k in range(1, 11):  # ten-fold cross validation
            train, test = Pretreatment.partitioning(data_set, k)  # ten-fold split
            normalized_train_matrix = Pretreatment.normalization(train)
            normalized_test_matrix = Pretreatment.normalization(test)
            predictions = np.zeros(
                (normalized_test_matrix.shape[0], 1))  # prediction buffer
            for unknowid_text in range(0, len(normalized_test_matrix)):
                predictions[unknowid_text] = WKNN.weighted_knn(
                    unknowid_text, normalized_train_matrix, int(max_k),
                    normalized_test_matrix)
            accuracy = analysis.getAccuracy(normalized_test_matrix, predictions)
            a = accuracy + a
        thistest_accuracy = float(a / 10)
        if thistest_accuracy > real_accuracy:  # keep the c with the best cross-validated accuracy
            real_accuracy = thistest_accuracy
            best_c = cnumber
    return best_c
def overtrace_area(temp1, temp2):  # overlap region of two strokes
    d1 = Pretreatment.distance(temp1[0][0], temp1[0][1], temp2[0][0], temp2[0][1])
    d2 = Pretreatment.distance(temp1[0][0], temp1[0][1], temp2[1][0], temp2[1][1])
    if d1 < d2:
        c = temp2[0]
        a = find_minpoint(c, temp1)
    else:
        a = temp1[0]
        c = find_minpoint(a, temp2)
    d3 = Pretreatment.distance(temp1[len(temp1) - 1][0], temp1[len(temp1) - 1][1],
                               temp2[len(temp2) - 1][0], temp2[len(temp2) - 1][1])
    d4 = Pretreatment.distance(temp1[len(temp1) - 1][0], temp1[len(temp1) - 1][1],
                               temp2[len(temp2) - 2][0], temp2[len(temp2) - 2][1])
    if d3 > d4:
        b = temp1[len(temp1) - 1]
        d = find_minpoint(b, temp2)
    else:
        d = temp2[len(temp2) - 1]
        b = find_minpoint(d, temp1)
    temp = [a, b, c, d]
    # print("The four corner points:", temp)
    return temp
def extend(temp1, temp2):  # merge two strokes of the extension type
    d1 = Pretreatment.distance(temp1[0][0], temp1[0][1], temp2[0][0], temp2[0][1])
    d2 = Pretreatment.distance(temp1[0][0], temp1[0][1], temp2[1][0], temp2[1][1])
    if d1 < d2:
        ftemp = temp1
        btemp = temp2
    else:
        ftemp = temp2
        btemp = temp1
    point = overtrace_area(ftemp, btemp)
    a = ftemp.index(point[0])
    b = ftemp.index(point[1])
    c = btemp.index(point[2])
    d = btemp.index(point[3])
    # print("a,b,c,d", a, b, c, d)
    if a > b:
        a, b = b, a
    if c > d:
        c, d = d, c
    # x = (point[0][0] + point[1][0] + point[2][0] + point[3][0]) / 4
    # y = (point[0][1] + point[1][1] + point[2][1] + point[3][1]) / 4
    # p1 = ftemp[a:b + 1]
    # p2 = btemp[c:d + 1]
    # # Find the points on the two strokes closest to the centre of the overlap region
    # n = find_minpoint([x, y], p1)
    # m = find_minpoint([x, y], p2)
    # i = ftemp.index(n)
    # j = btemp.index(m)
    # ntemp1 = ftemp[0:i + 1]
    # ntemp2 = btemp[j:len(btemp)]
    # mx, my = (n[0] + m[0]) / 2, (n[1] + m[1]) / 2
    # new_temp1 = Chord_weighting(ntemp1, [mx, my])  # new data points by chord-length weighting
    # new_temp1.append([mx, my])
    # ntemp2.reverse()
    # new_temp2 = Chord_weighting(ntemp2, [mx, my])
    # new_temp2.reverse()
    # new_temp = new_temp1 + new_temp2
    new_temp = []
    for i in range(a + 1):
        new_temp.append(ftemp[i])
    Mtemp = btemp[c:d + 1]
    Mtemp.reverse()
    ntemp = Chord_weighting(Mtemp, point[0])
    i = len(ntemp) - 1
    while i >= 0:
        new_temp.append(ntemp[i])
        i -= 1
    for i in range(d, len(btemp)):
        new_temp.append(btemp[i])
    print("new_temp", new_temp)
    return new_temp
def WKNN(data_set, best_k):
    train, test = Pretreatment.partitioningTPMKNN(data_set)
    normalized_train_matrix = Pretreatment.normalization(train)
    normalized_test_matrix = Pretreatment.normalization(test)
    predictions = np.zeros((normalized_test_matrix.shape[0], 1))
    for unknowid_text in range(0, len(normalized_test_matrix)):
        predictions[unknowid_text] = weighted_knn(unknowid_text,
                                                  normalized_train_matrix,
                                                  int(best_k),
                                                  normalized_test_matrix)
    TP, FP, FN, TN = analysis.TPFPFNTN(normalized_test_matrix, predictions)
    return (analysis.recall(TP, FN), analysis.F_score(TP, FP, FN),
            analysis.G_mean(TP, FN, TN, FP), TP, FN, TN, FP)
def the_direction_of_the_stroke(temp1, temp2):
    # Judge stroke direction from how the head/tail of the overlap region changes
    d1 = Pretreatment.distance(temp1[0][0], temp1[0][1], temp2[0][0], temp2[0][1])
    d2 = Pretreatment.distance(temp1[0][0], temp1[0][1],
                               temp2[len(temp2) - 1][0], temp2[len(temp2) - 1][1])
    return d1 < d2
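# Worked example for the_direction_of_the_stroke (a sketch; assumes
# Pretreatment.distance(x1, y1, x2, y2) is plain Euclidean distance):
# two strokes laid head-to-head point the same way, head-to-tail do not.
#
# temp1 = [[0, 0], [1, 0], [2, 0]]
# temp2 = [[0, 1], [1, 1], [2, 1]]                         # same direction as temp1
# print(the_direction_of_the_stroke(temp1, temp2))         # True: heads are closer
# print(the_direction_of_the_stroke(temp1, temp2[::-1]))   # False: reversed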
def Three_point_chord_weighting(p1, p2, p3):  # tangent direction at p2
    p1 = np.array(p1)
    p2 = np.array(p2)
    p3 = np.array(p3)
    v1 = p2 - p1
    v2 = p3 - p2
    l1 = Pretreatment.distance(p1[0], p1[1], p2[0], p2[1])
    l2 = Pretreatment.distance(p2[0], p2[1], p3[0], p3[1])
    nv1 = np.linalg.norm(v1)
    nv2 = np.linalg.norm(v2)
    # Chord-weighted blend of the two unit chord directions (the original used
    # v2 / nv2 in both terms, leaving v1 unused, which looks like a typo)
    t = (l1 / l2) * (v1 / nv1) + (l2 / l1) * (v2 / nv2)
    # print("tangent direction", t)
    return t
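# Quick numeric check for Three_point_chord_weighting (sketch; again assumes
# Pretreatment.distance is Euclidean). For three collinear points the blended
# tangent must point along the line:
#
# t = Three_point_chord_weighting([0, 0], [1, 0], [3, 0])
# # l1 = 1, l2 = 2, both unit chords are (1, 0), so
# # t = (1/2)*(1, 0) + (2/1)*(1, 0) = (2.5, 0) -- direction (1, 0) as expected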
def doMain(self):
    ioFunctions = IOFunctions.IOFunctions()
    pretreatment = Pretreatment.Pretreatment()
    raw = ioFunctions.ReadFile('D:\\codes\\Python\\PythonSpace\\NLTKTest\\datas\\articles.txt')
    # Strip the Unicode markers u" and u'
    raw = re.sub("u\"", "", raw)
    raw = re.sub("u\'", "", raw)
    sents = pretreatment.SenToken(raw)
    cleanSents = pretreatment.CleanSents(sents)
    words = pretreatment.WordToken(cleanSents)
    stemWords = pretreatment.StemWords(words)
    tagged_words = pretreatment.TagWords(stemWords)
    # chunked_words = pretreatment.ChunkWords(tagged_words)
    # iob = pretreatment.IOBTree(chunked_words)
    # Save the processed corpus to the output file
    file = open("datas\\output.txt", "w")
    file.truncate()
    for line in tagged_words:
        string = ""
        for mtuple in line:
            for word in mtuple:
                string += word + " "
            string += "\n"
        string += "\n"
        file.write(string)
    file.close()
def FouriorTrans():
    if os.path.exists("Output/"):
        if Init.SystemJudge() == 0:
            os.system("rm -r Output")
        else:
            os.system("rmdir /s /q Output")
    NameArr = Pretreatment.FigureInput(1)
    try:
        if NameArr == -1:
            return
    except:
        pass
    #Figure traversal
    for kase in range(0, len(NameArr)):
        img = np.array(Image.open(NameArr[kase]).convert("L"))
        Statistic = [0 for n in range(0, 260)]
        TTL = 0
        for i in range(0, len(img)):
            for j in range(0, len(img[i])):
                Statistic[img[i][j]] += 1
                TTL += 1
        #Discrete PDF of the grey levels
        Prob = [0.00 for n in range(260)]
        for i in range(0, len(Prob)):
            Prob[i] = Statistic[i] / TTL
        #HF = np.fft.fft(Prob).real
        fig1 = plt.figure()
        ax = fig1.add_subplot(111)
        plt.xlim(-1, 260)
        plt.ylim(0, 0.2)
        #Printing loop
        for i in range(0, len(Prob)):
            ax.add_patch(patches.Rectangle((i, 0), 1, Prob[i], color='black'))
        #Recover the base file name (between the last '/' and the last '.')
        Name = ""
        Hajimari = False
        for i in range(len(NameArr[kase]) - 1, -1, -1):
            if NameArr[kase][i] == ".":
                Hajimari = True
                continue
            elif NameArr[kase][i] == "/":
                break
            else:
                if Hajimari == False:
                    continue
                else:
                    Name = NameArr[kase][i] + Name
        Name += "_Histogram.png"
        print(Name)
        plt.savefig(Name)
    return
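# The per-pixel counting loop above can be expressed in one numpy call; a
# minimal equivalent sketch (same 260-bin layout, assuming img is a uint8
# grey-level array):
#
# counts = np.bincount(img.ravel(), minlength=260)
# Prob = counts / img.size   # discrete PDF, identical to Statistic / TTL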
def is_overdraw(max_temp, min_temp):  # test whether the two strokes overlap
    l = the_lengthest(max_temp)
    max_corpoint = break_point.turning_point1(max_temp)  # polyline corner points
    temp1 = []  # sample points of the short stroke that fall in the overlap region
    for i in range(1, len(max_corpoint)):
        f_point = max_corpoint[i - 1]
        b_point = max_corpoint[i]
        R, T = coordinate_transformation(f_point, b_point)
        length = Pretreatment.distance(f_point[0], f_point[1], b_point[0], b_point[1])
        x = length / 2  # tolerance along the segment
        y = 3 / 4 * float('%.2f' % (length**0.5)) + 2  # tolerance width
        h = 0
        while h < len(min_temp):
            X = np.array([min_temp[h][0], min_temp[h][1], 1])
            Y = R @ T @ X
            r1 = ((min_temp[h][0] - f_point[0])**2 + (min_temp[h][1] - f_point[1])**2)
            r2 = ((min_temp[h][0] - b_point[0])**2 + (min_temp[h][1] - b_point[1])**2)
            if math.fabs(Y[0]) <= x and math.fabs(Y[1]) <= y or r1 <= y * y or r2 <= y * y:
                if min_temp[h] not in temp1:
                    temp1.append(min_temp[h])
            h += 1
    k = len(temp1) / len(min_temp)
    return temp1, k
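# coordinate_transformation is not shown in this file; from its use above
# (Y = R @ T @ X on homogeneous points, then |Y[0]| tested against half the
# segment length and |Y[1]| against the tolerance width) it maps a point into
# a frame centred on the segment midpoint with the segment along the x-axis.
# A hypothetical sketch consistent with that usage (an assumption, not the
# repo's actual implementation):
#
# def _coordinate_transformation(f_point, b_point):
#     mx, my = (f_point[0] + b_point[0]) / 2, (f_point[1] + b_point[1]) / 2
#     theta = np.arctan2(b_point[1] - f_point[1], b_point[0] - f_point[0])
#     T = np.array([[1, 0, -mx], [0, 1, -my], [0, 0, 1]])  # midpoint to origin
#     R = np.array([[np.cos(theta), np.sin(theta), 0],
#                   [-np.sin(theta), np.cos(theta), 0],
#                   [0, 0, 1]])                            # segment onto x-axis
#     return R, T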
def extend_stroke(temp1, temp2):
    l = tolerance_zone.the_lengthest(temp1)
    y = 3 / 4 * float('%.2f' % (l**0.5)) + 2  # tolerance width
    max_corpoint = break_point.turning_point(temp1)  # polyline corner points
    max_corpoint.append(temp2[int(len(temp2) / 2)])
    max_corpoint.append(temp2[len(temp2) - 1])
    temp = []
    for i in range(1, len(max_corpoint)):
        f_point = max_corpoint[i - 1]
        b_point = max_corpoint[i]
        # print(f_point, b_point)
        R, T = tolerance_zone.coordinate_transformation(f_point, b_point)
        length = Pretreatment.distance(f_point[0], f_point[1], b_point[0], b_point[1])
        x = length / 2  # tolerance along the segment
        h = 0
        while h < len(temp2):
            X = np.array([temp2[h][0], temp2[h][1], 1])
            Y = R @ T @ X
            r1 = ((temp2[h][0] - f_point[0])**2 + (temp2[h][1] - f_point[1])**2)
            r2 = ((temp2[h][0] - b_point[0])**2 + (temp2[h][1] - b_point[1])**2)
            if math.fabs(Y[0]) <= x and math.fabs(Y[1]) <= y or r1 <= y * y or r2 <= y * y:
                if temp2[h] not in temp:
                    temp.append(temp2[h])
            h += 1
    k = len(temp) / len(temp2)
    print("Ratio of the extension stroke inside the tolerance zone", k)
    return k
def line_length(temp):
    l = 0
    for i in range(1, len(temp)):
        a = temp[i - 1]
        b = temp[i]
        d = Pretreatment.distance(a[0], a[1], b[0], b[1])
        l = l + d
    return l
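# Sanity check for line_length (sketch; assumes Pretreatment.distance is
# Euclidean). A 3-4-5 triangle leg followed by a unit step gives 5 + 1 = 6:
#
# print(line_length([[0, 0], [3, 4], [3, 5]]))   # -> 6.0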
def is_extend(temp1, temp2):
    d1 = Pretreatment.distance(temp1[0][0], temp1[0][1], temp2[0][0], temp2[0][1])
    d2 = Pretreatment.distance(temp1[0][0], temp1[0][1], temp2[1][0], temp2[1][1])
    if d1 < d2:
        ftemp = temp1
        btemp = temp2
    else:
        ftemp = temp2
        btemp = temp1
    # if angle_bisector(ftemp, btemp):
    return (angle(ftemp, btemp) and angle_bisector(ftemp, btemp)
            and extend_stroke(ftemp, btemp) > 0.8)
def the_lengthest(temp):  # longest chord of the stroke, measured from its first point
    s1 = temp[0]
    d_max = 0
    for i in range(1, len(temp)):
        s2 = temp[i]
        d = Pretreatment.distance(s1[0], s1[1], s2[0], s2[1])
        if d > d_max:
            d_max = d
    return d_max
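# Note: the_lengthest anchors at temp[0], so it returns the longest chord from
# the first point, not between an arbitrary pair. Sketch (Euclidean distance
# assumed):
#
# print(the_lengthest([[0, 0], [1, 0], [0, 5]]))   # -> 5.0, chord (0,0)-(0,5)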
def PTM_KNN(data_set):
    train, test = Pretreatment.partitioningTPMKNN(data_set)
    normalized_train_matrix = Pretreatment.normalization(train)
    normalized_test_matrix = Pretreatment.normalization(test)
    klist = np.zeros((normalized_train_matrix.shape[0], 1))
    predictions = np.zeros((normalized_test_matrix.shape[0], 1))
    for unknowid in range(0, len(normalized_train_matrix)):
        klist[unknowid] = trainlocal_k(normalized_train_matrix, unknowid)
    new_normalized_train_matrix = np.column_stack(
        (normalized_train_matrix, klist))
    for unknowid_text in range(0, len(normalized_test_matrix)):
        new_k = testlocal_k(new_normalized_train_matrix,
                            normalized_test_matrix, unknowid_text, 3)
        predictions[unknowid_text] = KNN.traditional_knn(
            unknowid_text, normalized_train_matrix, int(new_k),
            normalized_test_matrix)
    TP, FP, FN, TN = analysis.TPFPFNTN(normalized_test_matrix, predictions)
    return (analysis.recall(TP, FN), analysis.F_score(TP, FP, FN),
            analysis.G_mean(TP, FN, TN, FP), TP, FN, TN, FP)
def get_KNNbestK(data_set):
    real_accuracy = 0
    best_k = 1
    for ks in range(1, 20):  # candidate k values
        a = 0
        for k in range(1, 11):
            train, test = Pretreatment.partitioning(data_set, k)  # ten-fold split
            normalized_train_matrix = Pretreatment.normalization(train)
            normalized_test_matrix = Pretreatment.normalization(test)
            predictions = np.zeros(
                (normalized_test_matrix.shape[0], 1))  # prediction buffer
            for unknowid_text in range(0, len(normalized_test_matrix)):
                predictions[unknowid_text] = traditional_knn(
                    unknowid_text, normalized_train_matrix, int(ks),
                    normalized_test_matrix)
            accuracy = analysis.getAccuracy(normalized_test_matrix, predictions)
            a = accuracy + a
        thistest_accuracy = float(a / 10)
        if thistest_accuracy > real_accuracy:
            real_accuracy = thistest_accuracy
            best_k = ks
    return best_k
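# The same "grid-search k by ten-fold cross validation" pattern, written as a
# standalone sketch with scikit-learn (an assumption: sklearn is not used
# elsewhere in this repo, this only illustrates the idea on an (X, y) pair):
#
# from sklearn.neighbors import KNeighborsClassifier
# from sklearn.model_selection import cross_val_score
#
# def sklearn_best_k(X, y):
#     best_k, best_acc = 1, 0.0
#     for ks in range(1, 20):
#         acc = cross_val_score(KNeighborsClassifier(n_neighbors=ks),
#                               X, y, cv=10).mean()
#         if acc > best_acc:
#             best_acc, best_k = acc, ks
#     return best_k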
def MainFunction():
    if os.path.exists("Output/"):
        if Init.SystemJudge() == 0:
            os.system("rm -r Output")
        else:
            os.system("rmdir /s /q Output")
    NameArr = Pretreatment.FigureInput(1)
    try:
        if NameArr == -1:
            return
    except:
        pass
    #Figure traversal
    for kase in range(0, len(NameArr)):
        img = np.array(Image.open(NameArr[kase]).convert("L"))
        img1 = img.copy()  # ndarray has no .deepcopy(); .copy() gives an independent array
def curve_change_line(point1, point2, line, t):  # polyline approximation step
    d = 0
    x_max = 0
    y_max = 0
    line_d = Pretreatment.distance(point1[0], point1[1], point2[0], point2[1])
    # print(line)
    for each in line:
        dis = getDis(each[0], each[1], point1[0], point1[1], point2[0], point2[1])
        if dis >= d:
            d = dis
            x_max = each[0]
            y_max = each[1]
    if d > t * line_d:
        point_max = [x_max, y_max]
    else:
        point_max = []
    return point_max
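# curve_change_line is the split test of a Douglas-Peucker-style simplifier:
# it returns the farthest point from the chord point1-point2 when its distance
# exceeds t * chord_length, else an empty list. A self-contained sketch of the
# distance it relies on (getDis is assumed to be the perpendicular distance
# from (x, y) to the line through the two chord endpoints):
#
# import math
#
# def _point_line_dist(x, y, x1, y1, x2, y2):
#     num = abs((y2 - y1) * x - (x2 - x1) * y + x2 * y1 - y2 * x1)
#     return num / math.hypot(x2 - x1, y2 - y1)
#
# # The farthest point of [[1, 1]] from chord (0,0)-(2,0) lies 1.0 away,
# # and 1.0 > 0.2 * 2, so curve_change_line([0, 0], [2, 0], [[1, 1]], 0.2)
# # would return [1, 1].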
def Chord_weighting(temp, p):  # new data points by chord-length weighting
    # print("chord weighting:", temp, p)
    dists = []  # renamed from 'distance' to avoid shadowing the distance module
    s = 0
    for i in range(1, len(temp)):
        dis = Pretreatment.distance(temp[i - 1][0], temp[i - 1][1],
                                    temp[i][0], temp[i][1])
        s += dis
        dists.append(dis)
    x1, y1 = (p[0] - temp[len(temp) - 1][0]) / 2, (p[1] - temp[len(temp) - 1][1]) / 2
    new_temp = [temp[0]]
    l = 0
    for i in range(len(dists)):
        l += dists[i]
        w = l / s  # cumulative chord-length weight in [0, 1]
        x, y = temp[i + 1][0] + w * x1, temp[i + 1][1] + w * y1
        new_temp.append([x, y])
    return new_temp
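# Worked example for Chord_weighting (sketch, Euclidean distance assumed).
# Each point after the first is shifted toward the target p by half the gap
# between p and the stroke's last point, scaled by cumulative chord length:
#
# temp = [[0, 0], [1, 0], [2, 0]]
# p = [4, 0]                # gap to last point is (2, 0); half-gap is (1, 0)
# # weights are 0.5 and 1.0, so the result is
# # [[0, 0], [1.5, 0.0], [3.0, 0.0]]
# print(Chord_weighting(temp, p))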
def CNNSeed(img, TobImg, BlockSize, FileName):
    BlockInfo = [0 for n in range(BlockSize)]
    for i in range(0, BlockSize):  # BlockSize is a count, not a sequence
        print(str(BlockSize) + "\t:")
        OutImg = [[0 for n in range(len(TobImg[0]))] for n in range(len(TobImg))]
        for p in range(0, len(TobImg)):
            for q in range(0, len(TobImg[p])):
                if TobImg[p][q] == i:
                    OutImg[p][q] = img[p][q]
        Pretreatment.FigurePrint(OutImg, 2)  # 'kind' was undefined here; 2 matches the mode used elsewhere
        InpInt = Init.IntInput(str(BlockSize) + "\t:", "1", "3", "int")
        BlockInfo[i] = InpInt  # InpInt is already constrained to 1..3
    BuildFile("Histogram")
    File = open("Histogram", "a")
import numpy as np
import string
from gensim import corpora
import Pretreatment

# Build the LDA model
lda = Pretreatment.LDA()
Corpus, Product, Component, Developer = lda.read_csv(dir="AspectJ.csv",
                                                     summary=2,
                                                     description=3,
                                                     product=4,
                                                     component=5,
                                                     assigned_to=6,
                                                     comment=7)
Terms, Topics = lda.build_from_corpus(Corpus)

# Build the MLkNN model
mlknn = Pretreatment.BR_MLkNN()
Terms = mlknn.makematrix(Terms, lenth=len(lda.dictionary))
Topics = mlknn.makematrix(Topics, lenth=lda.num_topics)
X, y = mlknn.make_Xy(Terms=Terms, Topics=Topics, Product=Product,
                     Component=Component, Developer=Developer)
step = int(len(X) / 11)  # validate over 11 folds

# Results of the BR analysis alone
for n in range(step, len(X) - step, step):
    testX_Validation = X[n:n + step, :]
import Pretreatment
import KNN
'''
# Generate Image
trainDatas, trainLabels = Pretreatment.loadTrainData('/home/hadoop/workdatas/kaggle/DigitRecognizer/train_sort.csv')
trainLabels = trainLabels[0]
Pretreatment.generateImage('/home/hadoop/workdatas/kaggle/DigitRecognizer/imgs/', trainDatas, trainLabels)
'''
'''
# KNN Test
import numpy
group = numpy.array([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1]])
labels = ['A', 'A', 'B', 'B']
tar = [1.0, 1.2]
result = KNN.classify(tar, group, labels, 3)
print(result)
'''
# KNN Classify
trainDatas, trainLabels = Pretreatment.loadTrainData(
    '/home/hadoop/workdatas/kaggle/DigitRecognizer/train.csv')
trainLabels = trainLabels[0]
testDatas = Pretreatment.loadTestData(
    '/home/hadoop/workdatas/kaggle/DigitRecognizer/test.csv')
result = KNN.process(testDatas, trainDatas, trainLabels)
Pretreatment.generateResultFile(
    '/home/hadoop/workdatas/kaggle/DigitRecognizer/result_knn_10.csv', result)
import sys
import os
import Pretreatment
import Translation

print("-----------------------------------------")
file_name = input(
    "Enter '/帮助' for SCPython usage help\n"
    "Enter '/自定义' for custom mode\n"
    "Enter the path of an SCPython code file to interpret it\n\nInput:")
while file_name == "":
    file_name = input("You did not enter a command, please try again:")
if file_name == "/帮助":
    pass
elif file_name == "/自定义":
    pass
else:
    file = open(file_name, encoding='utf-8')
    file_text = file.read()
    List_Code = Pretreatment.Decomposition_Code(file_text)
    Text = Translation.Translation_Code(List_Code)
    file_new = open(os.path.join(os.environ["TMP"], "SCPYFile.py"), "w",
                    encoding='utf-8')
    file_new.write(Text)
    file_new.close()
    os.system('cls')
    os.system('"' + os.path.join(os.environ["TMP"], "SCPYFile.py") + '"')
def HandSeed(Tobimg, img, Surround):
    Upground = set()
    Background = set()
    for Kase in range(0, 2):
        Owari = False
        while 1:
            print(
                "\nInput the locations you choose as '[i1, j1] [i2, j2]'. \nAfter choosing, close the figure and press Enter to continue."
            )
            if Kase == 0:
                print("Upground Set: ")
            elif Kase == 1:
                print("Background Set: ")
            #Print the figure
            if Surround == "Nor":
                Pretreatment.FigurePrint(img, 2)
            else:
                pass
            #Pretreatment
            InpStr = input()
            RemStr1 = ""
            RemStr2 = ""
            Str2Int = False
            Error = False
            kind = 0
            #Get string
            for i in range(0, len(InpStr)):
                #Partial
                if InpStr[i] == "[":
                    kind = 1
                    continue
                if InpStr[i] == ",":
                    kind = 2
                    continue
                if InpStr[i] == "]":
                    kind = 0
                    Str2Int = True
                if kind == 1 and Str2Int == False:
                    RemStr1 += InpStr[i]
                if kind == 2 and Str2Int == False:
                    RemStr2 += InpStr[i]
                if Str2Int == True:
                    Int1 = 0
                    Int2 = 0
                    #print([RemStr1, RemStr2])
                    try:
                        Int1 = int(RemStr2)
                        Int2 = int(RemStr1)
                    except:
                        print("Input Error, Please input points again")
                        Error = True
                        break
                    #print(Tobimg[Int1][Int2])
                    try:
                        if Kase == 0:
                            Upground.add(Tobimg[Int1][Int2])
                        if Kase == 1:
                            Background.add(Tobimg[Int1][Int2])
                    except:
                        print("Input Error, Location exceed")
                    RemStr1 = ""
                    RemStr2 = ""
                    Str2Int = False
                    continue
            if Error == True:
                continue
            else:
                if Kase == 0:
                    if len(Upground) == 0:
                        print("You must input at least 1 node!")
                        continue
                    else:
                        Owari = True
                elif Kase == 1:
                    if len(Background) == 0:
                        print("You must input at least 1 node!")
                        continue
                    else:
                        Owari = True
            if Owari == True:
                break
            else:
                continue
    return Upground, Background
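# The character-by-character parser above can be compressed with a regular
# expression; a minimal sketch of the same "[i, j]" extraction (an
# illustration only, not what HandSeed actually uses):
#
# import re
#
# def parse_points(inp_str):
#     """Return [(i1, j1), (i2, j2), ...] from a string like '[1, 2] [3, 4]'."""
#     return [(int(a), int(b))
#             for a, b in re.findall(r"\[\s*(\d+)\s*,\s*(\d+)\s*\]", inp_str)]
#
# # parse_points("[1, 2] [3, 4]") -> [(1, 2), (3, 4)]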
if __name__ == "__main__": list_tweets = [] #list_tweets = Data.GetListTweets("output/datas_tweets.txt", 5000) date_start = '2019-09-05' date_end = '2019-09-06' n = 10 #决定聚类数量 pretreatment_result = "output/" + date_start + "_" + date_end + "_" + "pretreatment_result" + ".txt" hotspot_result = "output/" + date_start + "_" + date_end + "_" + "hotspot_result" + ".txt" # #实验获取手肘值 # for f in Select_date(date_start,date_end): # list_tweets.extend(Data.GetListTweets(f)) # list_result = Pretreatment.Pretreatment(list_tweets, "input/sensitive.txt", "input/emoji.txt", "input/stopwords.txt") # count = Data.OutputToFile(list_result, pretreatment_result) # list_tweets = Data.GetListTweets(pretreatment_result) for f in Select_date(date_start, date_end): list_tweets.extend(Data.GetListTweets(f)) list_result = Pretreatment.Pretreatment(list_tweets, "input/sensitive.txt", "input/emoji.txt", "input/stopwords.txt") count = Data.OutputToFile(list_result, pretreatment_result) list_tweets = Data.GetListTweets(pretreatment_result) dict_hotSpot = K_MEANS.GetDictHotSpot(list_tweets, count, n) list_hotSpot = GetListHotSpot(dict_hotSpot) Output(list_hotSpot, hotspot_result)
    xsize = tagger.xsize()
    for i in range(size):
        for j in range(xsize):
            char = tagger.x(i, j)
            tag = tagger.y2(i)
            if tag == 'O':
                test_result_file.write(char)
            elif tag in ('B_LOC', 'B_ORG', 'B_PRO', 'B_PER', 'B_TIME'):
                test_result_file.write('(' + char)
            elif tag in ('E_LOC', 'E_ORG', 'E_PRO', 'E_PER', 'E_TIME'):
                test_result_file.write(char + ')' + tag[2:])
            else:
                test_result_file.write(char)
        test_result_file.write('\n')
    test_file.close()
    test_result_file.close()


if __name__ == '__main__':
    pre = Pretreatment()
    pre.load_copus()
    name_indicator, loc_indicator, org_indicator, time_indicator, pro_indicator = pre.get_indicator()
    ner = NER()
    # ner.load_copus(name_indicator, loc_indicator, org_indicator, time_indicator, pro_indicator)
    ner.load_test_corpus(name_indicator, loc_indicator, org_indicator,
                         time_indicator, pro_indicator)
    # tagger = CRFPP.Tagger('-m' + crf_model)
    # ner.recognize(tagger)