예제 #1
0
파일: CheckCrp.py 프로젝트: Trongha/myCrp
def predict_diagonal(trainSet,
                     testSet,
                     dim=5,
                     tau=2,
                     epsilon=0.7,
                     lambd=3,
                     percent=0.6,
                     distNorm=1,
                     titleOfGraph='Pretty Girl',
                     figureName='GrilLikeYou',
                     pathSaveFigure=None):
    """Plot the parts of ``testSet`` that lie on long cross-recurrence diagonals.

    Both series are turned into state vectors with ``state_phase`` (defined
    elsewhere; presumably a delay embedding -- confirm against its
    definition), a train-by-test distance matrix is thresholded with
    ``epsilon``, and every diagonal run of at least ``lambd`` consecutive
    hits is collected and plotted on top of the training series.

    Args:
        trainSet: Training series (indexable, supports ``len``).
        testSet: Test series (indexable, supports ``len``).
        dim: Forwarded to ``state_phase``; also used to size the number of
            state vectors and the tail added after each matched run.
        tau: Forwarded to ``state_phase``; see ``dim``.
        epsilon: Distances strictly below this value count as a hit.
        lambd: Minimum number of consecutive hits on a diagonal.
        percent: ``(int(dim * percent) - 1) * tau`` extra sample indices are
            appended after the last state index of every run.
        distNorm: ``p`` of the Minkowski distance passed to ``cdist``.
        titleOfGraph: Prefix of the plot title.
        figureName: Name of the matplotlib figure that is created.
        pathSaveFigure: If not ``None`` and at least one run was found, the
            figure is saved to this path.

    Returns:
        The matplotlib figure created for the line plot.
    """
    # Number of trailing samples that cannot start a full state vector.
    embed_span = (dim - 1) * tau
    vectors_train_1 = np.array([
        state_phase(trainSet, i, dim, tau)
        for i in range(len(trainSet) - embed_span)
    ])
    vectors_test_1 = np.array([
        state_phase(testSet, i, dim, tau)
        for i in range(len(testSet) - embed_span)
    ])

    # Distance matrix: rows (y) index train vectors, columns (x) index test
    # vectors; Minkowski distance with p = distNorm.
    r_dist = cdist(vectors_train_1, vectors_test_1, 'minkowski', p=distNorm)

    print('vectors_train.shape: ', vectors_train_1.shape)
    print('vectors_test.shape: ', vectors_test_1.shape)
    print('r_dist.shape: ', r_dist.shape)
    print('r_dist min', r_dist.min())
    print('epsilon: ', epsilon)
    print('r_dist max', r_dist.max())
    print('lambd: ', lambd)

    # r1 holds -1 where r_dist < epsilon (hit) and -2 elsewhere (miss):
    # the boolean comparison yields 0/1 and subtracting 2 shifts it.
    r1 = np.array((r_dist < epsilon) - 2)

    myCrpFunctions.crossRecurrencePlots("CRP", r1, dotSize=0.2)
    plt.show()

    high_r1, len_r1 = r1.shape

    # Collect every diagonal run of at least `lambd` consecutive hits.
    # Each entry is an array indexed by the train position y: -2 outside the
    # run, and the matching test state index (y + offset) inside the run.
    diagonals_have_predict = []
    for offset in range(-(high_r1 - lambd + 1), len_r1 - lambd + 2, 1):
        # offset = x - y; diagonals below the main one start at row -offset.
        y = -offset if offset < 0 else 0

        while y < high_r1 and y + offset < len_r1:
            if r1[y][y + offset] == -1:
                start = y
                while (y + 1 < high_r1 and y + 1 + offset < len_r1
                       and r1[y + 1][y + 1 + offset] == -1):
                    y += 1
                if y - start + 1 >= lambd:
                    predicts = np.full(y + 1, -2)
                    for index in range(start, y + 1, 1):
                        predicts[index] = index + offset
                    diagonals_have_predict.append(predicts)
            y += 1

    # Convert state-vector indices into sample indices of the test series:
    # after the last state index of each run, append the following sample
    # indices so the run also covers part of the final state vector.
    indexSampleOrigin = diagonals_have_predict
    tail_length = (int(dim * percent) - 1) * tau

    for i_row in range(len(indexSampleOrigin)):
        # The range is fixed at the pre-insertion length on purpose, so the
        # freshly inserted tail is not scanned again.
        for i_in_1_State in range(len(indexSampleOrigin[i_row])):
            row = indexSampleOrigin[i_row]
            if row[i_in_1_State] < 0:
                continue
            is_run_end = (i_in_1_State == len(row) - 1
                          or row[i_in_1_State + 1] < 0)
            if not is_run_end:
                continue
            for j in range(tail_length):
                indexSampleOrigin[i_row] = np.insert(
                    indexSampleOrigin[i_row], i_in_1_State + j + 1,
                    indexSampleOrigin[i_row][i_in_1_State] + j + 1)

    # Look up the test values for every collected sample index; positions
    # outside a run become None so matplotlib leaves a gap there.
    valueOfSample = []
    for indices in indexSampleOrigin:
        arr = []
        for sample_index in indices:
            if sample_index < 0:
                arr.append(None)
                continue
            arr.append(testSet[sample_index])
            try:
                # NOTE(review): `shapePredict` and `shape` are not defined in
                # this function; presumably module-level globals -- confirm.
                shapePredict[sample_index] = shape
            except IndexError:
                # The original used "%" with a "{}" placeholder, which raised
                # TypeError instead of printing the message.
                print(
                    "^v^v^v^v^v^v^v^v^v^v^v^v^v^v^indexError: {} ^v^v^v^v^v^v^v^v^v^v^v^v^v^v^"
                    .format(sample_index))
        valueOfSample.append(arr)

    f_line = plt.figure(figureName, figsize=(12.8, 7.2), dpi=100)

    print("_________num predict: ", len(valueOfSample))

    if valueOfSample:
        for i, values in enumerate(valueOfSample):
            plt.plot(values, ':', label=i)

        plt.plot(trainSet, 'r', label='train')
        plt.legend(loc=0)
        plt.xlabel('index')
        plt.ylabel('feature')

        titleOfGraph = titleOfGraph + " - lamb_" + str(
            lambd) + ' - minkowski_' + str(distNorm) + " - numPredict_" + str(
                len(valueOfSample))
        plt.title(titleOfGraph)

        if pathSaveFigure is not None:
            plt.savefig(pathSaveFigure, dpi=200)

    return f_line
예제 #2
0
def predict_diagonal(trainSet, testSet, dim=5, tau=2, epsilon=0.7, lambd=3, percent=0.6, titleOfGraph = 'Pretty Girl'):
    """Label the samples of ``testSet`` covered by long cross-recurrence diagonals.

    State vectors are built for both series with ``state_phase`` (defined
    elsewhere), their pairwise Minkowski (p=1) distances are thresholded with
    ``epsilon``, and every diagonal run of more than ``lambd`` consecutive
    hits marks the corresponding test samples with 1.

    Args:
        trainSet: Training series (rows of the distance matrix).
        testSet: Test series (columns of the distance matrix).
        dim: Forwarded to ``state_phase``.
        tau: Forwarded to ``state_phase``.
        epsilon: Distances strictly below this value count as a hit.
        lambd: A run is kept only when it is strictly longer than this.
        percent: Each kept state index ``i`` marks the samples
            ``i .. i + (int(dim * percent) - 1) * tau``.
        titleOfGraph: Title of the line plot.

    Returns:
        A list with one 0/1 label per element of ``testSet``.
    """
    train_vectors = [
        state_phase(trainSet, start, dim, tau)
        for start in range(len(trainSet) - (dim - 1) * tau)
    ]
    test_vectors = [
        state_phase(testSet, start, dim, tau)
        for start in range(len(testSet) - (dim - 1) * tau)
    ]
    train_vectors = np.array(train_vectors)
    test_vectors = np.array(test_vectors)

    print("train.shape: ", train_vectors.shape)

    # Rows (y) follow the train vectors, columns (x) the test vectors.
    distances = cdist(train_vectors, test_vectors, 'minkowski', p=1)

    print('vectors_train.shape: ', train_vectors.shape)
    print('vectors_test.shape: ', test_vectors.shape)
    print('r_dist.shape: ', distances.shape)
    print('r_dist min', distances.min())
    print('epsilon: ', epsilon)
    print('r_dist max', distances.max())
    print('lambd: ', lambd)

    predict_label = np.zeros(len(testSet), dtype=int)

    # Boolean recurrence matrix: True where the two states are close.
    recurrence = np.array(distances < epsilon)

    print(recurrence)
    myCrpFunctions.crossRecurrencePlots("CRP", recurrence, dotSize = 0.2)

    n_cols = int(np.size(recurrence[0]))
    n_rows = int(np.size(recurrence) / n_cols)

    # One integer array per diagonal, from the bottom-left corner
    # (offset -n_rows + 1) to the top-right corner (offset n_cols - 1).
    diagonals = []
    for offset in range(1 - n_rows, n_cols):
        diagonal = np.array(np.diagonal(recurrence, offset), dtype=int)
        diagonals.append(diagonal)
        print(diagonal)

    myCrpFunctions.crossRecurrencePlots("CRP2", diagonals, dotSize = 0.2)

    # Test-side state indices of every run that is longer than lambd.
    matched_states = []
    for row_number, diagonal in enumerate(diagonals):
        # Diagonals at or below the main one start in column 0; the others
        # start in column (row_number - n_rows + 1).
        first_column = 0 if row_number < n_rows else row_number - n_rows + 1
        length = np.size(diagonal)
        position = 0
        while position < length:
            if diagonal[position] != 1:
                position += 1
                continue
            run_start = position
            while position + 1 < length and diagonal[position + 1] == 1:
                position += 1
            run_length = position - run_start + 1
            if run_length > lambd:
                first_state = first_column + run_start
                matched_states.extend(range(first_state, first_state + run_length))
            position += 1

    print("------------------------------------------\n",
            matched_states,
            "\n------------------------------------------")
    # Sort, then pass through filter_sort (defined elsewhere).
    matched_states.sort()
    matched_states = filter_sort(matched_states)

    # Expand each state index into the sample indices it stands for.
    samples_per_state = (int(dim * percent) - 1) * tau + 1
    sample_indices = [
        state + step
        for state in matched_states
        for step in range(samples_per_state)
    ]
    sample_indices.sort()
    sample_indices = filter_sort(sample_indices)

    for sample in sample_indices:
        predict_label[sample] = 1
    print('predict_label:', predict_label)
    print('num predict: ', np.sum(predict_label))

    predicted_segments = SyncPredictToTestArr(sample_indices, testSet)

    plt.figure("line")
    for number in range(1, np.size(predicted_segments)):
        plt.plot(predicted_segments[number], ':', label=number)
    plt.plot(trainSet, 'r', label='train')
    plt.legend(loc=0)
    plt.xlabel('index')
    plt.ylabel('feature')
    plt.title(titleOfGraph)

    return predict_label.ravel().tolist()
예제 #3
0
def rqaCalculate(rpBinaryMatrix, keyDot=1, lambd=2, typeReturn='array', showCRP=0):
    """Compute recurrence-quantification measures for a recurrence matrix.

    The matrix is first dumped to ``rqaOut.csv`` (each element plus 2,
    semicolon separated, one matrix row per line) through
    ``myCrpFunctions.writeContentToFile``. Line-length histograms are then
    taken from ``getPverticalLengthDot`` and ``getPdiagonalLengthDot`` (both
    defined elsewhere) and combined into the measures below.

    Args:
        rpBinaryMatrix: 2-D recurrence matrix (rows indexable, measurable
            with ``np.size``).
        keyDot: Value forwarded to the helpers and the plot as ``keyDot``;
            presumably the cell value that marks a recurrence point.
        lambd: Histogram indices ``i >= lambd - 1`` count as qualifying lines.
        typeReturn: ``'array'`` for a list, ``'dict'`` for a dictionary.
        showCRP: When equal to 1, plot the matrix and print the measures.

    Returns:
        For ``'array'``: ``[RR, DET, LAM, RATIO, averageL, averageH, Lmax,
        Hmax, DIV, ENTR, averageTime1, averageTime2]``. For ``'dict'``: the
        same values keyed by those names. ``None`` for any other
        ``typeReturn``.
    """
    import math

    len_rpBinaryMatrix = int(np.size(rpBinaryMatrix[0]))
    N = high_rpBinaryMatrix = int(np.size(rpBinaryMatrix)/len_rpBinaryMatrix)

    content = [
        ";".join(str(i+2) for i in rpBinaryMatrix[y]) + "\n"
        for y in range(high_rpBinaryMatrix)
    ]
    myCrpFunctions.writeContentToFile('rqaOut.csv', content)

    DET = 0
    LAM = 0
    RATIO = 0
    averageL = 0
    averageH = 0
    DIV = 0
    ENTR = 0

    # Histogram of vertical line lengths plus the helper's timing measures.
    Ph, Hmax, averageTime1, averageTime2 = getPverticalLengthDot(
        rpBinaryMatrix, high_rpBinaryMatrix, len_rpBinaryMatrix, keyDot=keyDot)

    # Histogram of diagonal line lengths.
    Pl, Lmax = getPdiagonalLengthDot(
        rpBinaryMatrix, high_rpBinaryMatrix, len_rpBinaryMatrix, keyDot=keyDot)

    num_L = 0
    num_H = 0
    sum_Well_L = 0
    sum_Well_H = 0
    num_Well_L = 0
    num_Well_H = 0

    # Total number of dots, counted along vertical lines; Ph[0] holds the
    # segments that consist of a single dot.
    sumDot = Ph[0]

    lmin = lambd - 1

    for i in range(1, N+1, 1):
        sumDot += (i+1)*Ph[i]

        num_L += Pl[i]
        num_H += Ph[i]

        if i >= lmin:
            # NOTE(review): lengths are weighted with i here but with i+1 in
            # sumDot above -- confirm the histogram indexing of the helpers.
            sum_Well_L += Pl[i]*i
            sum_Well_H += Ph[i]*i

            num_Well_L += Pl[i]
            num_Well_H += Ph[i]

    # Normalise by the real number of cells so that non-square
    # (cross-recurrence) matrices are handled; equals N**2 for square input.
    RR = sumDot/(high_rpBinaryMatrix*len_rpBinaryMatrix)

    if num_L*num_Well_L > 0:
        DET = num_Well_L/num_L
        averageL = sum_Well_L/num_Well_L

    if DET > 0:
        RATIO = DET/RR

    if num_H*num_Well_H > 0:
        LAM = num_Well_H/num_H
        averageH = sum_Well_H/num_Well_H

    # Without any qualifying diagonal line the entropy stays 0; the guard
    # avoids a ZeroDivisionError when lambd <= 1 lets index 0 into the loop.
    if num_Well_L > 0:
        for i in range(lmin, N, 1):
            if Pl[i] > 0:
                pl = Pl[i]/num_Well_L
                ENTR -= pl*math.log(pl)
    if Lmax > 0:
        DIV = 1/Lmax

    if showCRP == 1:
        import matplotlib.pyplot as plt
        myCrpFunctions.crossRecurrencePlots("test", rpBinaryMatrix, keyDot=keyDot, dotSize=10)
        plt.show()
        print(RR)
        print(DET)
        print(LAM)
        print("averageL, averageH", averageL, averageH)
        print(Lmax, Hmax)
        print(DIV)
        print(ENTR)
        print("averageTime1, averageTime2", averageTime1, averageTime2)

    names = ("RR", "DET", "LAM", "RATIO", "averageL", "averageH",
             "Lmax", "Hmax", "DIV", "ENTR", "averageTime1", "averageTime2")
    values = [RR, DET, LAM, RATIO, averageL, averageH,
              Lmax, Hmax, DIV, ENTR, averageTime1, averageTime2]

    if typeReturn == 'array':
        return values
    if typeReturn == 'dict':
        return dict(zip(names, values))
    # Any other typeReturn yields None, as before.
    return None