def main():
    """Decode and save a state path for every 12-row group of dataset three.

    Loads ``datasets/RawData_third.csv`` (integer codes) and
    ``datasets/RawData_time_third.csv`` (float times), walks the rows in
    consecutive groups of 12, requests transition, emission and time-emission
    matrices from ``hmm_train`` for each group, decodes a 37-entry path and
    writes it to ``Results/MPS_per_AnimalID_250_90.csv`` together with the
    AnimalID and TargetID found in the first row of the group.

    Raises:
        IndexError: if the row count is not a multiple of 12, because the
            last, incomplete group cannot be read.
    """
    n_obs = 36        # observation fields copied from every record
    first_field = 5   # position of the first observation field in a record
    group_rows = 12   # rows handed to hmm_train per decoded path
    num_states = 9

    behaviour = np.genfromtxt('datasets/RawData_third.csv', delimiter=',',
                              names=True, case_sensitive=True, dtype='int')
    timing = np.genfromtxt('datasets/RawData_time_third.csv', delimiter=',',
                           names=True, case_sensitive=True, dtype='float')
    n_rows = behaviour['Behaviours__1'].size
    n_time_rows = timing['Time__1'].size

    # Column 36 is never loaded: it stays -1 in obs and 0.0 in obs_time.
    obs = np.full((n_rows, 37), -1, dtype='int')
    obs_time = np.zeros((n_rows, 37), dtype='float')
    for row in range(n_rows):
        record = behaviour[row]
        obs[row, :n_obs] = [record[first_field + col] for col in range(n_obs)]

    # Recode -1 as 9 in the loaded columns only.
    # NOTE(review): -1 is presumably genfromtxt's filler for empty integer
    # cells - confirm against the data files.
    loaded = obs[:, :n_obs]
    loaded[loaded == -1] = 9

    for row in range(n_time_rows):
        record = timing[row]
        obs_time[row, :n_obs] = [record[first_field + col]
                                 for col in range(n_obs)]

    # Fields 0 and 4 of a record hold the animal and target identifiers.
    animal_id = np.array([behaviour[row][0] for row in range(n_rows)],
                         dtype=int)
    target_id = np.array([behaviour[row][4] for row in range(n_rows)],
                         dtype=int)

    # The original inner loops over s overwrote the same path entry on every
    # pass, so only the last state index (num_states - 2) ever mattered.
    last = num_states - 2

    # newline='' lets the csv module control line endings; without it every
    # row is followed by a blank line on Windows.
    with open('Results/MPS_per_AnimalID_250_90.csv', 'w',
              newline='') as csvfile:
        writer = csv.DictWriter(csvfile,
                                fieldnames=['AnimalID', 'TargetID', 'PATH'])
        writer.writeheader()

        for pos in range(0, n_rows, group_rows):
            obs_set = np.full((group_rows, 37), -1, dtype='int')
            obs_time_set = np.zeros((group_rows, 37), dtype='float')
            for k in range(group_rows):
                # Row-wise indexing keeps the IndexError on a short last group.
                obs_set[k, :n_obs] = obs[pos + k, :n_obs]
                obs_time_set[k, :n_obs] = obs_time[pos + k, :n_obs]

            n_steps = obs_set.shape[1]

            # Logs are used so that very small probabilities do not underflow.
            trans_log = np.log(hmm_train.trans_prob_matrix(obs_set))
            emi_prob = hmm_train.emission_prob_matrix(obs_set)
            emi_prob[:, 36] = 0
            emi_log = np.log(emi_prob)
            time_emi = hmm_train.emission_prob_matrix_time(obs_set,
                                                           obs_time_set)
            time_emi[:, 36] = 0
            time_log = np.log(time_emi)

            path = np.full(n_steps, -1, dtype='int')
            for step in range(n_steps):
                # Row 8 of the emission matrix selects the decoding rule.
                # The comparisons are strict, so exactly 0.7 or 0.8 falls
                # through to the last branch.
                weight = emi_prob[8, step]
                if weight > 0.8:
                    path[step] = -2
                elif 0.7 < weight < 0.8:
                    path[step] = np.argmax(emi_log[:, step]
                                           + trans_log[:, last])
                elif 0.5 < weight < 0.7:
                    # At step 0, step - 1 wraps around to the last column.
                    path[step] = np.argmin(emi_log[:, step - 1]
                                           + trans_log[last, last])
                else:
                    path[step] = np.argmax(emi_log[:, step - 1]
                                           + trans_log[last, last]
                                           + time_log[:, step])
            path[36] = -2

            writer.writerow({
                'AnimalID': str(animal_id[pos]),
                'TargetID': str(target_id[pos]),
                'PATH': str(path + 1)
            })
def main():
    """Return the most probable state sequence for the whole third dataset.

    Integer codes are read from ``datasets/RawData_third.csv`` and the
    matching times from ``datasets/RawData_time_third.csv``.  All rows are
    passed to ``hmm_train`` at once, a 37-entry path is decoded from the
    resulting matrices, printed between two banner lines and returned.

    Returns:
        The decoded path as a 1-D integer array with 1 added to every entry.
    """
    first_field = 5   # observations start at the sixth field of a record
    n_obs = 36        # number of observation fields copied per record

    # Load the single dataset used by this variant.
    behaviour = np.genfromtxt(
        'datasets/RawData_third.csv', delimiter=',', names=True,
        case_sensitive=True, dtype='int')
    timing = np.genfromtxt(
        'datasets/RawData_time_third.csv', delimiter=',', names=True,
        case_sensitive=True, dtype='float')
    n_rows = behaviour['Behaviours__1'].size
    n_time_rows = timing['Time__1'].size

    # Column 36 is never loaded: it stays -1 in obs and 0.0 in obs_time.
    obs = np.full((n_rows, 37), -1, dtype='int')
    obs_time = np.zeros((n_rows, 37), dtype='float')
    n_steps = obs[0].shape[0]
    num_states = 9

    # Copy the observation fields of every record into the matrices.
    for row in range(n_rows):
        record = behaviour[row]
        obs[row, :n_obs] = [record[first_field + col] for col in range(n_obs)]

    # Recode -1 as 9 in the loaded columns only.
    loaded = obs[:, :n_obs]
    loaded[loaded == -1] = 9

    for row in range(n_time_rows):
        record = timing[row]
        obs_time[row, :n_obs] = [record[first_field + col]
                                 for col in range(n_obs)]

    # Logs are used for all three matrices so that very small probabilities
    # do not underflow.
    trans_log = np.log(hmm_train.trans_prob_matrix(obs))

    emi_prob = hmm_train.emission_prob_matrix(obs)
    emi_prob[:, 36] = 0
    emi_log = np.log(emi_prob)

    time_log = np.log(hmm_train.emission_prob_matrix_time(obs, obs_time))
    time_log[:, 36] = 0

    # Iterating s over range(num_states - 1) and overwriting the same entry
    # each time leaves only the value for the last s, so it is used directly.
    last = num_states - 2

    path = np.full(n_steps, -1, dtype='int')
    for step in range(n_steps):
        # Row 8 of the emission matrix selects the decoding rule; the strict
        # comparisons send exactly 0.7 or 0.8 to the last branch.
        weight = emi_prob[8, step]
        if weight > 0.8:
            path[step] = -2
        elif 0.7 < weight < 0.8:
            path[step] = np.argmax(emi_log[:, step] + trans_log[:, last])
        elif 0.5 < weight < 0.7:
            # At step 0, step - 1 wraps around to the last column.
            path[step] = np.argmin(emi_log[:, step - 1]
                                   + trans_log[last, last])
        else:
            path[step] = np.argmax(emi_log[:, step - 1]
                                   + trans_log[last, last]
                                   + time_log[:, step])
    path[36] = -2

    shifted = path + 1
    print('==================== MOST PROBABLE STATE SEQUENCE ================')
    print(shifted)
    print('==================================================================')
    return shifted
def main():
    """Decode one state path per 12-row group across all four datasets.

    Loads the four ``datasets/RawData_*.csv`` files and their
    ``RawData_time_*`` companions.  For every consecutive group of 12 rows
    the matching rows of all four datasets are stacked into a 48-row block
    that is passed to ``hmm_train``; a 37-entry path is decoded and written
    to ``Results/MPS_per_AnimalID_all_Datasets.csv`` with the AnimalID and
    TargetID of the group's first row (taken from the first dataset).

    For TargetID 1 and 2 the value ``(36 - g) / 36`` is recorded per group,
    where ``g`` is the number of path entries equal to -2; the two series
    are plotted at the end.

    Raises:
        IndexError: if the first dataset's row count is not a multiple of 12,
            if another dataset has fewer rows, or if more than 27 groups
            belong to one target.
    """
    n_obs = 36        # observation fields copied from every record
    first_field = 5   # position of the first observation field in a record
    group_rows = 12   # rows taken from each dataset per decoded path
    num_states = 9

    def load(path, kind):
        """Read one CSV file into a structured array of the given dtype."""
        return np.genfromtxt(path, delimiter=',', names=True,
                             case_sensitive=True, dtype=kind)

    def to_matrix(records, size_field, fill, kind):
        """Copy the observation fields of every record into a 37-col matrix."""
        n = records[size_field].size
        matrix = np.full((n, 37), fill, dtype=kind)
        for row in range(n):
            record = records[row]
            matrix[row, :n_obs] = [record[first_field + col]
                                   for col in range(n_obs)]
        return matrix

    raw = [load('datasets/RawData_first.csv', 'int'),
           load('datasets/RawData_second.csv', 'int'),
           load('datasets/RawData_third.csv', 'int'),
           load('datasets/RawData_fourth.csv', 'int')]
    raw_time = [load('datasets/RawData_time_first.csv', 'float'),
                load('datasets/RawData_time_second.csv', 'float'),
                load('datasets/RawData_time_third.csv', 'float'),
                load('datasets/RawData_time_fourth.csv', 'float')]

    # Column 36 is never loaded: it stays -1 for codes and 0.0 for times.
    obs_sets = [to_matrix(rec, 'Behaviours__1', -1, 'int') for rec in raw]
    time_sets = [to_matrix(rec, 'Time__1', 0.0, 'float') for rec in raw_time]

    # Recode -1 as 9 in the loaded columns of EVERY dataset.  Each matrix is
    # changed only where its own entries are -1; writing all replacements
    # into the first dataset would destroy valid values there and leave the
    # other three unrecoded.
    for matrix in obs_sets:
        loaded = matrix[:, :n_obs]
        loaded[loaded == -1] = 9

    # Identifiers come from fields 0 and 4 of the first dataset's records.
    first = raw[0]
    n_rows = obs_sets[0].shape[0]
    animal_id = np.array([first[row][0] for row in range(n_rows)], dtype=int)
    target_id = np.array([first[row][4] for row in range(n_rows)], dtype=int)

    group1 = np.zeros(27, float)
    group2 = np.zeros(27, float)
    g1 = 0
    g2 = 0
    plot_val = np.arange(27)

    # The original inner loops over s overwrote the same path entry on every
    # pass, so only the last state index (num_states - 2) ever mattered.
    last = num_states - 2

    # newline='' lets the csv module control line endings; without it every
    # row is followed by a blank line on Windows.
    with open('Results/MPS_per_AnimalID_all_Datasets.csv', 'w',
              newline='') as csvfile:
        writer = csv.DictWriter(csvfile,
                                fieldnames=['AnimalID', 'TargetID', 'PATH'])
        writer.writeheader()

        for pos in range(0, n_rows, group_rows):
            obs_set = np.full((4 * group_rows, 37), -1, dtype='int')
            obs_time_set = np.zeros((4 * group_rows, 37), dtype='float')
            for k in range(group_rows):
                for d in range(4):
                    # Dataset d occupies rows 12*d .. 12*d + 11 of the block.
                    dest = d * group_rows + k
                    obs_set[dest, :n_obs] = obs_sets[d][pos + k, :n_obs]
                    obs_time_set[dest, :n_obs] = time_sets[d][pos + k, :n_obs]

            n_steps = obs_set.shape[1]

            # Logs are used so that very small probabilities do not underflow.
            trans_log = np.log(hmm_train.trans_prob_matrix(obs_set))
            emi_prob = hmm_train.emission_prob_matrix(obs_set)
            emi_prob[:, 36] = 0
            emi_log = np.log(emi_prob)
            time_emi = hmm_train.emission_prob_matrix_time(obs_set,
                                                           obs_time_set)
            time_emi[:, 36] = 0
            time_log = np.log(time_emi)

            path = np.full(n_steps, -1, dtype='int')
            for step in range(n_steps):
                # Row 8 of the emission matrix selects the decoding rule.
                # The comparisons are strict, so exactly 0.7 or 0.8 falls
                # through to the last branch.
                weight = emi_prob[8, step]
                if weight > 0.8:
                    path[step] = -2
                elif 0.7 < weight < 0.8:
                    path[step] = np.argmax(emi_log[:, step]
                                           + trans_log[:, last])
                elif 0.5 < weight < 0.7:
                    # At step 0, step - 1 wraps around to the last column.
                    path[step] = np.argmin(emi_log[:, step - 1]
                                           + trans_log[last, last])
                else:
                    path[step] = np.argmax(emi_log[:, step - 1]
                                           + trans_log[last, last]
                                           + time_log[:, step])
            path[36] = -2

            writer.writerow({'AnimalID': str(animal_id[pos]),
                             'TargetID': str(target_id[pos]),
                             'PATH': str(path + 1)})

            # g counts the -2 entries of the path (always includes entry 36).
            g = int(np.count_nonzero(path == -2))
            if target_id[pos] == 1:
                group1[g1] = (36 - g) / 36
                g1 = g1 + 1
            if target_id[pos] == 2:
                group2[g2] = (36 - g) / 36
                g2 = g2 + 1

    plt.plot(plot_val, group1, 'r', label="group1")
    plt.plot(plot_val, group2, 'b', label="group2")
    plt.legend(('group1', 'group2'))
    plt.show()
def main():
    """Decode one state path for each half of the fourth dataset.

    Reads ``datasets/RawData_fourth.csv`` and
    ``datasets/RawData_time_fourth.csv``, splits the rows into a first half
    (group 1) and a second half (group 2), obtains transition, emission and
    time-emission matrices from ``hmm_train`` for each half and decodes a
    37-entry path per half.  Both paths are printed.

    Returns:
        ``(count1, count2, path1 + 1, path2 + 1)`` where each count is
        ``(36 - k) / 36`` with ``k`` the number of -2 entries in that path.
    """
    first_field = 5   # observations start at the sixth field of a record
    n_obs = 36        # number of observation fields copied per record

    behaviour = np.genfromtxt(
        'datasets/RawData_fourth.csv', delimiter=',', names=True,
        case_sensitive=True, dtype='int')
    timing = np.genfromtxt(
        'datasets/RawData_time_fourth.csv', delimiter=',', names=True,
        case_sensitive=True, dtype='float')

    total = behaviour['Behaviours__1'].size
    timing['Time__1']  # same field lookup the original performed
    half = total // 2

    # Column 36 is never loaded: it stays -1 for codes and 0.0 for times.
    codes_a = np.full((half, 37), -1, dtype='int')
    codes_b = np.full((half, 37), -1, dtype=int)
    times_a = np.zeros((half, 37), dtype='float')
    times_b = np.zeros((half, 37), dtype=float)
    n_steps = codes_a[0].shape[0]
    num_states = 9
    # Iterating s over range(num_states - 1) and overwriting the same entry
    # each time leaves only the value for the last s, so it is used directly.
    last = num_states - 2

    def observation_fields(record):
        """Return the 36 observation fields of one record as a list."""
        return [record[first_field + col] for col in range(n_obs)]

    for row in range(half):
        codes_a[row, :n_obs] = observation_fields(behaviour[row])
    for row in range(half, total):
        codes_b[row - half, :n_obs] = observation_fields(behaviour[row])

    # Recode -1 as 9 in the loaded columns of both halves.
    for codes in (codes_a, codes_b):
        loaded = codes[:, :n_obs]
        loaded[loaded == -1] = 9

    for row in range(half):
        times_a[row, :n_obs] = observation_fields(timing[row])
    for row in range(half, total):
        times_b[row - half, :n_obs] = observation_fields(timing[row])

    def train(codes, times):
        """Return (emission probs, log emission, log transition, log time)."""
        trans_log = np.log(hmm_train.trans_prob_matrix(codes))
        emi_prob = hmm_train.emission_prob_matrix(codes)
        emi_prob[:, 36] = 0
        emi_log = np.log(emi_prob)
        time_log = np.log(hmm_train.emission_prob_matrix_time(codes, times))
        time_log[:, 36] = 0
        return emi_prob, emi_log, trans_log, time_log

    def decode(emi_prob, emi_log, trans_log, time_log):
        """Pick one state per step; row 8 of emi_prob selects the rule."""
        path = np.full(n_steps, -1, dtype='int')
        for step in range(n_steps):
            # Strict comparisons: exactly 0.7 or 0.8 reaches the last branch.
            weight = emi_prob[8, step]
            if weight > 0.8:
                path[step] = -2
            elif 0.7 < weight < 0.8:
                path[step] = np.argmax(emi_log[:, step] + trans_log[:, last])
            elif 0.5 < weight < 0.7:
                # At step 0, step - 1 wraps around to the last column.
                path[step] = np.argmin(emi_log[:, step - 1]
                                       + trans_log[last, last])
            else:
                path[step] = np.argmax(emi_log[:, step - 1]
                                       + trans_log[last, last]
                                       + time_log[:, step])
        return path

    # Both halves are trained before either is decoded, as in the original.
    model_a = train(codes_a, times_a)
    model_b = train(codes_b, times_b)
    path1 = decode(*model_a)
    path2 = decode(*model_b)

    # State 8 is reported as -2, and the final entry is always -2.
    for path in (path1, path2):
        path[path == 8] = -2
        path[36] = -2

    count1 = float((36 - int(np.count_nonzero(path1 == -2))) / 36)
    count2 = float((36 - int(np.count_nonzero(path2 == -2))) / 36)

    print('=============== MOST PROBABLE STATE SEQUENCE =================')
    print('MPS Group1 : ', path1 + 1)
    print('==============================================================')
    print('MPS Group2 : ', path2 + 1)
    print('==============================================================')
    return count1, count2, path1 + 1, path2 + 1
def main():
    """Decode one state path per trial across all four datasets.

    Loads the four ``datasets/RawData_*.csv`` files and their
    ``RawData_time_*`` companions.  For every row position the matching row
    of each dataset is stacked into a 4-row block that is passed to
    ``hmm_train``; a 37-entry path is decoded and written to
    ``Results/MPS_each_trial_all_Datasets.csv`` with the AnimalID, trial
    number and TargetID of that row (taken from the first dataset).

    Returns:
        ``(group1, group2)``: two ``(8, 12)`` integer arrays.  For TargetID 1
        (resp. 2), entry ``[state, trial - 1]`` counts how often ``state``
        appeared among the non-negative entries of the decoded paths.

    Raises:
        IndexError: if another dataset has fewer rows than the first one.
    """
    n_obs = 36        # observation fields copied from every record
    first_field = 5   # position of the first observation field in a record
    num_states = 9

    def load(path, kind):
        """Read one CSV file into a structured array of the given dtype."""
        return np.genfromtxt(path, delimiter=',', names=True,
                             case_sensitive=True, dtype=kind)

    def to_matrix(records, size_field, kind):
        """Copy the observation fields of every record into a 37-col matrix."""
        n = records[size_field].size
        matrix = np.zeros((n, 37), dtype=kind)
        for row in range(n):
            record = records[row]
            matrix[row, :n_obs] = [record[first_field + col]
                                   for col in range(n_obs)]
        return matrix

    raw = [load('datasets/RawData_first.csv', 'int'),
           load('datasets/RawData_second.csv', 'int'),
           load('datasets/RawData_third.csv', 'int'),
           load('datasets/RawData_fourth.csv', 'int')]
    raw_time = [load('datasets/RawData_time_first.csv', 'float'),
                load('datasets/RawData_time_second.csv', 'float'),
                load('datasets/RawData_time_third.csv', 'float'),
                load('datasets/RawData_time_fourth.csv', 'float')]

    # Column 36 is never loaded and stays 0 in all of these matrices.
    obs_sets = [to_matrix(rec, 'Behaviours__1', 'int') for rec in raw]
    time_sets = [to_matrix(rec, 'Time__1', 'float') for rec in raw_time]

    # Recode -1 as 9 in the loaded columns of EVERY dataset.  Each matrix is
    # changed only where its own entries are -1; writing all replacements
    # into the first dataset would destroy valid values there and leave the
    # other three unrecoded.
    for matrix in obs_sets:
        loaded = matrix[:, :n_obs]
        loaded[loaded == -1] = 9

    # Fields 0, 2 and 4 of the first dataset's records hold the animal id,
    # trial number and target id.
    first = raw[0]
    n_rows = obs_sets[0].shape[0]
    animal_id = np.array([first[row][0] for row in range(n_rows)], dtype=int)
    trial_no = np.array([first[row][2] for row in range(n_rows)], dtype=int)
    target_id = np.array([first[row][4] for row in range(n_rows)], dtype=int)

    group1 = np.zeros((8, 12), int)
    group2 = np.zeros((8, 12), int)

    # The original inner loops over s overwrote the same path entry on every
    # pass, so only the last state index (num_states - 2) ever mattered.
    last = num_states - 2

    # newline='' lets the csv module control line endings; without it every
    # row is followed by a blank line on Windows.
    with open('Results/MPS_each_trial_all_Datasets.csv', 'w',
              newline='') as csvfile:
        writer = csv.DictWriter(
            csvfile, fieldnames=['AnimalID', 'TrialNo.', 'TargetID', 'PATH'])
        writer.writeheader()

        for pos in range(n_rows):
            obs_set = np.full((4, 37), -1, dtype='int')
            obs_time_set = np.zeros((4, 37), dtype='float')
            for d in range(4):
                obs_set[d, :n_obs] = obs_sets[d][pos, :n_obs]
                obs_time_set[d, :n_obs] = time_sets[d][pos, :n_obs]

            n_steps = obs_set.shape[1]

            # Logs are used so that very small probabilities do not underflow.
            trans_log = np.log(hmm_train.trans_prob_matrix(obs_set))
            emi_prob = hmm_train.emission_prob_matrix(obs_set)
            emi_prob[:, 36] = 0
            emi_log = np.log(emi_prob)
            time_emi = hmm_train.emission_prob_matrix_time(obs_set,
                                                           obs_time_set)
            time_emi[:, 36] = 0
            time_log = np.log(time_emi)

            path = np.full(n_steps, -1, dtype='int')
            for step in range(n_steps):
                # Row 8 of the emission matrix selects the decoding rule.
                # The comparisons are strict, so exactly 0.7 or 0.8 falls
                # through to the last branch.
                weight = emi_prob[8, step]
                if weight > 0.8:
                    path[step] = -2
                elif 0.7 < weight < 0.8:
                    path[step] = np.argmax(emi_log[:, step]
                                           + trans_log[:, last])
                elif 0.5 < weight < 0.7:
                    # At step 0, step - 1 wraps around to the last column.
                    path[step] = np.argmin(emi_log[:, step - 1]
                                           + trans_log[last, last])
                else:
                    path[step] = np.argmax(emi_log[:, step - 1]
                                           + trans_log[last, last]
                                           + time_log[:, step])
            path[36] = -2

            writer.writerow({'AnimalID': str(animal_id[pos]),
                             'TrialNo.': str(trial_no[pos]),
                             'TargetID': str(target_id[pos]),
                             'PATH': str(path + 1)})

            # Tally every decoded (non-negative) state under this trial.
            # NOTE(review): the tallies have 8 rows, so a decoded state of 8
            # would raise IndexError here - confirm it cannot occur.
            trial = trial_no[pos] - 1
            if target_id[pos] == 1:
                for state in path[path >= 0]:
                    group1[state][trial] = group1[state][trial] + 1
            if target_id[pos] == 2:
                for state in path[path >= 0]:
                    group2[state][trial] = group2[state][trial] + 1

    return group1, group2