def get_location(self, first_char, second_char):
    # only normalize if character doesn't exist in keyboard
    if first_char not in self.qwerty_grid and first_char not in self.QWERTY_grid:
        first_char = helper.normalize(first_char)
    if second_char not in self.qwerty_grid and second_char not in self.QWERTY_grid:
        second_char = helper.normalize(second_char)

    loc1 = np.where(self.qwerty_grid == first_char)
    # if we can't find it, look in other map
    if len(loc1[0]) == 0:
        loc1 = np.where(self.QWERTY_grid == first_char)
        # still can't find it, just return average
        if len(loc1[0]) == 0:
            logger.debug("Couldn't find first character " + first_char + " so returning average locations")
            return self.average_x, self.average_y
    loc1_row = loc1[0][0]
    loc1_column = loc1[1][0]

    loc2 = np.where(self.qwerty_grid == second_char)
    # if we can't find it, look in other map
    if len(loc2[0]) == 0:
        loc2 = np.where(self.QWERTY_grid == second_char)
        # still can't find it, just return average
        if len(loc2[0]) == 0:
            logger.debug("Couldn't find second character " + second_char + " so returning average locations")
            return self.average_x, self.average_y
    loc2_row = loc2[0][0]
    loc2_column = loc2[1][0]

    # Handle spacebar case: snap the spacebar's column toward the other key
    if first_char == ' ':
        if 3 <= loc2_column <= 7:
            loc1_column = loc2_column
        elif loc2_column < 3:
            loc1_column = 3
        elif loc2_column > 7:
            loc1_column = 7
    if second_char == ' ':
        if 3 <= loc1_column <= 7:
            loc2_column = loc1_column
        elif loc1_column < 3:
            loc2_column = 3
        elif loc1_column > 7:
            loc2_column = 7

    x = loc1_row - loc2_row
    y = loc1_column - loc2_column
    return x, y
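# A minimal, self-contained sketch of the np.where grid lookup that get_location
# relies on. The small demo_grid and the key_offset helper are purely illustrative;
# the class's real qwerty_grid layout and its fallback/averaging logic are not
# reproduced here.
import numpy as np

demo_grid = np.array([['q', 'w', 'e'],
                      ['a', 's', 'd']])

def key_offset(grid, a, b):
    loc_a = np.where(grid == a)
    loc_b = np.where(grid == b)
    if len(loc_a[0]) == 0 or len(loc_b[0]) == 0:
        return None  # one of the characters is not on this grid
    # row/column offset between the two keys, matching get_location's return shape
    return loc_a[0][0] - loc_b[0][0], loc_a[1][0] - loc_b[1][0]

print(key_offset(demo_grid, 'q', 'd'))  # (-1, -2): one row up, two columns left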
def segment_ts(ts, window_size, skip_offset, word_length, y_alpha_size):
    ts_len = len(ts)
    mod = ts_len % window_size

    if skip_offset == 0:
        ts_len = int(ts_len - mod - window_size)
        rnge = int(ts_len / window_size)
    else:
        ts_len = int(math.ceil(ts_len / skip_offset))
        # ts_len = int(math.ceil((ts_len - window_size) / skip_offset))
        rnge = int(ts_len)

    curr_count = 0
    words = list()
    indices = list()
    complete_indices = list()
    window_frames = list()

    for i in range(0, rnge):
        sub_section = ts[curr_count:(curr_count + window_size)]
        scale_val = abs(np.max(sub_section) - np.min(sub_section))
        sub_section = normalize(sub_section)

        # build the SAX word for this window, one letter per chunk
        curr_word = ""
        chunk_size = int(len(sub_section) / word_length)
        num = 0
        for j in range(0, word_length):
            chunk = sub_section[num:num + chunk_size]
            curr_letter = alphabetize_ts(chunk, y_alpha_size)
            curr_word += str(curr_letter)
            complete_indices.append(curr_count)
            num += chunk_size

        words.append(curr_word)
        indices.append(curr_count)

        temp_df = pd.DataFrame()
        temp_df.insert(loc=0, column='sub_section', value=[sub_section])
        temp_df.insert(loc=0, column='keys', value=curr_word)
        temp_df.insert(loc=0, column='offset', value=sorted(sub_section)[len(sub_section) // 2])
        temp_df.insert(loc=0, column='scale', value=scale_val)
        temp_df.insert(loc=0, column='indices', value=curr_count)
        window_frames.append(temp_df)

        curr_count = curr_count + skip_offset

    # DataFrame.append was removed in pandas 2.0; concatenate the per-window frames instead
    df_sax = pd.concat(window_frames, ignore_index=True)

    return words, indices, df_sax
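# Hedged usage sketch for segment_ts on a synthetic signal. It assumes the
# module-level normalize() and alphabetize_ts() helpers referenced above are
# importable; the window/word/alphabet parameters are illustrative only.
import numpy as np

ts_demo = np.sin(np.linspace(0, 8 * np.pi, 1200))
words_demo, indices_demo, df_demo = segment_ts(
    ts_demo, window_size=120, skip_offset=120, word_length=3, y_alpha_size=4)
print(words_demo[:5])                                # one SAX word per window
print(df_demo[['indices', 'keys', 'scale']].head())  # per-window metadata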
def __init__(self, k=0, data=None, assignment=None, seed=None):
    # data is always the joint distribution, with rows being words and columns being classes.
    # However, in this paper all the data points come from the conditional probability distribution.
    # In the paper l is the number of document classes; here we use n to denote it.
    # marginalize P(C)
    if seed is not None:
        np.random.seed(seed)
    self.data = data
    self.k = k
    self.gini = 0

    # Initialize DC following the original paper:
    # initial assignment p(c_j|w_t) = max_i p(c_i|w_t), with k = n
    data = normalize(data)
    self.assignment = self.argmax_randtie_masking_generic(data, axis=1)
    clusters = convert_assignment_to_clusters(self.assignment, self.data)
    self.clusters = clusters

    _, n = data.shape
    if k > n:
        # split each cluster arbitrarily into at least floor(k/n) clusters
        n_to_split = k // n
        new_clusters = []
        for cluster in clusters:
            split_arrs = np.array_split(np.array(cluster), n_to_split)
            new_clusters += split_arrs
        self.clusters = new_clusters
    elif k < n:
        # merge the surplus clusters into the last kept cluster
        for i in range(k, len(clusters)):
            clusters[k - 1] += clusters[i]
        self.clusters = clusters[:k]

    self.clusters = np.asarray(self.clusters)
def get_hand(self, char):
    char = helper.normalize(char)

    loc = np.where(self.qwerty_grid == char)
    # if we can't find it, look in other map
    if len(loc[0]) == 0:
        loc = np.where(self.QWERTY_grid == char)
        # still can't find it, just return None
        if len(loc[0]) == 0:
            logger.debug("Couldn't find character " + char + " so returning none")
            return None

    loc_row = loc[0][0]
    loc_column = loc[1][0]

    # special case for spacebar, which can be typed with either hand
    if loc_row == 4:
        return 's'
    if loc_column <= 5:
        return 'l'
    return 'r'
def dtw_segment_ts(ts, window_size, skip_offset):
    ts_len = len(ts)
    mod = ts_len % window_size

    if skip_offset == 0:
        ts_len = int(ts_len - mod - window_size)
        rnge = int(ts_len / window_size)
    else:
        ts_len = int(math.ceil(ts_len / skip_offset))
        rnge = int(ts_len)

    curr_count = 0
    indices = list()
    window_frames = list()

    for i in range(0, rnge):
        sub_section = ts[curr_count:(curr_count + window_size)]
        sub_section = normalize(sub_section)
        indices.append(curr_count)

        temp_df = pd.DataFrame()
        temp_df.insert(loc=0, column='sub_section', value=[sub_section])
        temp_df.insert(loc=0, column='indices', value=curr_count)
        window_frames.append(temp_df)

        curr_count = curr_count + skip_offset

    # DataFrame.append was removed in pandas 2.0; concatenate the per-window frames instead
    df_sax = pd.concat(window_frames, ignore_index=True)
    return df_sax
# fgs = build_fg_z_cube(redshifts, eor_amp, scalar)
# combined_cubes = np.add(data_dict['data'][-i], fgs)
# else:
combined_cubes = data_dict['data'][80]  # -np.mod(i,200)]
print(np.shape(combined_cubes))

rnd_scale = 128  # np.random.choice(range(64, 256, 1))
# noise = np.zeros((512, 512, 30))
noise = snr[i] * np.random.normal(loc=0., scale=snr[i] * np.std(combined_cubes), size=(512, 512, 30))
# noise = snr[i] * np.std(combined_cubes) * np.random.rand(512, 512, 30)
print('Data std: {}'.format(np.std(combined_cubes)))
print('Noise std: {}'.format(np.std(noise)))

# data_sample = np.expand_dims(combined_cubes, axis=0)
data_sample = hf.scale_(hf.normalize(combined_cubes + noise), rnd_scale).reshape(1, rnd_scale, rnd_scale, 30)
label_sample = data_dict['labels'][80]  # -np.mod(i,200)]
print('scaled sample shape', np.shape(data_sample))

predict = fcn.fcn_model.predict(data_sample)[0]
predict_err = ekf_model.pred_uncertainty(data_sample)
print('Predicted Midpoint {0} Duration {1} Mean Z {2}'.format(*predict))

p1_arr.append(predict[0])
p2_arr.append(predict[1])
p3_arr.append(predict[2])
# p4_arr.append(predict[3])
# p5_arr.append(predict[4])
ssize.append(rnd_scale)
t1_arr.append(label_sample[0])
def train(self, data_dict, epochs=10000, batch_size=12, scalar_=1e5, fgcube=None):
    loss_arr_t = []
    loss_arr_v = []
    resizing = True

    # reuse an existing model on this instance if one is present
    # (the original check, 'self.fcn_model' in globals(), was always False)
    if hasattr(self, 'fcn_model'):
        print('Model valid.')
    else:
        print('No model found, starting from scratch.')
        self.fcn_model = self.FCN()
    print(self.fcn_model.summary())

    print('Doing an 80/20 Dataset Split.')
    # print('Building several realizations of point source foregrounds...')
    # test for the 'Generate' sentinel before the generic truthiness test,
    # otherwise the sentinel would be passed to load_FGCubes as a filename
    if fgcube == 'Generate':
        fgs = [hf.build_fg_z_cube(data_dict['redshifts'], eor_amp=data_dict['eor_amp'], scalar=scalar_)
               for i in range(50)]
        data_dict['foregrounds'] = fgs
        del fgs
    elif fgcube:
        fgs = hf.load_FGCubes(fgcube)
        data_dict['foregrounds'] = fgs
        del fgs
    else:
        print('No foregrounds included.')

    print('Scaling down cubes...')
    # data_dict_ = hf.scale_sample(data_dict)
    print('Normalizing scaled data cubes...')
    t0 = time()
    data_dict = hf.normalize(data_dict)  # normalize all data once and first
    print('Normalizing dataset took {0} secs.'.format(time() - t0))
    t0 = time()
    data_dict_ = hf.scale_sample(data_dict)
    print('Scaling dataset took {0} secs.'.format(time() - t0))

    data = np.copy(data_dict_['data'])
    labels = np.copy(data_dict_['labels'])
    redshifts = np.copy(data_dict_['redshifts'])

    length = len(labels)
    train_data = np.array(data[:int(length * 0.8)])
    train_labels = np.array(labels[:int(length * 0.8)])
    val_data = np.array(data[int(length * 0.8):])
    val_labels = np.array(labels[int(length * 0.8):])

    epoch_inds_t = np.array(range(len(train_labels))).reshape(-1, batch_size)
    # fcn_model.fit(self.data, self.labels)
    gc.enable()  # attempt garbage collection to release resources

    epoch_loss_t = []
    epoch_loss_v = []
    for e in range(epochs):
        print('Training Completed : {0}%'.format(100. * e / (1. * epochs)))
        # rnd_ind_t = np.random.choice(range(len(train_labels)), size=batch_size)
        epoch_inds_t = np.random.permutation(epoch_inds_t)
        # integer division so range() receives an int under Python 3
        for i in range(len(train_labels) // batch_size):
            rnd_ind_v = np.random.choice(range(len(val_labels)), size=batch_size)
            # train_scale = train_data[rnd_ind_t]
            # val_scale = val_data[rnd_ind_v]
            # train_dict = {'data': train_scale, 'labels': train_labels[rnd_ind_t], 'redshifts': []}
            # val_dict = {'data': val_scale, 'labels': val_labels[rnd_ind_v], 'redshifts': []}
            # train_dict = hf.scale_sample(train_dict)
            # val_dict = hf.scale_sample(val_dict)
            # print('Train data shape: ', np.shape(train_dict['data']))
            fcn_loss = self.fcn_model.train_on_batch(np.array(train_data[epoch_inds_t[i, :]]),
                                                     train_labels[epoch_inds_t[i, :]])
            val_loss = self.fcn_model.test_on_batch(np.array(val_data[rnd_ind_v]),
                                                    val_labels[rnd_ind_v])
            loss_arr_t.append(fcn_loss[0])
            loss_arr_v.append(val_loss[0])
            # del(val_dict)
            # del(train_dict)

        print('Epoch: {0} Train Loss: {1} Validation Loss: {2}'.format(e, np.mean(loss_arr_t), np.mean(loss_arr_v)))
        epoch_loss_t.append(np.mean(loss_arr_t))
        epoch_loss_v.append(np.mean(loss_arr_v))

        # if e % 100 == 0 and e != 0:
        if resizing:
            del train_data
            del val_data
            # print('Rescaling down new cubes...')
            # data_dict_ = hf.normalize(data_dict)
            data_dict_ = hf.scale_sample(data_dict)
            print('Expanding data volume...')
            data, labels = hf.expand_cubes(data_dict_)
            print('Dataset size now contains {0} samples.'.format(len(data)))
            # print('Normalizing new scaled data cubes...')
            train_labels = np.array(labels[:int(length * 0.8)])
            val_labels = np.array(labels[int(length * 0.8):])
            del data_dict_
            train_data = np.array(data[:int(length * 0.8)])
            val_data = np.array(data[int(length * 0.8):])

    plot_loss(self.model_name, range(epochs), epoch_loss_t, epoch_loss_v)
    return self.fcn_model
'''
from Neural_Network import Neural_Network
from helper_functions import parse_data
import pandas as pd
from OneHot import OneHot
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from helper_functions import load_terrain, normalize, matDesign
from imageio import imread

reduction = 36
x, y, z = load_terrain('./Terraindata/yellowstone1', reduction)
x, y, z = normalize(x, y, z)  # normalize training data

p_order = np.array([115])

toi = pd.DataFrame(columns=[
    "number of layers", "nodes per layer", "epoch", "batch size",
    "learning rate", "initial learning rate", "momentum parameter",
    "lambda", "stopping tol", "cost", "accuracy", "data set", "pol order"
])

eta = np.array([0.4])
mini_batch_size = np.array([50])
epochs = np.array([100])
lmbd = np.array([1e-6])
gamma = np.array([0.9])
kfold = 10
os.remove('C:/Megatron/Thesis/Thesis_Work/Sax/Final_code_test/Output/Original.png')

# remove_files()
# pre_data = pd.read_csv('dataList.csv', sep=',', header=None)
# data_df = pd.read_csv('dataframe.csv', sep=',')
# ts = pre_data.iloc[1:, :1]

pre_data = pd.read_csv('ECG200.csv', sep=',', header=None)
ts = pre_data.iloc[:, 0].values.flatten()
ts = np.asarray(ts, dtype=float)  # np.asfarray was removed in NumPy 2.0
ts = normalize(ts)

plt.plot(ts)
plt.savefig('./Output/Original.png')
plt.show()

y_alpha_size = 4
word_length = 3
window_size = 120  # round(len(ts) * 0.1)
skip_offset = round(window_size)
ham_distance = 0

seg_alpha, seg_indices, seg_df = segment_ts(ts, window_size, skip_offset, word_length, y_alpha_size)
# seg_dtw_df = dtw_segment_ts(ts, window_size, skip_offset)
compare_strings, compare_list = compare_shape_algo(seg_alpha, seg_indices,
def cal_kl_div_from_pts_to_centroid(self, data_pts, centroid, norm=True):
    # KL divergence from each (row-normalized) data point to a single centroid
    data_pts = normalize(data_pts)
    centroid = centroid / np.sum(centroid)
    return np.sum(data_pts * (np.log2(data_pts / centroid)), axis=1)
def cal_kl_div_from_pt_to_centroids(self, data_pt, centroids, norm=True):
    # KL divergence from a single (normalized) data point to each centroid row
    centroids = normalize(centroids)
    data_pt = data_pt / np.sum(data_pt)
    return np.sum(data_pt * (np.log2(data_pt / centroids)), axis=1)
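# Standalone cross-check of the row-wise KL divergence computed above, compared
# against scipy.stats.entropy with base 2. The probability vectors are made up
# for illustration and assumed strictly positive so the logarithm stays finite.
import numpy as np
from scipy.stats import entropy

p = np.array([0.2, 0.5, 0.3])                        # one data point
centroids = np.array([[0.25, 0.25, 0.50],
                      [0.10, 0.60, 0.30]])
centroids = centroids / centroids.sum(axis=1, keepdims=True)

kl = np.sum(p * np.log2(p / centroids), axis=1)      # same expression as above
print(kl)
print([entropy(p, c, base=2) for c in centroids])    # should agree with kl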
""" Compare NN with results from regression using tensorflow keras """ import pandas as pd from sklearn.model_selection import train_test_split import matplotlib.pyplot as plt import seaborn as sns import numpy as np from helper_functions import load_terrain, normalize,matDesign import tensorflow as tf reduction = 36 x,y,z = load_terrain('./Terraindata/yellowstone1', reduction) x,y,z = normalize(x,y,z) #use to normalize p_order = 50 X = matDesign(x,y,p_order) X = X[:,1:] model = tf.keras.models.Sequential([ tf.keras.layers.Dense(X.shape[1], activation='sigmoid'), tf.keras.layers.Dense(40, activation='sigmoid'), #tf.keras.layers.Dropout(0.2), tf.keras.layers.Dense(1, activation='sigmoid') ]) model.compile(optimizer='adam', loss='mse', metrics=['mse']) ### accuracy not important