def test():
    to_run = [40, 60, 80, 100]
    for i in to_run:
        start_time = time.time()
        method = Greedy(i, datapre.Features(), datapre.CategoriesDistribution(), 0.1555)
        # profiles = method.SearchWithoutConstraints()
        # profiles = method.SearchWithConstraints()
        profiles = method.SearchWithReplace()
        # print len(profiles)
        end_time = time.time()
        # write the results to a file
        with open("%dGB_results" % i, "wb") as f:
            f.write("cost %f s" % (end_time - start_time))
            f.write("\n")
            f.write("Attribute Representativeness is:")
            f.write(str(method.AttributeRepresentative(profiles)))
            f.write("\n")
            for profile in profiles:
                f.write(profile + "\t")

# test()
def test():
    method = Classifier(datapre.Features())
    train_set, test_set = method.Split()
    print "Dataset split finished"
    print "Training and test set sizes: %d, %d" % (len(train_set), len(test_set))
    # Each of the three methods selects 100 representative profiles from train_set;
    # those representatives are then used to classify test_set.
    epsilons = [0.1560, 0.1556, 0.1555]
    # Also classify using the 100 users extracted by PageRank
    # PageRank_method = pr.PageRank(40, train_set, datapre.GetUserCategory())
    # # in/out-degree matrix
    # uMatrix = PageRank_method.GetUserMatrix()
    #
    # # transition matrix
    # fMatrix = mat([(1 - 0.85) / len(train_set.keys()) for i in range(len(train_set.keys()))]).T
    # # initial matrix
    # initPRMatrix = mat([1 for i in range(len(train_set.keys()))]).T
    # # PRMatrix holds the influence scores
    # PRMatrix = PageRank_method.PageRank(uMatrix, fMatrix, 0.85, initPRMatrix, 0.01, 120)
    # user_ids = train_set.keys()
    # uPR = {}
    # for i, id in zip(range(len(user_ids)), user_ids):
    #     uPR[id] = PRMatrix[i]
    # # sort uPR
    # uPR = sorted(uPR.items(), key=lambda dic: dic[1], reverse=True)
    # profiles = [u[0] for u in uPR[:100]]
    # print "PageRank classification accuracy: %.3f" % method.Classify(profiles, test_set)
    # return
    # epsilons = [0.1556, 0.1555]
    # epsilons = [0.1560]
    # init.InitialMatrix(train_set)
    number = [40, 60, 80, 100]
    print "Start extracting representative users"
    for epsilon in epsilons:
        with open("%.4f" % epsilon, "wb") as f:
            for k in number:
                profiles1 = greedy.Greedy(k, train_set, datapre.CategoriesDistribution(), epsilon).SearchWithReplace()
                print "GB method finished"
                profiles2 = kmediods.KMedoids(k, train_set, datapre.CategoriesDistribution(), epsilon).Search()
                print "k-medoids method finished"
                profiles3 = sa.SAalgo(k, train_set, datapre.CategoriesDistribution(), epsilon, 0.3, 10, 0.9).Search()
                print "SA method finished"
                accuracy1 = method.Classify(profiles1, test_set)
                f.write("method: GB; typicality threshold: %f; representative subset size: %d; accuracy: %.3f \n" % (epsilon, k, accuracy1))
                accuracy2 = method.Classify(profiles2, test_set)
                f.write("method: kmedoids; typicality threshold: %f; representative subset size: %d; accuracy: %.3f \n" % (epsilon, k, accuracy2))
                accuracy3 = method.Classify(profiles3, test_set)
                f.write("method: SA; typicality threshold: %f; representative subset size: %d; accuracy: %.3f \n" % (epsilon, k, accuracy3))
                print "method: GB; typicality threshold: %f; representative subset size: %d; accuracy: %.3f \n" % (epsilon, k, accuracy1)
                print "method: kmedoids; typicality threshold: %f; representative subset size: %d; accuracy: %.3f \n" % (epsilon, k, accuracy2)
                print "method: SA; typicality threshold: %f; representative subset size: %d; accuracy: %.3f \n" % (epsilon, k, accuracy3)
def evaluate(test_data_path, test_label_path, model_path, output_path, pad_index=0, max_length=256):
    test_data = dp.create_data(test_data_path)
    test_data = keras.preprocessing.sequence.pad_sequences(test_data,
                                                           value=pad_index,
                                                           padding="post",
                                                           maxlen=max_length)
    test_label = dp.create_label(test_label_path)
    model = keras.models.load_model(model_path)
    result = model.evaluate(test_data, test_label)
    # result[1] is the metric (accuracy) configured when the model was compiled
    with open(output_path, 'w+', encoding='UTF-8') as f:
        f.write(str(result[1]))
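# A minimal usage sketch for evaluate(). The file names below are illustrative
# placeholders (not files shipped with this project); pad_index and max_length
# simply mirror the defaults used elsewhere in these scripts.
if __name__ == "__main__":
    evaluate(test_data_path="test_data.txt",      # hypothetical path
             test_label_path="test_labels.txt",   # hypothetical path
             model_path="model.h5",               # hypothetical saved Keras model
             output_path="test_accuracy.txt",
             pad_index=0,
             max_length=256)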
def BuildTraining(batch_size, img_size, fea_label_file, train_index, validata_index):
    BATCH_SIZE = batch_size
    IMG_SIZE = img_size
    train_set = DataPrepare.DefaultDataset(fea_label_file, load_index=train_index)
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
    validate_set = DataPrepare.DefaultDataset(fea_label_file, load_index=validata_index)
    validate_loader = torch.utils.data.DataLoader(validate_set, batch_size=BATCH_SIZE, shuffle=True)
    print('Data load Success')
    return train_loader, validate_loader
def BuildTesting(batch_size, img_size, fea_label_file):
    BATCH_SIZE = batch_size
    test_set = DataPrepare.DefaultDataset(fea_label_file)
    test_loader = torch.utils.data.DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=False)
    print('Data load Success')
    return test_loader
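# A short, hedged usage sketch for the two loader builders above. The feature/label
# file and the index lists are placeholders rather than values defined in this project;
# img_size is forwarded unchanged because the builders currently do not use it.
if __name__ == "__main__":
    train_loader, validate_loader = BuildTraining(batch_size=32,
                                                  img_size=224,
                                                  fea_label_file="fea_label.csv",     # hypothetical file
                                                  train_index=list(range(0, 800)),    # hypothetical split
                                                  validata_index=list(range(800, 1000)))
    test_loader = BuildTesting(batch_size=32, img_size=224, fea_label_file="fea_label.csv")
    # report how many batches each loader will yield
    print(len(train_loader), len(validate_loader), len(test_loader))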
def Run():
    features = datapre.Features()
    epsilons = [0.1560, 0.1556, 0.1555]
    number = [40, 60, 80, 100]
    for epsilon in epsilons:
        for n in number:
            start_time = time.time()
            profile1 = greedy.Greedy(n, features, datapre.CategoriesDistribution(), epsilon).SearchWithReplace()
            end_time = time.time()
            with open("GB%d_%.4f" % (n, epsilon), "wb") as f:
                f.write("cost %f s" % (end_time - start_time))
                f.write("Attribute Representativeness is:")
                f.write(str(metric.AttributeRepresentative(features, profile1)))
                f.write("\n")
                for profile in profile1:
                    f.write(profile + "\t")

            start_time = time.time()
            profile2 = kmedoids.KMedoids(n, features, datapre.CategoriesDistribution(), epsilon).Search()
            end_time = time.time()
            with open("kmedoids%d_%.4f" % (n, epsilon), "wb") as f:
                f.write("cost %f s" % (end_time - start_time))
                f.write("Attribute Representativeness is:")
                f.write(str(metric.AttributeRepresentative(features, profile2)))
                f.write("\n")
                for profile in profile2:
                    f.write(profile + "\t")

            start_time = time.time()
            profile3 = sa.SAalgo(n, features, datapre.CategoriesDistribution(), epsilon, 0.3, 10, 0.9).Search()
            end_time = time.time()
            with open("sa%d_%.4f" % (n, epsilon), "wb") as f:
                f.write("cost %f s" % (end_time - start_time))
                f.write("Attribute Representativeness is:")
                f.write(str(metric.AttributeRepresentative(features, profile3)))
                f.write("\n")
                for profile in profile3:
                    f.write(profile + "\t")
def Cluster(self):
    k = self.k_min
    # initialize the seeds
    k_seeds = list(datapre.Initial(self.features, k))
    # clusters, keyed by seed
    cluster = {}
    # start iterating
    iteration = 0
    while iteration < self.Max_iteration:
        print k_seeds
        for seed in k_seeds:
            cluster[seed] = set()
            # put the seed itself into its cluster
            cluster[seed] = cluster[seed] | {seed}
        # assign every element to a cluster
        for key in self.features.keys():
            results = {
                seed: self.R[self.R_dic[seed], self.R_dic[key]]
                for seed in k_seeds
            }
            # results = {}
            # for seed in k_seeds:
            #     results[seed] = metric.Repre(self.features[seed], self.features[key])
            # join the cluster of the closest seed in k_seeds
            id = (max(results.items(), key=lambda key: key[1]))[0]
            cluster[id] = cluster[id] | {key}
        print "New clusters formed"
        # for seed in k_seeds:
        #     print len(cluster[seed])
        # update the medoids
        # flag decides whether iteration can stop
        flag = True
        # check each cluster separately
        print "Updating cluster centers"
        new_k_seeds = set()
        for seed in k_seeds:
            new_mediod = self.SelectNewMediod(list(cluster[seed]))
            new_k_seeds.add(new_mediod)
            if new_mediod != seed:
                # keep iterating
                flag = False
        if flag == True:
            # stop iterating
            break
        k_seeds = new_k_seeds
        iteration += 1
    print "Iterated %d times" % iteration
    return cluster, k_seeds
def testTree():
    listRows = DataPrepare.parseFile(DataSpecific.testingDFile)
    listResults = findValue(listRows)
    countCorrect = 0.0
    for i in range(0, len(listRows)):
        row = listRows[i]
        if row[15] == listResults[i]:
            countCorrect = countCorrect + 1
    return countCorrect * 100 / float(len(listRows))
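# A tiny hedged sketch: run the decision-tree evaluation above and report accuracy.
# It assumes DataSpecific.testingDFile already points at the test split parsed
# elsewhere in this module.
if __name__ == "__main__":
    print "Decision tree accuracy on the test file: %.2f%%" % testTree()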
def load_test(test_data_path, date, lgbm=True):
    dataLoader = DataPrepare.DataLoader(val_size=0, lgbm=lgbm)
    dataLoader.load_from_dir(path_data=[test_data_path],
                             path_real_churn=path_real_churn,
                             dates=date,
                             test=True)
    X_test = dataLoader.test_data
    if lgbm:
        X_test_ohe = dataLoader.test_data_ohe
        return X_test, X_test_ohe
    else:
        return X_test, None
def ProfileDomainDistribution(profiles):
    features = datapre.Features()
    categories = ['Politics', 'Religion', 'Military', 'Education', 'Economy',
                  'Technology', 'Agriculture', 'Sports', 'Entertainment']
    number = [0 for i in range(len(categories))]
    for profile in profiles:
        for i in range(len(categories)):
            if features[profile][5] == categories[i]:
                number[i] += 1
    return number
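# A minimal hedged sketch: print the per-category counts for a selected subset.
# The ids passed in are placeholders; in the experiments above they would come from
# the greedy / k-medoids / SA search methods.
if __name__ == "__main__":
    categories = ['Politics', 'Religion', 'Military', 'Education', 'Economy',
                  'Technology', 'Agriculture', 'Sports', 'Entertainment']
    example_profiles = list(datapre.Features().keys())[:40]  # placeholder subset
    counts = ProfileDomainDistribution(example_profiles)
    for name, count in zip(categories, counts):
        print "%s: %d" % (name, count)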
def Search(self):
    profiles = set()
    medoids_clusters = {}
    # cluster each category separately
    people = datapre.People(self.features)
    for category in self.categories.keys():
        # number of representatives required for this category
        number = int(self.k * self.categories[category]) + 1
        tuples = people[category]
        method = KMedoidsCluster(number, datapre.FeaturesById(tuples, self.features), category)
        clusters, medoids = method.Cluster()
        # add the medoids to profiles first
        for medoid in medoids:
            profiles.add(medoid)
            medoids_clusters[medoid] = clusters[medoid]
    print "Start deleting"
    # drop the surplus representatives
    profiles = self.Delete(profiles)
    print "Start replacing"
    profiles = self.Replace(profiles, medoids_clusters)
    return profiles
def Split(self):
    # returns the training set and the test set
    train_set = {}
    test_set = {}
    # for each category, 3/10 of its users go into train_set and the remaining 7/10 into test_set
    people = datapre.People(self.features)
    categories = datapre.GetUserCategory()
    for category in categories:
        domain_people = people[category]
        train_set_number = int(len(domain_people) * 0.3) + 1
        count = 0
        for id in domain_people:
            if count < train_set_number:
                train_set[id] = self.features[id]
                count += 1
            else:
                break
    # the remaining users form the test set
    left = set(self.features.keys()) - set(train_set.keys())
    for id in left:
        test_set[id] = self.features[id]
    return train_set, test_set
def test():
    features = datapre.Features()
    to_run = [40, 60, 80, 100]
    for i in to_run:
        start_time = time.time()
        method = KMedoids(i, datapre.Features(), datapre.CategoriesDistribution(), 0.1555)
        profiles = method.Search()
        end_time = time.time()
        print metric.AttributeRepresentative(features, profiles)
        print profiles
        print "cost %f s" % (end_time - start_time)
        with open("%dclustering_result" % i, "wb") as f:
            f.write("cost %f s" % (end_time - start_time))
            f.write("\n")
            f.write("Attribute Representativeness is:")
            f.write(str(metric.AttributeRepresentative(features, profiles)))
            f.write("\n")
            for profile in profiles:
                f.write(profile + "\t")

# test()
def cleanRow(row):
    row = DataPrepare.cleanRow(row)
    dictContVar = tree.dictIntervalContVar
    for entry in dictContVar.keys():
        listIntervals = dictContVar[entry]
        for i in range(0, len(listIntervals) - 1):
            if row[entry] == '?':
                break
            if row[entry] > listIntervals[i] and row[entry] <= listIntervals[i + 1]:
                row[entry] = i
                break
    return row
def load_data_for_models(val_size=0.2, lgbm=True):
    dataLoader = DataPrepare.DataLoader(val_size, lgbm)
    dataLoader.load_from_dir(path_data=path_data,
                             path_real_churn=path_real_churn,
                             dates=dates)
    X_train = dataLoader.all_train_X
    y_train = dataLoader.all_train_y
    X_val = dataLoader.all_val_X
    y_val = dataLoader.all_val_y
    print(y_train.value_counts(normalize=True))
    if lgbm:
        X_train_ohe = dataLoader.all_train_X_ohe
        X_val_ohe = dataLoader.all_val_X_ohe
        return X_train, y_train, X_val, y_val, X_train_ohe, X_val_ohe
    else:
        return X_train, y_train, X_val, y_val, None
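# A hedged sketch of how the loader above could feed a gradient-boosting model.
# LGBMClassifier and its fit(eval_set=...) signature are standard lightgbm API, but the
# choice of lightgbm, the hyperparameters, and the use of the one-hot-encoded frames
# are assumptions made for this sketch only.
if __name__ == "__main__":
    import lightgbm as lgb

    X_train, y_train, X_val, y_val, X_train_ohe, X_val_ohe = load_data_for_models(val_size=0.2, lgbm=True)
    clf = lgb.LGBMClassifier(n_estimators=200, learning_rate=0.05)   # illustrative settings
    clf.fit(X_train_ohe, y_train, eval_set=[(X_val_ohe, y_val)])
    print(clf.score(X_val_ohe, y_val))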
def predict(test_data_path, model_path, output_path, pad_index=0, max_length=256):
    test_data = dp.create_data(test_data_path)
    test_data = keras.preprocessing.sequence.pad_sequences(test_data,
                                                           value=pad_index,
                                                           padding="post",
                                                           maxlen=max_length)
    model = keras.models.load_model(model_path)
    results = model.predict(test_data)
    with open(output_path, 'w+', encoding='UTF-8') as f:
        for result in results:
            f.write("{}\n".format(result))
def Replace(self, target, profiles):
    # people maps each category to its user set
    people = datapre.People(self.features)
    category = self.features[target][5]
    index = profiles.index(target)
    old_element = profiles[index]
    profile_domain = set([id for id in profiles if self.features[id][5] == category])
    if os.path.exists("new%sRepresentativeMatrix.npy" % category):
        # load the representativeness matrix
        # open_file = open("%sRepresentativeMatrix.pickle" % category)
        # R = pickle.load(open_file)
        # open_file.close()
        # R = np.load("new%sRepresentativeMatrix.npy" % category)
        R = self.Repre[category]
        # load the id dictionary
        # open_file = open("new%sRepresentativeDictionary.pickle" % category)
        # R_dic = pickle.load(open_file)
        # open_file.close()
        R_dic = self.Repre_id[category]
        # matrix rows of the current representatives of this category
        rows = set([R_dic[id] for id in profile_domain])
        results = {
            element: sum(np.max(np.asarray([R[i] for i in rows | {R_dic[element]}]), axis=0))
            for element in people[category] if element not in set(profiles)
        }
        results = sorted(results.items(), key=lambda dic: dic[1], reverse=True)
        for result in results:
            to_replace = result[0]
            if metric.checkOneTypical(self.features, to_replace, profiles, self.epsilon):
                self.replace[target] = to_replace
                # swap the target (old_element) out for its replacement
                profiles[index] = to_replace
                # print new_element
                return to_replace
    return None
def SearchWithoutConstraints(self):
    # each inclusion step greedily optimizes the objective function
    profiles = set()
    people = datapre.People(self.features)
    print "Dataset loaded"
    for category in self.categories.keys():
        # p_number is the number of representatives required for this category
        p_number = int(self.k * self.categories[category]) + 1
        # tuples holds everyone in this category
        tuples = people[category]
        if not os.path.exists("new%sRepresentativeMatrix.npy" % category):
            pass
        else:
            # load the representativeness matrix
            # open_file = open("new%sRepresentativeMatrix.pickle" % category)
            # R = pickle.load(open_file)
            # open_file.close()
            # alternative way to load it:
            # R = np.load("new%sRepresentativeMatrix.npy" % category)
            R = self.Repre[category]
            rowN = len(tuples)
            results_vector = np.asarray([0 for i in xrange(rowN)])
            # with the representativeness matrix available, greedily pick p_number people
            count = 0
            has = {}
            while count < p_number:
                # results = {i: sum(max(x, y) for x, y in zip(R[i], results_vector)) for i in xrange(rowN) if i not in has}
                results = {
                    i: sum(np.max(np.vstack((R[i], results_vector)), axis=0))
                    for i in xrange(rowN) if i not in has
                }
                to_add = (max(results.items(), key=lambda key: key[1]))[0]
                has[to_add] = tuples[to_add]
                profiles.add(tuples[to_add])
                # update the coverage vector
                results_vector = np.max(np.vstack((R[to_add], results_vector)), axis=0)
                # results_vector = [max(x, y) for x, y in zip(R[to_add], results_vector)]
                count += 1
    print "the number of profiles is %d" % len(profiles)
    return list(profiles)
if __name__ == "__main__":
    # sess = tf.InteractiveSession()
    learning_rate = 0.001
    training_epochs = 15
    batch_size = 100
    display_step = 1
    num_feature_1st = 6
    num_feature_2nd = 500

    x = tf.placeholder(tf.float32, [None, 6])
    y = tf.placeholder(tf.float32)

    data = dp.dataPrepareForLogistic(dp.datas, dp.path)
    trainX = data[0]
    testX = data[1]   # a matrix
    trainY = data[2]  # a vector with binary number
    testY = data[3]

    params = random_init(x, num_feature_1st, num_feature_2nd)

    # construct model
    pred = multilayer_perceptron(x, num_feature_1st, num_feature_2nd)
    cost = -tf.reduce_sum(y * tf.log(pred))
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

    # construct logging
    with tf.name_scope("loss"):
        loss = -tf.reduce_sum(y * tf.log(pred))
        loss_summary = tf.summary.scalar("loss", loss)
def SplineData(DiltedData, times=2) -> list:
    res = []
    for i in range(len(DiltedData)):
        conf = GetCF(DiltedData[i])
        rec = GetRe(DiltedData[i])
        dea = GetDe(DiltedData[i])
        datal = len(conf)
        # knot vector and the three B-splines (confirmed / recovered / death curves)
        base = np.linspace(0, datal + 1, datal + 2)
        splc = BSpline(base, conf, times)
        splr = BSpline(base, rec, times)
        spld = BSpline(base, dea, times)
        # resample the splines on a 10x denser grid
        base2 = np.linspace(1, datal - 1, datal * 10)
        conf_, rec_, dea_ = splc(base2), splr(base2), spld(base2)
        ret = []
        for j in range(0, len(conf_)):
            x = DP.Data()
            x.ConfirmedData = conf_[j]
            x.RecoverData = rec_[j]
            x.DeathData = dea_[j]
            x.AreaName = DiltedData[i][0].AreaName
            ret.append(x)
        res.append(ret)
    return res


if __name__ == '__main__':
    data = DP.LoadData('data.csv')
    data = FiltData(data)
    Sdata = SplineData(data)
    print(Sdata[0][1040].RecoverData)
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import linear_model
from sklearn.utils import shuffle

import DataPrepare as dp

if __name__ == "__main__":
    data = dp.dataPrepare(dp.datas, dp.path)
    trainX = data[0]
    testX = data[1]   # a matrix
    trainY = data[2]  # a vector with binary number
    testY = data[3]

    clf = linear_model.SGDClassifier(loss="log")
    clf.fit(trainX, trainY)
    print(clf)
    print(clf.coef_)
    print(clf.intercept_)

    # Now we will test our model
    pred_labels = clf.predict(testX)
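    # A small hedged follow-up: score the predictions from the block above.
    # accuracy_score is standard scikit-learn; the metric choice itself is an
    # assumption, since the original script stops at clf.predict.
    from sklearn.metrics import accuracy_score
    print("Test accuracy of the SGD logistic model: %.3f" % accuracy_score(testY, pred_labels))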
import DataPrepare as dp
import numpy as np
import keras  # used below for pad_sequences
import sys

train_data_path = sys.argv[1]
train_label_path = sys.argv[2]
dev_data_path = sys.argv[3]
dev_label_path = sys.argv[4]
dict_path = sys.argv[5]
pad_index = int(sys.argv[6])
max_length = int(sys.argv[7])

train_data = dp.create_data(train_data_path)
train_label = dp.create_label(train_label_path)
dev_data = dp.create_data(dev_data_path)
dev_label = dp.create_label(dev_label_path)
word_index_dict = dp.create_dict(dict_path)
index_word_dict = dict([(value, key) for (key, value) in word_index_dict.items()])

vocab_size = 10000

train_data = keras.preprocessing.sequence.pad_sequences(train_data,
                                                        value=pad_index,
                                                        padding='post',
                                                        maxlen=max_length)
def Replace(self, profiles, cluster):
    '''
    :param profiles: the selected medoids
    :param cluster: a dict keyed by the profiles, whose values are the cluster members (as lists)
    :return: the profiles after replacement
    '''
    # a non-typical medoid is swapped for the element of its cluster that it represents
    # best and that still satisfies the typicality requirement
    while True:
        iteration = True
        new_profiles = deepcopy(profiles)
        for profile in profiles:
            if not metric.checkOneTypical(self.features, profile, new_profiles, self.epsilon):
                new_profiles.remove(profile)
                # replace profile: search cluster[profile] for the element that profile
                # represents best and that satisfies the constraint
                R = np.load("new%sRepresentativeMatrix.npy" % self.features[profile][5])
                # load the id dictionary
                open_file = open("new%sRepresentativeDictionary.pickle" % self.features[profile][5])
                R_dic = pickle.load(open_file)
                open_file.close()
                # within its cluster, rank the elements by how well profile represents them
                results = {
                    id: R[R_dic[id]][R_dic[profile]]
                    for id in cluster[profile]
                }
                # results = {element: metric.Repre(self.features[profile], self.features[element]) for element in cluster[profile]}
                results = sorted(results.items(), key=lambda key: key[1], reverse=True)
                flag = False
                # among results, pick the element that profile represents best and that is domain-typical
                for result in results:
                    key = result[0]
                    if metric.checkOneTypical(self.features, key, new_profiles, self.epsilon):
                        new_profiles.add(key)
                        cluster[key] = cluster[profile]
                        cluster.pop(profile)
                        flag = True
                        break
                # no domain-typical element found: remove these elements from the
                # category's population and re-cluster
                if flag == False:
                    iteration = False
                    # after removing these elements from the category, search for k clusters again
                    category = self.features[profile][5]
                    for profile in profiles:
                        if self.features[profile][5] == category:
                            new_profiles.remove(profile)
                    # the people of this category
                    tuples = datapre.People(self.features)[category]
                    # drop the members of cluster[profile]
                    for element in tuples:
                        if element in set(cluster[profile]):
                            tuples.remove(element)
                    number = 0
                    for profile in profiles:
                        if self.features[profile][5] == category:
                            number += 1
                    # re-cluster tuples
                    method = KMedoidsCluster(number, datapre.FeaturesById(tuples, self.features), category)
                    clusters, medoids = method.Cluster()
                    for key in clusters.keys():
                        cluster[key] = clusters[key]
                    for element in medoids:
                        new_profiles.add(element)
                    # new_profiles is now up to date; continue the replacement pass
        if iteration == True:
            break
        else:
            profiles = new_profiles
    return new_profiles
import DataPrepare
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.autograd import Variable
torch.backends.cudnn.benchmark = True
import argparse
from matlab_cp2tself import get_similarity_transself_for_cv2
from get_landmarks import get_five_points_landmarks
import net_sphere
import mtcnn

## front-end system setup
## parameter table
thres = 0.53  # recognition threshold

datas = DataPrepare.ImagePrepare('images')
imgs_alignment = datas.imgs_after_alignment
imgs_features = datas.get_imgs_features(imgs_alignment)
imgs_name_list = datas.imgs_name_list
for i, img_name in enumerate(imgs_name_list):
    img_name = img_name.split('.')[0]
    imgs_name_list[i] = img_name
imgs_name_list.append('unknown')

Haar_front_scale = 1.1    # Haar frontal-face image-pyramid scale factor, 1.1~1.4
Haar_front_neibor = 8     # Haar neighbors parameter, >= 2
Haar_profile_scale = 1.1
Haar_profile_neibor = 3
resize_x_y = (1600, 900)  # target size if the frame needs resizing before detection
resize_face = (250, 250)  # size of each detected face after resizing
def DomainDistribution(self, profiles):
    categories = datapre.DomainDistribution(profiles, self.features)
    return categories
import DataPrepare as DP
import NN
import matplotlib.pyplot as plt
import numpy as np
import spliner as sp

data = DP.LoadData('data.csv')
data = sp.SplineData(data, 3)
size = 700

confirmed = np.array(DP.GetConfirmed(data, size))
# confirmed = np.array(DP.GetDeath(data, size))
confirmedRate = DP.GetRate(confirmed, size)

plt.plot(confirmed[800])
plt.plot(np.array(confirmedRate[800]) * 50)
plt.show()

confirmedNN = NN.NN(confirmedRate, Units=256)
# confirmedNN.load('Deathmodel.300-0.01.h5')
confirmedNN.load('model.300-0.03.h5')
# confirmedNN.train()
confirmedRate = np.array(confirmedRate)

ncovdata = DP.LoadData('datancov.csv')
ncovdata = sp.SplineData(ncovdata, 1)
ncovconfirmed = np.array(DP.GetDeath(ncovdata, size))
ncovRate = np.array(DP.GetRate(ncovconfirmed, size))

validation = np.array([ncovRate[0, :-1]])
result = validation
curve = np.array([ncovconfirmed[660, :-1]])[-1]
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import numpy as np

import DataPrepare

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# sequence_length = 64
input_size = 26
hidden_size = 64
num_layers = 2
num_classes = 2
learning_rate = 0.01

fileLoader = DataPrepare.FileLoader()


# Recurrent neural network (many-to-one)
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        # standard many-to-one pass (assumed here): run the LSTM from zero initial
        # states and classify the hidden state of the last time step
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out
import sys

import keras  # used below for pad_sequences
import DataPrepare as dp

train_data_path = sys.argv[1]
train_label_path = sys.argv[2]
dev_data_path = sys.argv[3]
dev_label_path = sys.argv[4]
model_path = sys.argv[5]
# dict_path = "dict.txt"
pad_index = 0
max_length = 256

train_data = dp.create_data(train_data_path)
train_label = dp.create_label(train_label_path)
dev_data = dp.create_data(dev_data_path)
dev_label = dp.create_label(dev_label_path)
# word_index_dict = dp.create_dict(dict_path)
# index_word_dict = dict([(value, key) for (key, value) in word_index_dict.items()])

vocab_size = 50000

train_data = keras.preprocessing.sequence.pad_sequences(train_data,
                                                        value=pad_index,
                                                        padding='post',
                                                        maxlen=max_length)
dev_data = keras.preprocessing.sequence.pad_sequences(dev_data,
                                                      value=pad_index,
                                                      padding='post',
                                                      maxlen=max_length)
if train_or_test == "train":
    # -------------------------------------------------------------------------------
    # 1. Load Data
    # -------------------------------------------------------------------------------
    train_inputs = "./TrainValiData/Train_inputSet_" + codec + \
                   "_defautLang_OLdata_ValiTrain_type" + type + "_Fram256_ceps.mat"
    train_targets = "./TrainValiData/Train_targetSet_" + codec + \
                    "_defautLang_OLdata_ValiTrain_type" + type + "_Fram256_ceps.mat"
    vali_inputs = "./TrainValiData/Vali_inputSet_" + codec + \
                  "_defautLang_OLdata_ValiTrain_smallVali_type" + type + "_Fram256_ceps.mat"
    vali_targets = "./TrainValiData/Vali_targetSet_" + codec + \
                   "_defautLang_OLdata_ValiTrain_smallVali_type" + type + "_Fram256_ceps.mat"

    x_train_noisy, x_train, x_train_noisy_vali, x_train_vali = dp.load_train_data(train_inputs, train_targets,
                                                                                  vali_inputs, vali_targets)

    # -------------------------------------------------------------------------------
    # 2. Init Cepstral-QSRCNN Model
    # -------------------------------------------------------------------------------
    qsrcnn = model.CepstralQSRCNN(opt_params=default_opt_params,
                                  model_params=default_model_params,
                                  codec_type_params=codec_type_params)

    # -------------------------------------------------------------------------------
    # 3. Fit The Cepstral-QSRCNN Model
    # -------------------------------------------------------------------------------
    hist = qsrcnn.fit(x_train_noisy, x_train, x_train_noisy_vali, x_train_vali)

    # -------------------------------------------------------------------------------
    # 4. Save Weights and Training Curves
def query():
    listRows = DataPrepare.parseQueryFile(DataSpecific.queryDFile)
    dictValues = {}
    for row in listRows:
        print "Row: ", row, " Result:", queryRow(row)