def ReadAndExtractAll(fname='../data/features_all_v2.5.pkl'):
    '''read all data, extract features, write to dill'''
    short_pid, short_data, short_label = ReadData.ReadData(
        '../../data1/short.csv')
    long_pid, long_data, long_label = ReadData.ReadData('../../data1/long.csv')
    QRS_pid, QRS_data, QRS_label = ReadData.ReadData('../../data1/QRSinfo.csv')
    center_waves = ReadData.read_mean_wave('../../data1/centerwave_raw.csv')
    all_pid = QRS_pid
    feature_list, all_feature = GetAllFeature(short_data, long_data, QRS_data,
                                              long_pid, short_pid, center_waves)
    all_label = QRS_label
    print('ReadAndExtractAll done')
    print('all_feature shape: ', np.array(all_feature).shape)
    print('feature_list shape: ', len(feature_list))
    # np.nan_to_num returns a new array, so the result must be kept
    all_feature = np.nan_to_num(all_feature)
    with open(fname + '_feature_list.csv', 'w') as fout:
        for i in feature_list:
            fout.write(i + '\n')
    with open(fname, 'wb') as output:
        dill.dump(all_pid, output)
        dill.dump(all_feature, output)
        dill.dump(all_label, output)
    print('write done')
    return
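# A minimal read-back sketch for the pickle written above. It assumes the
# objects were dumped in the order shown in ReadAndExtractAll (pid, feature,
# label), so dill.load must be called in the same order on one open handle.
import dill

def load_features(fname='../data/features_all_v2.5.pkl'):
    with open(fname, 'rb') as fin:
        all_pid = dill.load(fin)
        all_feature = dill.load(fin)
        all_label = dill.load(fin)
    return all_pid, all_feature, all_label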
def ReadAndExtractAll(fname='../data/features_all_v2.2.pkl'):
    '''read all data, extract features, write to dill'''
    short_pid, short_data, short_label = ReadData.ReadData(
        '../../data1/short.csv')
    long_pid, long_data, long_label = ReadData.ReadData('../../data1/long.csv')
    QRS_pid, QRS_data, QRS_label = ReadData.ReadData('../../data1/QRSinfo.csv')
    center_waves = ReadData.read_mean_wave(
        '../../data1/center_wave_euclid_direct.csv')
    all_pid = QRS_pid
    all_feature = GetAllFeature(short_data, long_data, QRS_data,
                                long_pid, short_pid, center_waves)
    all_label = QRS_label
    print('ReadAndExtractAll done')
    print('all_feature shape: ', np.array(all_feature).shape)
    # with open(fname, 'wb') as output:
    #     dill.dump(all_pid, output)
    #     dill.dump(all_feature, output)
    #     dill.dump(all_label, output)
    return
def run():
    global data_path, problem_set, instance_num
    if request.method == 'POST':
        print(request.form, flush=True)
        return
    else:
        instance = request.args.get('instance')
        if instance == "":
            return render_template("index.html", x=x, y=y, z=z)
        J = []
        problem_set = instance[:1]
        instance_num = instance[1:]
        ReadData(os.path.join(APP_STATIC, data_path + problem_set + "_" + instance_num), J)
        ga = NSGA(500, 3, Job_set=J, common_due_date=120)
        ga.run()
        pareto = [item for sublist in ga.nondominated_sort() for item in sublist]
        input = [[] for _ in range(2)]
        output = [[]]
        weight = []
        for point in pareto:
            input[0].append(point.obj[0])   # weighted tardiness
            input[1].append(point.obj[1])   # total flow time
            output[0].append(point.obj[2])  # pieces
            weight.append(point.weights)
        res = DEA_analysis(input, output)
        eff = [r['Efficiency'] for r in res]
        result["weight"] = weight
        result["Flow_time"] = input[1]
        result["Tardiness"] = input[0]
        result["Piece"] = output[0]
        result["DEA_score"] = eff
        return render_template("index.html", result=result)
def main():
    seq = 1
    data = ReadData(dsName='airsim', subType='mr', seq=seq)
    barNames = data.getNewImgNames(subtype='bar')
    pinNames = data.getNewImgNames(subtype='pin')
    dirBar = data.path + '/images_bar'
    dirPin = data.path + '/images_pin'
    if not os.path.exists(dirBar):
        os.makedirs(dirBar)
    if not os.path.exists(dirPin):
        os.makedirs(dirPin)

    N = data.imgs.shape[0]
    for i in range(0, N):
        img = data.imgs[i]
        img = np.reshape(img, (360, 720, 3))
        pin = cv2.fisheye.undistortImage(img, K, D=D_pincus, Knew=K_pincus)
        bar = cv2.fisheye.undistortImage(img, K, D=D_barrel, Knew=K_barrel)
        # cv2.imshow('input', img)
        # cv2.imshow('pin', pin)
        # cv2.imshow('bar', bar)
        # cv2.waitKey(1)
        cv2.imwrite(barNames[i], bar * 255.0)
        cv2.imwrite(pinNames[i], pin * 255.0)
        print(i / N)
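# K, D_pincus, K_pincus, D_barrel, and K_barrel are module-level calibration
# constants not shown in this snippet. A hedged sketch of plausible
# placeholders, following OpenCV's fisheye convention of a 3x3 camera matrix
# and four distortion coefficients; the real values come from calibration.
import numpy as np

K = np.array([[180.0, 0.0, 360.0],
              [0.0, 180.0, 180.0],
              [0.0, 0.0, 1.0]])           # assumed camera matrix for 720x360 input
D_pincus = np.array([0.2, 0.0, 0.0, 0.0])   # assumed coefficients for a pincushion look
D_barrel = np.array([-0.2, 0.0, 0.0, 0.0])  # assumed coefficients for a barrel look
K_pincus = K.copy()
K_barrel = K.copy()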
def read_data():
    long_pid, long_data, long_label = ReadData.ReadData('../../data1/long.csv')
    # mat1 = [truncate_long(ts, 9000) for ts in long_data]
    # mat2 = [truncate_long(ts, 6000) for ts in long_data]
    mat3 = [truncate_long(ts, 3000) for ts in long_data]
    # mat4 = [sample_long(ts, 10) for ts in mat1]
    # mat5 = [sample_long(ts, 10) for ts in mat2]
    # mat6 = [sample_long(ts, 10) for ts in mat3]
    label_onehot = ReadData.Label2OneHot(long_label)
    # plt.plot(mat1[0])
    # plt.plot(mat4[0])
    mat = mat3
    all_feature = np.array(mat, dtype=np.float32)
    all_label = np.array(label_onehot, dtype=np.float32)

    kf = StratifiedKFold(n_splits=5, shuffle=True)
    for train_index, test_index in kf.split(all_feature, long_label):
        train_data = all_feature[train_index]
        train_label = all_label[train_index]
        test_data = all_feature[test_index]
        test_label = all_label[test_index]
        break  # keep only the first fold

    train_data = np.expand_dims(np.array(train_data, dtype=np.float32), axis=2)
    test_data = np.expand_dims(np.array(test_data, dtype=np.float32), axis=2)
    return train_data, train_label, test_data, test_label
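# truncate_long and sample_long are project helpers not shown in this file.
# Minimal sketches under assumptions: truncate_long pads or cuts a 1-D series
# to a fixed length, and sample_long keeps every k-th point. The padding value
# and sampling rule in the original project may differ.
def truncate_long(ts, length):
    """Cut ts to `length` samples, zero-padding at the end if it is shorter."""
    ts = list(ts)[:length]
    return ts + [0.0] * (length - len(ts))

def sample_long(ts, step):
    """Downsample ts by keeping one sample every `step` points."""
    return ts[::step]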
def expand_three_part():
    long_pid, long_data, long_label = ReadData.ReadData('../../data1/long.csv')

    kf = StratifiedKFold(n_splits=5, shuffle=True)
    for train_index, other_index in kf.split(np.array(long_data), np.array(long_label)):
        train_data = np.array(long_data)[train_index]
        train_label = np.array(long_label)[train_index]
        train_pid = np.array(long_pid)[train_index]
        other_data = np.array(long_data)[other_index]
        other_label = np.array(long_label)[other_index]
        other_pid = np.array(long_pid)[other_index]

        kf_1 = StratifiedKFold(n_splits=2, shuffle=True)
        for val_index, test_index in kf_1.split(np.array(other_data), np.array(other_label)):
            val_data = np.array(other_data)[val_index]
            val_label = np.array(other_label)[val_index]
            val_pid = np.array(other_pid)[val_index]
            test_data = np.array(other_data)[test_index]
            test_label = np.array(other_label)[test_index]
            test_pid = np.array(other_pid)[test_index]
            break
        break

    train_data_out, train_label_out, train_data_pid_out = slide_and_cut(
        list(train_data), list(train_label), list(train_pid))
    val_data_out, val_label_out, val_data_pid_out = slide_and_cut(
        list(val_data), list(val_label), list(val_pid))
    test_data_out, test_label_out, test_data_pid_out = slide_and_cut(
        list(test_data), list(test_label), list(test_pid))
    # sanity check: no patient id shared between any two splits (checked
    # pairwise, since an empty three-way intersection alone would not rule
    # out pairwise overlap)
    print(len(set(train_pid) & set(val_pid)) == 0
          and len(set(train_pid) & set(test_pid)) == 0
          and len(set(val_pid) & set(test_pid)) == 0)

    # with open('../../data1/expanded_three_part_window_6000_stride_500_6.pkl', 'wb') as fout:
    #     pickle.dump(train_data_out, fout)
    #     pickle.dump(train_label_out, fout)
    #     pickle.dump(val_data_out, fout)
    #     pickle.dump(val_label_out, fout)
    #     pickle.dump(test_data_out, fout)
    #     pickle.dump(test_label_out, fout)
    #     pickle.dump(test_data_pid_out, fout)

    ### use np.save to save data larger than 4 GB
    with open('../../data1/expanded_three_part_window_6000_stride_299.bin', 'wb') as fout:
        np.save(fout, train_data_out)
        np.save(fout, train_label_out)
        np.save(fout, val_data_out)
        np.save(fout, val_label_out)
        np.save(fout, test_data_out)
        np.save(fout, test_label_out)
        np.save(fout, test_data_pid_out)
    print('save done')
def expand_all():
    long_pid, long_data, long_label = ReadData.ReadData('../../data1/long.csv')
    data_out, label_out, pid_out = slide_and_cut(long_data, long_label, long_pid)
    ### use np.save to save data larger than 4 GB
    with open('../../data1/expanded_all_window_6000_stride_500.bin', 'wb') as fout:
        np.save(fout, data_out)
        np.save(fout, label_out)
    print('save done')
def read_data():
    long_pid, long_data, long_label = ReadData.ReadData('../../data1/long.csv')
    all_pid = np.array(long_pid)
    all_feature = np.array(long_data)
    all_label = np.array(long_label)
    print('read data done')
    data_out, label_out, pid_out = slide_and_cut(all_feature, all_label, all_pid)
    pid_map = {}
    for i in range(len(all_pid)):
        pid_map[all_pid[i]] = i
    return data_out, label_out, pid_out, pid_map
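# slide_and_cut is defined elsewhere in the project. A minimal sketch of the
# sliding-window expansion the callers above rely on, assuming a window of
# 6000 samples and a stride of 500 (the values suggested by the output file
# names); every window inherits the label and pid of its source record.
def slide_and_cut(data, labels, pids, window=6000, stride=500):
    data_out, label_out, pid_out = [], [], []
    for ts, label, pid in zip(data, labels, pids):
        for start in range(0, len(ts) - window + 1, stride):
            data_out.append(ts[start:start + window])
            label_out.append(label)
            pid_out.append(pid)
    return data_out, label_out, pid_out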
def read_data():
    X = ReadData.read_centerwave('../../data1/centerwave_resampled.csv')
    _, _, Y = ReadData.ReadData('../../data1/QRSinfo.csv')
    all_feature = np.array(X)
    print(all_feature.shape)
    all_label = np.array(Y)
    all_label_num = np.array(ReadData.Label2OneHot(Y))

    kf = StratifiedKFold(n_splits=5, shuffle=True)
    i_fold = 1
    print('all feature shape: {0}'.format(all_feature.shape))
    for train_index, test_index in kf.split(all_feature, all_label):
        train_data = all_feature[train_index]
        train_label = all_label_num[train_index]
        test_data = all_feature[test_index]
        test_label = all_label_num[test_index]
    print('read data done')
    return all_feature, all_label_num, train_data, train_label, test_data, test_label
def read_data():
    long_pid, long_data, long_label = ReadData.ReadData(
        '../../data1/centerwave.csv')
    mat1 = [truncate_long(ts, 9000) for ts in long_data]
    mat2 = [truncate_long(ts, 6000) for ts in long_data]
    mat3 = [truncate_long(ts, 3000) for ts in long_data]
    mat4 = [sample_long(ts, 10) for ts in mat1]
    mat5 = [sample_long(ts, 10) for ts in mat2]
    mat6 = [sample_long(ts, 10) for ts in mat3]
    label_onehot = ReadData.Label2OneHot(long_label)
    # plt.plot(mat1[0])
    # plt.plot(mat4[0])
    mat1 = np.expand_dims(np.array(mat1), axis=2)
    label_onehot = np.array(label_onehot)
    return mat1, label_onehot
def main():
    import ReadData
    filename = r'train_data.txt'
    print('......... reading data ...........')
    UsersItems, Items = ReadData.ReadData(filename)
    print('......... splitting data ...........')
    train, test = ReadData.divideData(UsersItems)
    print('............. training recommender ............')
    flag = 0  # 0: train/test mode, 1: generate final results
    near_num = 200
    top_num = 1
    hiddenStates_num = 10
    max_iter = 30
    mPLSA = CpLSA()
    if flag == 0:
        mPLSA.transformData(train)
        mPLSA.process(hiddenStates_num, max_iter)
        simUsers = mPLSA.calSimUsers(near_num)
    elif flag == 1:
        mPLSA.transformData(UsersItems)
        mPLSA.process(hiddenStates_num, max_iter)
def initHelper(self, dsName='airsim', subType='mr', seq=[1, 3, 5]):
    self.dsName = dsName
    # compare strings with '!=' rather than 'is not'
    self.numChannel = 3 if self.dsName != 'euroc' else 1
    self.subType = subType
    self.numDataset = len(seq)
    dataObj = [ReadData(dsName, subType, seq[i]) for i in range(0, self.numDataset)]

    # get number of data points
    self.numDataList = [dataObj[i].numData for i in range(0, self.numDataset)]
    self.numTotalData = np.sum(self.numDataList)
    self.numTotalImgData = np.sum([dataObj[i].numImgs for i in range(0, self.numDataset)])
    print(self.numDataList)
    print(self.numTotalData)

    # numeric data
    print('numeric data concat')
    self.dt = np.concatenate([dataObj[i].dt for i in range(0, self.numDataset)], axis=0)
    self.du = np.concatenate([dataObj[i].du for i in range(0, self.numDataset)], axis=0)
    self.dw = np.concatenate([dataObj[i].dw for i in range(0, self.numDataset)], axis=0)
    self.dw_gyro = np.concatenate([dataObj[i].dw_gyro for i in range(0, self.numDataset)], axis=0)
    self.dtrans = np.concatenate([dataObj[i].dtr for i in range(0, self.numDataset)], axis=0)
    self.dtr_gnd = np.concatenate([dataObj[i].dtr_gnd for i in range(0, self.numDataset)], axis=0)
    self.pos_gnd = np.concatenate([dataObj[i].pos_gnd for i in range(0, self.numDataset)], axis=0)
    self.rotM_bdy2gnd = np.concatenate([dataObj[i].rotM_bdy2gnd for i in range(0, self.numDataset)], axis=0)
    self.acc_gnd = np.concatenate([dataObj[i].acc_gnd for i in range(0, self.numDataset)], axis=0)
    print('done numeric data concat')

    # img data: fill one preallocated array and release each dataset as we go
    print('img data concat')
    self.numTotalImgs = sum([dataObj[i].numImgs for i in range(0, self.numDataset)])
    self.imgs = np.zeros((self.numTotalImgData, self.numChannel, 360, 720), dtype=np.float32)
    s, f = 0, 0
    for i in range(0, self.numDataset):
        temp = dataObj[i].numImgs
        f = s + temp
        self.imgs[s:f, :] = dataObj[i].imgs
        dataObj[i] = None  # free memory early
        s = f
    dataObj = None
    print('done img data concat')
def read_seq():
    long_pid, long_data, long_label = ReadData.ReadData('../../data1/long.csv')
    seq_pid = []
    seq_data = []
    seq_label = []
    seq_len = 1000
    for i in range(len(long_pid)):
        ts = long_data[i]
        for j in range(len(ts) // seq_len):
            seq_data.append(ts[j * seq_len:(j + 1) * seq_len])
            seq_pid.append(long_pid[i])
            seq_label.append(long_label[i])
    long_label = seq_label
    seq_data = np.array(seq_data, dtype=np.float32)
    seq_data = normalize(seq_data, axis=0)
    seq_label = ReadData.Label2OneHot(seq_label)
    seq_label = np.array(seq_label, dtype=np.float32)

    all_feature = seq_data
    all_label = seq_label
    kf = StratifiedKFold(n_splits=5, shuffle=True)
    for train_index, test_index in kf.split(all_feature, long_label):
        train_data = all_feature[train_index]
        train_label = all_label[train_index]
        test_data = all_feature[test_index]
        test_label = all_label[test_index]
        break  # keep only the first fold
    train_data = np.expand_dims(np.array(train_data, dtype=np.float32), axis=2)
    test_data = np.expand_dims(np.array(test_data, dtype=np.float32), axis=2)
    return train_data, train_label, test_data, test_label
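# ReadData.Label2OneHot is used throughout these loaders but defined elsewhere.
# A minimal sketch assuming it maps each label string to a one-hot row over the
# sorted set of distinct labels; the class order in the real project may differ.
def Label2OneHot(labels):
    classes = sorted(set(labels))
    index = {c: i for i, c in enumerate(classes)}
    onehot = [[0.0] * len(classes) for _ in labels]
    for row, label in zip(onehot, labels):
        row[index[label]] = 1.0
    return onehot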
c = "g" for j in F[i]: if c == 'r': ax.scatter(j.obj[0], j.obj[1], j.obj[2], c=c, alpha=1) else: ax.scatter(j.obj[0], j.obj[1], j.obj[2], c=c, alpha=alpha) ax.set_xlabel("Tardiness") ax.set_ylabel("Flow Time") ax.set_zlabel("Pieces") plt.show() if __name__ == "__main__": path = './Experiment_Data1/data_1_200' J = [] ReadData(path, J) ga = NSGA(3000, 5, Job_set=J, common_due_date=120) ga.run() pareto = ga.nondominated_sort()[0] input = [[] for _ in range(2)] output = [[]] for point in pareto: input[0].append(point.obj[0]) input[1].append(point.obj[1]) output[0].append(point.obj[2]) ga.plot(0.5) res = (DEA_analysis(input, output)) for idx, r in enumerate(res): print(
    return train_data, train_label, val_data, val_label, test_data, test_label, test_pid


if __name__ == '__main__':
    '''all_data, all_label, all_pid, pid_map = read_data()
    out_feature = get_resnet_feature(all_data, all_label, all_pid, pid_map)
    print('out_feature shape: ', out_feature.shape)
    with open('../data/feat_resnet.pkl', 'wb') as fout:
        dill.dump(out_feature, fout)
    '''

    '''
    #-----------------------------------------------test--------------------------------------------
    '''
    train_data, train_label, val_data, val_label, test_data, test_label, test_pid = read_data_from_pkl()
    long_pid, long_data, long_label = ReadData.ReadData('../../data1/long.csv')
    new_test = []
    new_pid = []
    new_label = []
    for j in range(len(long_pid)):
        for i in range(len(test_pid)):
            if long_pid[j] == test_pid[i]:
                new_test.append(long_data[j])
                new_pid.append(long_pid[j])
                new_label.append(long_label[j])
    out_label = ReadData.Label2OneHot(new_label)
    out_label = np.array(out_label, dtype=np.float32)
    new_test = np.array(new_test)
    new_pid = np.array(new_pid)
__author__ = 'saghar hosseini ([email protected])'

import numpy as np
from ReadData import *
from projection import *

##########################################################################################
# Load Data
##########################################################################################
# path="C:/Users/sagha_000/Documents/SVN/My_SVN/TimeVaryingSocialNetworks/datasets/as-733/"
path = "F:/Saghar_SVN/TimeVaryingSocialNetworks/datasets/twitter-pol-dataset/graphs/"
dataset = ReadData(path)
edges = dataset.read_network_snapshot(1, hasHeader=True)
nodes_list = set(edges.keys())
output_path = 'F:/Saghar_SVN/TimeVaryingSocialNetworks/datasets/twitter-pol-dataset/Results/wo_OPD/'

############################################################################################
# Define Parameters
############################################################################################
numberOfSnapshots = 1175
numCommunity = 10
mu = 0.1
lambdah_C = 0.0
lambdah_B = 0.0
sampleFraction = 0.25
n = len(nodes_list)
K_B = 1.0
K_C = 1.0

#############################################################################################
# variables
learning_rate_C = {}
initial_state = dict()
state = dict()
visit = {}
    ### pred
    ### alert: ENCASE is NAOP, LR is ANOP (the two models use different label orders)
    pred_proba_ENCASE = clf_ENCASE.predict_prob(feature_ENCASE)[0]
    pred_proba_mimic = get_mimic_proba(long_data[0])
    pred_final = 1 / 2 * pred_proba_ENCASE + 1 / 2 * pred_proba_mimic
    print('{0}\n{1}\n{2}'.format(pred_proba_ENCASE, pred_proba_mimic, pred_final))
    pred_label = labels[np.argsort(pred_final)[-1]]
    return pred_label


if __name__ == '__main__':
    short_pid, short_data, short_label = ReadData.ReadData(
        '../../data_val/short.csv')
    long_pid, long_data, long_label = ReadData.ReadData(
        '../../data_val/long.csv')
    QRS_pid, QRS_data, QRS_label = ReadData.ReadData(
        '../../data_val/QRSinfo.csv')
    print('=' * 60)
    print('pred begin')
    # short_data = short_data[:100]
    # long_data = long_data[:3]
    # QRS_data = QRS_data[:3]
    # long_pid = long_pid[:3]
    # short_pid = short_pid[:100]
    with open('../model/v2.5_xgb5_all.pkl', 'rb') as fin:
        clf_ENCASE = dill.load(fin)
        row.extend(CDF(ts))
        row.extend(CoeffOfVariation(ts))
        row.extend(MAD(ts))
        row.extend(QRSBasicStat(ts))
        row.extend(QRSBasicStatPointMedian(ts))
        row.extend(QRSBasicStatDeltaRR(ts))
        row.extend(QRSYuxi(ts))
        row.extend(Variability(ts))
        row.extend(minimum_ncce(ts))
        row.extend(bin_stat(ts))
        row.extend(qrs_autocorr(ts))
        ### no
        features.append(row)
        step += 1
        if step % 1000 == 0:
            print('extracting ...', step)
            # break
    print('extract QRS DONE')
    return feature_list, features


if __name__ == '__main__':
    QRS_pid, QRS_data, QRS_label = ReadData.ReadData('../../data1/QRSinfo.csv')
    tmp_features = get_qrs_feature(QRS_data[:1])
    print(len(tmp_features[1][0]))
    features = []
    step = 0
    for ts in table:
        row = []
        row.extend(short_basic_stat(ts))
        # row.extend(short_zero_crossing(ts))
        features.append(row)
        step += 1
        if step % 100000 == 0:
            print('extracting ...')
            # break
    print('extract DONE')
    return feature_list, features


if __name__ == '__main__':
    short_pid, short_data, short_label = ReadData.ReadData(
        '../../data1/short.csv')
    QRS_pid, QRS_data, QRS_label = ReadData.ReadData(
        '../../data1/QRSinfo.csv')
    tmp_features = get_short_stat_wave_feature(short_data[:10], short_pid[:10],
                                               QRS_pid[0])
    print(len(tmp_features[1][0]))
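# short_basic_stat is one of the per-segment feature extractors called above
# but not defined in this file. A minimal sketch under the assumption that it
# returns simple distribution statistics of one short segment; the real
# feature set is likely larger.
import numpy as np

def short_basic_stat(ts):
    ts = np.asarray(ts, dtype=np.float64)
    return [ts.mean(), ts.std(), ts.min(), ts.max(),
            np.percentile(ts, 25), np.percentile(ts, 50), np.percentile(ts, 75)]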
import dill
import features_all
import ReadData
# import challenge_encase_mimic

##############
#### load classifier
###############
# with open('model/v2.5_xgb5_all.pkl', 'rb') as my_in:
#     clf_final = dill.load(my_in)

##############
#### read and extract
###############
## short_pid1-12 and short_label1-12 are the same
long_pid0, long_data0, long_label0 = ReadData.ReadData('../data1/long0.csv')
long_pid1, long_data1, long_label1 = ReadData.ReadData('../data1/long1.csv')
long_pid2, long_data2, long_label2 = ReadData.ReadData('../data1/long2.csv')
long_pid3, long_data3, long_label3 = ReadData.ReadData('../data1/long3.csv')
long_pid4, long_data4, long_label4 = ReadData.ReadData('../data1/long4.csv')
long_pid5, long_data5, long_label5 = ReadData.ReadData('../data1/long5.csv')
long_pid6, long_data6, long_label6 = ReadData.ReadData('../data1/long6.csv')
long_pid7, long_data7, long_label7 = ReadData.ReadData('../data1/long7.csv')
long_pid8, long_data8, long_label8 = ReadData.ReadData('../data1/long8.csv')
long_pid9, long_data9, long_label9 = ReadData.ReadData('../data1/long9.csv')
long_pid10, long_data10, long_label10 = ReadData.ReadData(
    '../data1/long10.csv')
long_pid11, long_data11, long_label11 = ReadData.ReadData(
    '../data1/long11.csv')
short_pid0, short_data0, short_label0 = ReadData.ReadData(
    estimate = np.rint(prob.item(0))
    estimate_avg = np.rint(prob_avg.item(0))
    if (i in edges.keys() and j in edges[i]) or j == i:
        error += abs(1.0 - estimate)
        error_avg += abs(1.0 - estimate_avg)
    else:
        error += abs(0.0 - estimate)
        error_avg += abs(0.0 - estimate_avg)
    return error, error_avg


#########################################################################################
if __name__ == '__main__':
    path = "F:/Saghar_SVN/TimeVaryingSocialNetworks/datasets/as-733/"
    # path="C:/Users/sagha_000/Documents/SVN/My_SVN/TimeVaryingSocialNetworks/datasets/as-733/"
    Config.dataset = ReadData(path)
    numberOfSnapshots = Config.numberOfSnapshots
    numCommunity = Config.numCommunity
    error = [0.0] * numberOfSnapshots
    error_avg = error[:]
    error_file = path + 'error_output.csv'
    test_array = []
    num_cores = multiprocessing.cpu_count()

    for t in range(numberOfSnapshots):
        Config.edges = Config.dataset.read_network_snapshot(t)
        filename = path + 'state_output' + str(t) + '.csv'
        Config.state = read_in_states(filename, has_header=True)
        Config.nodes_list = set(Config.edges.keys())
        # set.union returns a new set, so the result must be assigned back
        Config.nodes_list = Config.nodes_list.union(Config.state.keys())
        # make it parallel
        for i in Config.state.keys():
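# read_in_states is called above but defined elsewhere. A minimal sketch under
# the assumption that each CSV row holds a node id followed by its state
# values; the column layout in the real project may differ.
import csv

def read_in_states(filename, has_header=True):
    state = {}
    with open(filename, 'r') as fin:
        reader = csv.reader(fin)
        if has_header:
            next(reader)
        for row in reader:
            state[row[0]] = [float(x) for x in row[1:]]
    return state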
def train(train_dir=None, val_dir=None, mode='train'):
    if FLAGS.model == 'lstm':
        model = cnn_lstm_otc_ocr.LSTMOCR(mode)
    else:
        print("no such model")
        sys.exit()

    # build the graph
    model.build_graph()

    ######################### read train data ###############################
    print('loading train data, please wait---------------------')
    train_feeder = utils.DataIterator(data_dir=FLAGS.train_dir, istrain=True)
    print('get image data size: ', train_feeder.size)
    filename = train_feeder.image
    label = train_feeder.labels
    print(len(filename))
    train_data = ReadData.ReadData(filename, label)

    ############################ read test data ##############################
    print('loading validation data, please wait---------------------')
    val_feeder = utils.DataIterator(data_dir=FLAGS.val_dir, istrain=False)
    filename1 = val_feeder.image
    label1 = val_feeder.labels
    test_data = ReadData.ReadData(filename1, label1)
    print('val get image: ', val_feeder.size)

    # compute the number of batches
    num_train_samples = train_feeder.size
    num_batches_per_epoch = int(num_train_samples / FLAGS.batch_size)  # batches per training epoch
    num_val_samples = val_feeder.size
    num_batches_per_epoch_val = int(num_val_samples / FLAGS.batch_size)  # batches per validation epoch

    ########################### data ################################################
    with tf.device('/cpu:0'):
        config = tf.ConfigProto(allow_soft_placement=True)
        ####################### read data ###################################
        with tf.Session(config=config) as sess:
            # initialize the data iterators
            train_data.init_itetator(sess)
            test_data.init_itetator(sess)
            train_data = train_data.get_nex_batch()
            test_data = test_data.get_nex_batch()
            # initialize global variables
            sess.run(tf.global_variables_initializer())
            saver = tf.train.Saver(tf.global_variables(), max_to_keep=100)  # model saver
            train_writer = tf.summary.FileWriter(FLAGS.log_dir + '/train', sess.graph)
            # restore a pretrained model
            if FLAGS.restore:
                ckpt = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
                if ckpt:
                    # the global_step will restore as well
                    saver.restore(sess, ckpt)
                    print('restore from the checkpoint{0}'.format(ckpt))
                else:
                    print("No checkpoint")

            print('=============================begin training=============================')
            accuracy_res = []
            epoch_res = []
            tmp_max = 0
            tmp_epoch = 0
            for cur_epoch in range(FLAGS.num_epochs):
                train_cost = 0
                batch_time = time.time()
                for cur_batch in range(num_batches_per_epoch):
                    # fetch this batch of training data
                    # read_data_start = time.time()
                    batch_inputs, batch_labels = sess.run(train_data)
                    # print('read data time', time.time() - read_data_start)
                    process_data_start = time.time()
                    # print('233333333333333', type(batch_labels))
                    new_batch_labels = utils.sparse_tuple_from_label(
                        batch_labels.tolist())
                    batch_seq_len = np.asarray(
                        [FLAGS.max_stepsize for _ in batch_inputs], dtype=np.int64)
                    # print('process data time', time.time() - process_data_start)
                    # train_data_start = time.time()
                    # print('2444444', batch_inputs.shape())
                    feed = {
                        model.inputs: batch_inputs,
                        model.labels: new_batch_labels,
                        model.seq_len: batch_seq_len
                    }
                    # if a summary is needed:
                    # batch_cost, step, train_summary, _ = sess.run([cost, global_step, merged_summay, optimizer], feed)
                    summary_str, batch_cost, step, _ = \
                        sess.run([model.merged_summay, model.cost,
                                  model.global_step, model.train_op], feed)
                    # accumulate the cost
                    train_cost += batch_cost * FLAGS.batch_size
                    # train_writer.add_summary(summary_str, step)
                    # print('train data time', time.time() - train_data_start)

                    # save the checkpoint
                    if step % FLAGS.save_steps == 1:
                        if not os.path.isdir(FLAGS.checkpoint_dir):
                            os.mkdir(FLAGS.checkpoint_dir)
                        logger.info('save the checkpoint of {0}'.format(step))
                        saver.save(sess,
                                   os.path.join(FLAGS.checkpoint_dir, 'ocr-model'),
                                   global_step=step)

                    if (cur_batch) % 100 == 1:
                        print('batch', cur_batch, ': time',
                              time.time() - batch_time, 'loss', batch_cost)
                        batch_time = time.time()
                    # train_err += the_err * FLAGS.batch_size

                    # do validation
                    if step % FLAGS.validation_steps == 0:
                        validation_start_time = time.time()
                        acc_batch_total = 0
                        lastbatch_err = 0
                        lr = 0
                        for j in range(num_batches_per_epoch_val):
                            batch_inputs, batch_labels = sess.run(test_data)
                            new_batch_labels = utils.sparse_tuple_from_label(
                                batch_labels.tolist())
                            batch_seq_len = np.asarray(
                                [FLAGS.max_stepsize for _ in batch_inputs], dtype=np.int64)
                            val_feed = {
                                model.inputs: batch_inputs,
                                model.labels: new_batch_labels,
                                model.seq_len: batch_seq_len
                            }
                            dense_decoded, lr = \
                                sess.run([model.dense_decoded, model.lrn_rate], val_feed)
                            acc = utils.accuracy_calculation(
                                batch_labels.tolist(), dense_decoded,
                                ignore_value=-1, isPrint=True)
                            acc_batch_total += acc

                        accuracy = (acc_batch_total * FLAGS.batch_size) / num_val_samples
                        accuracy_res.append(accuracy)
                        epoch_res.append(cur_epoch)
                        if accuracy > tmp_max:
                            tmp_max = accuracy
                            tmp_epoch = cur_epoch
                        avg_train_cost = train_cost / ((cur_batch + 1) * FLAGS.batch_size)
                        # train_err /= num_train_samples
                        now = datetime.datetime.now()
                        log = "{}/{} {}:{}:{} Epoch {}/{}, " \
                              "max_accuracy = {:.3f}, max_Epoch {}, accuracy = {:.3f}, " \
                              "acc_batch_total = {:.3f}, avg_train_cost = {:.3f}, " \
                              "time = {:.3f}, lr={:.8f}"
                        print(log.format(now.month, now.day, now.hour, now.minute,
                                         now.second, cur_epoch + 1, FLAGS.num_epochs,
                                         tmp_max, tmp_epoch, accuracy, acc_batch_total,
                                         avg_train_cost,
                                         time.time() - validation_start_time, lr))
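# utils.sparse_tuple_from_label is called above to feed CTC labels. A minimal
# sketch under the usual assumption for TF 1.x CTC training: it converts a
# batch of dense label sequences into the (indices, values, shape) triple that
# a tf.SparseTensor placeholder expects. It assumes at least one non-empty
# label sequence per batch.
import numpy as np

def sparse_tuple_from_label(sequences, dtype=np.int32):
    indices, values = [], []
    for n, seq in enumerate(sequences):
        indices.extend(zip([n] * len(seq), range(len(seq))))
        values.extend(seq)
    indices = np.asarray(indices, dtype=np.int64)
    values = np.asarray(values, dtype=dtype)
    shape = np.asarray([len(sequences), indices[:, 1].max() + 1], dtype=np.int64)
    return indices, values, shape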
        F1_list.append(F1_test)
        wrong_stat.extend(MyEval.WrongStat(i_fold, pred, test_label, test_pid))
        i_fold += 1
        clf_final_list.append(clf_final)

    avg_f1 = np.mean(F1_list)
    print('\n\nAvg F1: ', avg_f1)
    fout.write(str(avg_f1) + '=============================\n')
    wrong_stat = pd.DataFrame(wrong_stat, columns=['i_fold', 'pid', 'gt', 'pred'])
    # wrong_stat.to_csv('../../stat/wrong_stat_f1' + str(np.mean(F1_list)) + '.csv')


if __name__ == "__main__":
    all_feature = ReadData.read_centerwave('../../data1/centerwave_raw.csv')
    all_pid, _, all_label = ReadData.ReadData('../../data1/QRSinfo.csv')
    # print(sorted([len(i) for i in all_feature])[:100])
    all_feature = [np.array(i) for i in all_feature]
    # all_pid = all_pid[:5]
    # all_label = all_label[:5]
    # all_feature = all_feature[:5]
    print('read data done')
    fout = open('../../logs/knn', 'w')
    for i in range(100):
        TestKNN(all_pid, all_feature, all_label, fout)
    fout.close()
        clf_ENCASE = dill.load(fin)

    ### extract features
    feature_ENCASE = GetAllFeature_test(short_data, long_data, QRS_data,
                                        long_pid, short_pid)
    feature_ENCASE[0][-1] = feature_ENCASE[0][-1] + 0.0000001

    ### pred
    ### alert: ENCASE is NAOP, LR is ANOP (the two models use different label orders)
    pred_proba_ENCASE = clf_ENCASE.predict_prob(feature_ENCASE)[0]
    pred_proba_mimic = get_mimic_proba(long_data[0])
    pred_final = 1 / 2 * pred_proba_ENCASE + 1 / 2 * pred_proba_mimic
    print('{0}\n{1}\n{2}'.format(pred_proba_ENCASE, pred_proba_mimic, pred_final))
    pred_label = labels[np.argsort(pred_final)[-1]]
    return pred_label


if __name__ == '__main__':
    short_pid, short_data, short_label = ReadData.ReadData('data1/short.csv')
    long_pid, long_data, long_label = ReadData.ReadData('data1/long.csv')
    QRS_pid, QRS_data, QRS_label = ReadData.ReadData('data1/QRSinfo.csv')
    print('=' * 60)
    print('pred begin')
    res = pred_one_sample(short_data[0:40], long_data[0:1], QRS_data[0:1],
                          long_pid[0:1], short_pid[0:40])
    print('pred done, the label of {0} is {1}'.format(long_pid[0], res))
import ReadData
import Classifier
import GenTrainingData
import GenTestData

train_acc = []
test_acc = []
cm = []
for i in range(10):
    segsamplesize = 8
    firstk = 20
    secondk = 6
    numrdsamples = firstk * 150
    testtrainsplit = 0.2

    ## Read files
    rd = ReadData.ReadData(testtrainsplit)
    rd.ReadFiles()
    rd.SeparateData()

    ## Generate training data
    # Usage arguments: training data, segment size, outer k-means k,
    # inner k-means k, random sample size for the outer k-means
    trvq = GenTrainingData.GenTrainingData(rd.X_train, segsamplesize, firstk,
                                           secondk, numrdsamples)
    trvq.GenClusterData()
    trvq.GenClusters()
    trvq.VectorQuantization()

    ## Generate test data
    # Usage arguments: test data, first centroids, second centroids,
    # segment size, outer k-means k, inner k-means k
    tevq = GenTestData.GenTestData(rd.X_test, trvq.firstcentroids,
import numpy as np
import ReadData
import Sequence
from sklearn.metrics import roc_auc_score
import Distance

# name = "human_muscle"
# name = "fly_blastoderm"
name = "human_HBB"

## Load the dataset: the full set plus the positive and negative sets;
## all are raw sequences without labels.
# datasets, pos, neg = rd.getData2("fly_blastoderm")
## Build k-mer features for k = 2..6
print("---------", name, "------------")
rd = ReadData.ReadData()
datasets, pos, neg = rd.getData2(name)
## number of positive examples
possize = len(pos)
## merge the two lists
datasets = pos + neg
sq = Sequence.Sequence()
flag = True
## get the k-mer set of the whole dataset as a dict
d2set, d2dic = sq.getSeqKerSet(datasets, 2)
        ans[i, 0] = A[i, 0] / B[i, i]
    return ans


def K_effective(newmark, mass, stiffness):
    b1 = newmark.b1
    K = b1 * mass + stiffness
    return K


dt = 0.01
clock = Clock(dt, [0, 2])
newmark = Newmark(dt)
assembly = Assembly(newmark)
ReadData(assembly, '1FL.xlsx')
neq = assembly.eq_number
output_check(assembly, 'formal_1FL_check.txt')
with open('formal_1FL.txt', 'w') as p:
    p.write('%8s %7s %12s' % ('time', 'iter', 'ux_top'))
tol = 1e-4
# f = TimeSeries(0, 1, [0, 100, 100])
seismic = assembly.seismics[0]
# ef = dok_matrix([[f.at(clock.current_time)/2], [0], [0], [0], [0], [0]])
while not clock.is_end:
    ef = LM.force_ex(assembly, seismic.at(clock.current_time))
    P = assembly.get_internal_force('d', 'pos_origin')
    a = assembly.get_dof('a')