def main():
    """Interactively collect a system of congruence equations and solve it.

    On first run, creates the working directory ``dir`` (a module-level
    global — TODO confirm), prompts the user for the equations and writes
    them to data.txt; on later runs the existing directory is reused.
    Prints the solution, or returns False when the system has no solution.
    """
    equation = []
    if not os.path.exists(dir):
        os.mkdir(dir)
        os.chdir(dir)
        num = int(input("请输入一共有多少个同余方程:"))
        # BUG FIX: the file was written as 'da ta.txt' (stray space) but read
        # back below as './data.txt', so the solver never saw the user input.
        with open('data.txt', 'w') as f:
            for i in range(num):
                equation.append(input("请输入第%d个方程:" % i) + "\n")
                f.write(equation[i])
    else:
        print("目录已存在")
        os.chdir(dir)
    data = Data('./data.txt')
    if data.solution_judge():
        print("该同余式组有解")
        # Prefer the Chinese Remainder Theorem when the moduli allow it.
        if data.crt_judge():
            print("该同余式组可以用中国剩余定理")
            print("解为: x=%d(mod %d)" % Crt(data).crt_compute())
        else:
            print("该同余式组不可以用中国剩余定理,以一般方法求解")
            print("解为: x=%d(mod %d)" % Nst(data).nst_compute())
    else:
        print("该同余式组无解")
        return False
def main():
    """Read the congruence system from src_data/data.txt and solve it.

    Uses the Chinese Remainder Theorem when applicable, otherwise the
    general (Nst) method; appends the solution string to the data file
    and returns it.  When the system is unsolvable, sets the global
    ``solution`` message instead.
    """
    global solution
    data = Data('src_data/data.txt')

    # Input could not even be parsed — surface whatever Data recorded.
    if not data.error_flag:
        return data.solution

    # Unsolvable system: record and report.
    if not data.solution_judge():
        solution = "该同余式组无解"
        print("该同余式组无解")
        return solution

    print("该同余式组有解")
    if data.crt_judge():
        print("该同余式组可以用中国剩余定理")
        sol = "解为: x=%d(mod %d)" % Crt(data).crt_compute()
    else:
        print("该同余式组不可以用中国剩余定理,以一般方法求解")
        sol = "解为: x=%d(mod %d)" % Nst(data).nst_compute()

    print(sol)
    # Append the answer to the data file for the record.
    with open("src_data/data.txt", "a") as f:
        f.write("\n" + sol)
    return sol
def __init__(self, hparams, data_dir):
    """Load the dataset, id/token lookup tables and pre-trained embeddings.

    Args:
        hparams: hyper-parameter object; must expose ``glove_dir``.
        data_dir: directory holding the raw data files consumed by Data.
    """
    self.hparams = hparams
    self.data_dir = data_dir
    # logger
    self._logger = logging.getLogger(__name__)
    # data_process: project-local loader that parses the corpus
    self.data_process = Data(self.hparams, self.data_dir)
    (self.char_inputs, self.char_lengths), (self.inputs, self.labels, self.lengths) = \
        self.data_process.load_data()
    # char <-> id lookup tables
    self.char2id = self.data_process.char2id
    self.id2char = self.data_process.id2char
    # word <-> id
    self.word2id = self.data_process.word2id  # dict()
    self.id2word = self.data_process.id2word  # vocabulary
    # label <-> id
    self.label2id = self.data_process.label2id
    self.id2label = self.data_process.id2label
    # pre-trained word2vec: trimmed GloVe matrix stored as a .npz archive
    # under the "embeddings" key
    with np.load(os.path.join(self.hparams.glove_dir, "glove.6B.300d.trimmed.npz")) as pretrained_data:
        self.word_embeddings = pretrained_data["embeddings"]
    print(np.shape(self.word_embeddings))
def nst_compute(self):
    """Solve a congruence system that does not directly satisfy the CRT.

    Factorises every modulus into prime powers, merges equations that share
    a prime base (after checking they are compatible), writes the
    transformed system to transformed_data.txt, then solves it with the
    CRT when possible, otherwise recursively.

    Returns:
        (remainder, modulus) tuple such that x = remainder (mod modulus).
    """
    # Step 1: prime-factorise each modulus, producing an equivalent system
    # whose moduli are prime powers (base2[k] ** exponent[k]).
    for i in range(self.num_equation):
        a, b = prime_Decomposition(self.mod[i])
        self.base2.extend(a)
        self.exponent.extend(b)
        for j in range(len(a)):
            self.remainder2.append(self.remainder[i])
            self.num_equation2 += 1
    # Step 2: merge pairs of equations sharing a prime base.  They are
    # compatible iff base**min_exp divides the remainder difference; the
    # merged equation keeps the larger exponent and its remainder.
    for i in range(self.num_equation2):
        for j in range(i + 1, self.num_equation2):
            if self.base2[i] == self.base2[j]:
                c = min(self.exponent[i], self.exponent[j])  # smaller exponent
                b = max(self.exponent[i], self.exponent[j])  # larger exponent
                if abs(self.remainder2[i] - self.remainder2[j]) % pow(self.base2[i], c) == 0:
                    # Keep the remainder of the equation with the larger
                    # exponent (ties keep equation j, as before).
                    if self.exponent[i] > self.exponent[j]:
                        self.remainder3.append(self.remainder2[i])
                    else:
                        self.remainder3.append(self.remainder2[j])
                    self.base3.append(self.base2[i])
                    self.exponent3.append(b)
                    self.num_equation3 += 1
                else:
                    # BUG FIX: the original printed the contradiction message
                    # but kept running and returned a bogus solution; stop
                    # instead (matches the documented sibling implementation).
                    print("矛盾,该同余式组无解")
                    exit()
    # Step 3: carry over equations whose prime base occurs only once.
    for index, value in enumerate(self.base2):
        if self.base2.count(value) == 1:
            self.remainder3.append(self.remainder2[index])
            self.base3.append(self.base2[index])
            self.exponent3.append(self.exponent[index])
            self.num_equation3 += 1
    # Persist the transformed system so a fresh Data object can parse it.
    with open("transformed_data.txt", 'w') as f:
        for i in range(self.num_equation3):
            f.write("x=%d(mod%d)\n" % (self.remainder3[i],
                                       pow(self.base3[i], self.exponent3[i])))
    transformed_data = Data("./transformed_data.txt")
    if transformed_data.crt_judge():
        print("该同余式组可以用中国剩余定理")
        tcrt = Crt(transformed_data)
        return tcrt.crt_compute()
    else:
        # One round of merging was not enough — recurse on the reduced system.
        print("以一般方法 递归 求解")
        return Nst(transformed_data).nst_compute()
def _pre_process(self):
    """Resolve checkpoint paths and restore the pickled variables/config.

    Side effects: sets ``model_file``, ``meta_file``, ``var``, ``config``,
    ``data_tools`` and ``tokenizer`` on the instance.
    """
    self.model_file = os.path.join(self.model_dir, 'model.ckpt')
    self.meta_file = os.path.join(self.model_dir, 'model.ckpt.meta')
    var_file = os.path.join(self.model_dir, 'var.pkl')
    # NOTE(review): pickle.load is only safe on trusted input — var.pkl is
    # presumably produced by our own training run; confirm.
    with open(var_file, 'rb') as f:
        self.var, self.config = pickle.load(f)
    # Overlay the stored values on top of a default config so fields added
    # after training still receive their defaults.
    basic_config = config.basic_config()
    basic_config.__dict__.update(self.config)
    self.config = basic_config
    vocab_file = './data/vocab.txt'
    self.data_tools = Data(vocab_file, None, basic_config, logging)
    self.tokenizer = Tokenizer(logging)
def __init__(self):
    """Build datasets, embeddings and the memory network for training."""
    config = Config()
    self.emb = UtteranceEmbed(config.word2vec_filename)
    train_dataset = Data(config.train_filename, config.test_filename).train_set
    random.shuffle(train_dataset)
    # Fixed 361-sample split of the shuffled training set — NOTE(review):
    # magic number; verify it matches the dataset size used here.
    self.train_dataset = train_dataset[:361]
    self.test_dataset = train_dataset[361:]
    self.cate_mapping_dict = joblib.load(config.cate_mapping_dict)
    # NOTE(review): both positional args are the training split; confirm
    # BowEmbed is not meant to receive the test split as its second arg.
    self.bow_embed = BowEmbed(self.train_dataset, self.train_dataset, 200)
    nb_hidden = 128
    # obs_size = self.emb.dim + self.bow_embed.get_vocab_size()
    obs_size = self.emb.dim
    self.memory_net = Memory_net(obs_size, nb_hidden)
def main(config):
    """Build datasets and word embeddings, then train the DNN classifier."""
    corpus = Data(config.train_filename, config.dev_filename, config.test_filename)

    # load word2vec
    embedder = UtteranceEmbed(config.word2vec_filename)
    # Two utterance vectors are concatenated, hence the doubled input size.
    input_size = 2 * embedder.get_vector_size()

    model = Dnn(input_size, [7500, 7500, 5000, 2500], 3, embedder, config)
    model.build()
    model.train(corpus.train_set, corpus.dev_set, corpus.test_set)
def predict_score():
    """Score the test set with the saved per-class logistic-regression models.

    Loads one pickled model per class, predicts the positive-class
    probability for every test row, and writes ../data/output.csv.
    """
    data = Data()
    test_features = feature_extraction(data, "test")
    submission = pd.DataFrame.from_dict({'id': data.test['id']})
    for i in range(len(data.classes)):
        print("Processing " + data.classes[i])
        # BUG FIX: the pickle handle was opened but never closed (resource
        # leak); a context manager guarantees release even if load fails.
        with open('../model/logistic_regression_%s.pkl' % data.classes[i],
                  'rb') as lr_model_pkl:
            lr_model = pickle.load(lr_model_pkl)
        print(
            "Loaded Logistic Regression Model for class %s :: " %
            data.classes[i], lr_model)
        # Column 1 of predict_proba is the probability of the positive class.
        submission[data.classes[i]] = lr_model.predict_proba(test_features)[:, 1]
    print(submission.head(5))
    print("Saving output")
    submission.to_csv('../data/output.csv', index=False)
    print("Output saved")
def create_and_save():
    """Cross-validate, fit and pickle one logistic-regression model per class.

    For each class: report a 3-fold ROC-AUC CV score, plot/record the ROC
    curve, fit on the full training set, and save the model under ../model/.
    """
    data = Data()
    train_features = feature_extraction(data, "train")
    scores = []
    for i in range(len(data.classes)):
        print("Processing " + data.classes[i])
        train_target = data.train[data.classes[i]]
        classifier = LogisticRegression(solver='sag')
        # 3-fold ROC-AUC cross-validation before fitting on the full set.
        cv_score = np.mean(
            cross_val_score(classifier,
                            train_features,
                            train_target,
                            cv=3,
                            scoring='roc_auc'))
        scores.append(cv_score)
        print('CV score for class {} is {}'.format(data.classes[i], cv_score))
        # Calculate ROC_AUC
        roc_auc(train_features, np.array(train_target), data.classes[i])
        print("Creating model for class " + data.classes[i])
        classifier.fit(train_features, train_target)
        print("Saving model logistic_regression_%s" % data.classes[i])
        lr_pkl_filename = '../model/logistic_regression_%s.pkl' % data.classes[i]
        # BUG FIX: use a context manager so the file is closed even if
        # pickling raises (the bare open()/close() pair could leak).
        with open(lr_pkl_filename, 'wb') as lr_model_pkl:
            pickle.dump(classifier, lr_model_pkl)
        print("Model saved")
    print('Total CV score is {}'.format(np.mean(scores)))
    print("Successfully created and saved all models!")
total_loss / data_len, total_acc / data_len))
    # NOTE(review): the lines above finish a test() function whose definition
    # lies before this chunk; indentation is reconstructed.
    np.save(result_src, total_y_pre)  # persist predictions for later evaluation
    return y, total_y_pre


if __name__ == "__main__":
    # model checkpoint path
    model_save_src = "data/model/2_layer_lstm_model"
    num_category = 9
    # vectorised test set
    x_src = "data/vectorized_data/test/x.npy"
    y_src = "data/vectorized_data/test/y.npy"
    result_src = "data/results/rnn_pre.npy"
    vocab_src = "data/middle_result/vocab.npy"
    data = Data()
    vocab, _ = data.load_vocab(vocab_src)
    # model
    config = TRNNConfig()
    config.vocab_size = len(vocab)
    model = TextRNN(config)
    # testing
    print("Begin Testing")
    start_time = time.time()
    y, y_pre = test(x_src, y_src, result_src)
    print("the time is {}".format(get_time_dif(start_time)))
    # evaluation
    precision_score, recall_score, f1_val, accuracy = evaluate(y, y_pre)
def train():
    """Train the LSTM mid-price regressor, then write test-set predictions.

    NOTE(review): TensorFlow 1.x / Python 2 code (print statements); the
    chunk was whitespace-collapsed, so indentation is reconstructed.
    """
    epochs = 50
    length = 10        # time steps per sample
    n_units = 128      # LSTM hidden units
    n_features = 6     # features per time step
    batch_size = 64
    data = Data(batch_size)
    num_batches = data.num_batches()
    xplaceholder = tf.placeholder(tf.float32, shape=[None, length, n_features])
    yplaceholder = tf.placeholder(tf.float32, shape=[None, 1])
    # Per-sample normalisation statistics, used to undo standardisation below.
    midPrice_means = tf.placeholder(tf.float32, shape=[None, 1])
    midPrice_stddevs = tf.placeholder(tf.float32, shape=[None, 1])
    #origin midPrice
    origin_midPrice = yplaceholder * midPrice_stddevs + midPrice_means
    pred = lstm_model(xplaceholder, n_units)
    #pred midPrice
    pred_midPrice = pred * midPrice_stddevs + midPrice_means
    # Loss on the normalised target; "accuracy" is RMSE in raw price space.
    loss = tf.losses.mean_squared_error(labels=yplaceholder, predictions=pred)
    tf.summary.scalar('loss', loss)
    accuracy = tf.sqrt(
        tf.losses.mean_squared_error(origin_midPrice, pred_midPrice))
    tf.summary.scalar('accuracy', accuracy)
    step = tf.Variable(0)
    learning_rate = 1e-4
    tf.summary.scalar('learning rate', learning_rate)
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(
        loss, global_step=step)
    merged = tf.summary.merge_all()
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)
        train_writer = tf.summary.FileWriter('log', graph=sess.graph)
        saver = tf.train.Saver(max_to_keep=3)
        step_val = None
        for epoch in range(epochs):
            data.reset_batch()
            total_loss = 0.0
            total_acc = 0.0
            for i in range(num_batches):
                batch_inputs, batch_labels, batch_means, batch_stddevs = data.next_batch(
                )
                feed_dict = {
                    xplaceholder: batch_inputs,
                    yplaceholder: batch_labels,
                    midPrice_means: batch_means,
                    midPrice_stddevs: batch_stddevs
                }
                _, loss_val, acc_val, step_val, summary = sess.run(
                    [optimizer, loss, accuracy, step, merged],
                    feed_dict=feed_dict)
                total_acc += acc_val
                total_loss += loss_val
                train_writer.add_summary(summary, global_step=step_val)
            print 'Epoch', epoch, 'train_loss', total_loss / num_batches, 'train_acc', total_acc / num_batches
            '''
            dev_inputs, dev_labels = data.get_dev_data()
            feed_dict = {xplaceholder: dev_inputs, yplaceholder: dev_labels}
            acc_val, loss_val = sess.run([accuracy, loss], feed_dict = feed_dict)
            print 'dev_loss', loss_val, 'dev_acc', acc_val
            '''
        # After training: predict every test half-day and dump to CSV.
        outfile = open('outputs10.csv', 'w')
        outfile.write('midprice\n')
        test_inputs_list, test_means_list, test_stddevs_list = data.get_test_data(
        )
        for i in range(data.test_num_half_day):
            test_means = []
            test_stddevs = []
            test_inputs = test_inputs_list[i]
            # Each half-day shares one mean/stddev pair; broadcast them to
            # every window in that half-day.
            mean = test_means_list[i][0]
            stddev = test_stddevs_list[i][0]
            for j in range(len(test_inputs)):
                test_means.append(mean)
                test_stddevs.append(stddev)
            test_inputs = np.asarray(test_inputs)
            test_means = np.asarray(test_means).reshape([-1, 1])
            test_stddevs = np.asarray(test_stddevs).reshape([-1, 1])
            feed_dict = {
                xplaceholder: test_inputs,
                midPrice_means: test_means,
                midPrice_stddevs: test_stddevs
            }
            pred_val = sess.run(pred_midPrice, feed_dict=feed_dict)
            pred_val = np.asarray(pred_val)
            #print pred_val.shape
            for i in range(len(pred_val)):
                outfile.write(str(pred_val[i][0]) + '\n')
        outfile.close()
if total_batch - last_improved > require_improvement:
    # NOTE(review): fragment of a train() loop whose definition lies before
    # this chunk; indentation reconstructed.
    # Validation accuracy has not improved for a long time — early-stop.
    print("No optimization for a long time, auto-stopping...")
    flag = True
    break  # break the inner (batch) loop
if flag:  # propagate the early-stop to the enclosing (epoch) loop
    break
print("the best acc on validation is {}".format(best_acc_val))


if __name__ == '__main__':
    train_dir = "data/vectorized_data/train"
    val_dir = "data/vectorized_data/validation"
    vocab_dir = "data/file_dict/train/vocab.npy"
    save_dir = 'data/model2'
    data_process = Data()
    config = CharCNNConfig()
    # Build the vocabulary file on the first run only.
    if not os.path.exists(vocab_dir):
        data_process.build_vocab(train_dir, vocab_dir)
    words, word_to_id = data_process.load_vocab(vocab_dir)
    config.vocab_size = len(words)
    model = CharCNN(config)
    train()
from data_process import Data

if __name__ == '__main__':
    target_data = Data()
    path = ".././source/origin_source/C1_data.xlsx"
    # First rebuild the structure from my earlier version.
    # At least 5 rows of data must be read in, otherwise an error is raised.
    # Start by processing the 500-row scale of data.
    target_data.classfing_all_data()
    # NOTE(review): the triple-quoted block below is disabled code; its
    # closing quotes appear to lie beyond this chunk.
    '''
    nrows = 5000
    # 数据读入并设置基本参数
    target_data.reset()
    target_data.store_data_with_can_matrix_for_single_canid(path, nrows)
    target_data.set_data_length()
    # 进行分类
    target_data.initial_classfy_data()
    target_data.process_classfy_data()
    # 贪心算法求最优解
    target_data.greedy_find_solution()
    # 显示
    target_data.show_results()
    # target_data.check_sensor_or_counter()
    target_data.show_counter()
    target_data.show_sensor()
def nst_compute(self):
    """Solve a congruence system that cannot directly use the CRT.

    Returns:
        (remainder, modulus) tuple such that x = remainder (mod modulus).
    """
    # First, prime-factorise the moduli of the system (via
    # prime_Decomposition).  The standard factorisations form a second,
    # equivalent system of congruences.
    for i in range(self.num_equation):
        a, b = prime_Decomposition(self.mod[i])
        self.base2.extend(a)
        self.exponent.extend(b)
        for j in range(len(a)):
            self.remainder2.append(self.remainder[i])
            self.num_equation2 += 1
    # Double loop over the factorised system.
    # This step brings the system into a form compatible with the CRT.
    for i in range(self.num_equation2):
        for j in range(i + 1, self.num_equation2):
            if self.base2[i] == self.base2[j]:
                # flag marks which of the i-th / j-th exponents is larger
                flag = int(self.exponent[i] > self.exponent[j])
                # c is the smaller exponent
                # b is the larger exponent
                c = [self.exponent[j], self.exponent[i]
                     ][self.exponent[j] >= self.exponent[i]]
                b = [self.exponent[i],
                     self.exponent[j]][self.exponent[j] > self.exponent[i]]
                # Compatibility test: (base**c) divides the remainder difference
                if abs(self.remainder2[i] - self.remainder2[j]) % pow(
                        self.base2[i], c) == 0:
                    # Extend the remainder list with the remainder of the
                    # equation holding the larger exponent
                    if flag:
                        self.remainder3.append(self.remainder2[i])
                    else:
                        self.remainder3.append(self.remainder2[j])
                    # Extend base/exponent lists and bump the equation count
                    self.base3.append(self.base2[i])
                    self.exponent3.append(b)
                    self.num_equation3 += 1
                else:
                    print("矛盾,该同余式组无解")
                    exit()
    # Append the equations whose prime base occurs only once to the final
    # system.  Uses enumerate plus list.count: if a base appears exactly
    # once, copy that equation over by its index.
    for index, value in enumerate(self.base2):
        if self.base2.count(value) == 1:
            self.remainder3.append(self.remainder2[index])
            self.base3.append(self.base2[index])
            self.exponent3.append(self.exponent[index])
            self.num_equation3 += 1
    # Create a new file holding the final system, so a fresh Data object can
    # be built from that path.  After all this work the transformed system
    # may now satisfy the CRT; if so, the Crt class finishes the job.
    with open("src_data/transformed_data.txt", 'w') as f:
        for i in range(self.num_equation3):
            f.write("x=%d(mod%d)\n" %
                    (self.remainder3[i], pow(self.base3[i],
                                             self.exponent3[i])))
    # Build the Data object
    transformed_data = Data("src_data/transformed_data.txt")
    # If crt_judge returns 1, one round of transformation already satisfies
    # the Chinese Remainder Theorem.
    if transformed_data.crt_judge():
        tcrt = Crt(transformed_data)
        return tcrt.crt_compute()
    # Otherwise the system is closer to CRT form but not there yet: recurse,
    # feeding transformed_data back into Nst for another round.
    else:
        print("以一般方法 递归 求解")
        return Nst(transformed_data).nst_compute()