def main():
    # Note: the original had "origin_data/-Doi_song_train.muc"; the stray
    # leading hyphen looks like a typo given the dev/test paths below.
    train_dir = "origin_data/Doi_song_train.muc"
    dev_dir = "origin_data/Doi_song_dev.muc"
    test_dir = "origin_data/Doi_song_test.muc"

    counter = Counter()
    print("read train file")
    sentences_train = read_file(train_dir, counter)
    print("read dev file")
    sentences_dev = read_file(dev_dir, counter)
    print("read test file")
    sentences_test = read_file(test_dir, counter)
    print(counter)
    print("longest sentence: %s" % str(counter.longest_sen))
    print("longest word: %s" % counter.longest_word())

    word2idx = read_word_embedding()
    char2idx = tags2idx(counter.char_vocab)
    pos2idx = tags2idx(counter.pos_tags)
    chunk2idx = tags2idx(counter.chunk_tags)
    ner2idx = tags2idx(counter.ner_tags)
    utils.json_dump(char2idx, "embedding/char2idx.json")
    utils.json_dump(pos2idx, "embedding/pos2idx.json")
    utils.json_dump(chunk2idx, "embedding/chunk2idx.json")
    utils.json_dump(ner2idx, "embedding/ner2idx.json")

    print("encoding data")
    encoder = {
        "max_sen_len": counter.max_sen_len,
        "max_word_len": counter.max_word_len,
        **utils.make_dict(word2idx, char2idx, pos2idx, chunk2idx, ner2idx)
    }
    encoder = utils.dict_to_object(encoder)
    chars_train, words_train, pos_train, chunk_train, ner_train = encode_sens(
        sentences_train, encoder)
    chars_dev, words_dev, pos_dev, chunk_dev, ner_dev = encode_sens(
        sentences_dev, encoder)
    chars_test, words_test, pos_test, chunk_test, ner_test = encode_sens(
        sentences_test, encoder)

    print("saving data")
    data = utils.make_dict(chars_train, words_train, pos_train, chunk_train,
                           ner_train, chars_dev, words_dev, pos_dev,
                           chunk_dev, ner_dev, chars_test, words_test,
                           pos_test, chunk_test, ner_test)
    os.makedirs("data", exist_ok=True)
    for k, d in data.items():
        np.save("data/%s.npy" % k, d)
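# --- Hedged sketch, not part of the original source: utils.make_dict is
# called above with bare variables and the resulting dict is saved under
# keys such as "chars_train", so it appears to key the dict by the
# argument *names*. One way to get that effect is to look each value up in
# the caller's locals, as below; this is an assumption, not the
# repository's actual helper.
import inspect


def make_dict(*args):
    caller_locals = inspect.currentframe().f_back.f_locals
    out = {}
    for value in args:
        # find the caller's name for this object (first match wins)
        for name, local in caller_locals.items():
            if local is value:
                out[name] = value
                break
    return out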
def main(): for folder in ["data/train", "data/test", "data/embedding"]: makedirs(folder, exist_ok=True) counter = Counter() print("read train file") sentences_train, y_train = read_file("origin_data/train.cln", counter) print("read test file") sentences_test, y_test = read_file("origin_data/test.cln", counter) print(counter) print("read word embeddings") word2idx = read_word_embeddings(counter.vocab_word) print("read char embeddings") char2idx = read_char_embeddings(counter.vocab_char) print("pretrain position embeddings") dis2idx_1, position_embeddings_1 = pretrain_embedding( counter.distances_1, POSITION_EMBED_SIZE) dis2idx_2, position_embeddings_2 = pretrain_embedding( counter.distances_2, POSITION_EMBED_SIZE) np.save("data/embedding/position_embeddings_1.npy", position_embeddings_1) np.save("data/embedding/position_embeddings_2.npy", position_embeddings_2) print("pretrain pos_tag embeddings") tag2idx, tag_embeddings = pretrain_embedding(counter.tags, TAG_EMBED_SIZE, True) np.save("data/embedding/tag_embeddings.npy", tag_embeddings) encoder = Encoder(word2idx, char2idx, dis2idx_1, dis2idx_2, tag2idx) print("saving train data") words_train, chars_train, pos1_train, pos2_train, e1_train, e2_train, tags_train, e1context_train, e2context_train, segments_train = zip( *[s.generate_features(encoder) for s in sentences_train]) data_train = make_dict(words_train, chars_train, pos1_train, pos2_train, e1_train, e2_train, tags_train, e1context_train, e2context_train, segments_train, y_train) numpy_save_many(data_train) print("saving test data") words_test, chars_test, pos1_test, pos2_test, e1_test, e2_test, tags_test, e1context_test, e2context_test, segments_test = zip( *[s.generate_features(encoder) for s in sentences_test]) data_test = make_dict(words_test, chars_test, pos1_test, pos2_test, e1_test, e2_test, tags_test, e1context_test, e2context_test, segments_test, y_test) numpy_save_many(data_test) print(encoder)
def main(train_dir, dev_dir, test_dir):
    # input_dir = "data/train/*.muc"
    vocabs_dir = "embedding/vocabs.json"
    counter = Counter()
    # num_sens = 0
    read_file(train_dir, counter, update_kb=True)
    read_file(dev_dir, counter)
    read_file(test_dir, counter)
    print(counter)
    # print("Num sent train: %s" % num_sens)
    print("longest sentence: %s" % str(counter.longest_sen))
    print("longest word: %s" % counter.longest_word())

    kb_words = {k: list(v) for k, v in counter.kb_words.items()}
    json_dump(kb_words, "embedding/kb_words.json")

    word2idx = construct_word_embeddings(counter.word_vocab)
    char2idx = construct_char_embeddings(counter.char_vocab)
    pos2idx = construct_pos_embeddings(counter.pos_tags)
    ner2idx = construct_ner_embeddings(counter.ner_tags)

    vocabs = ObjectDict(make_dict(word2idx, char2idx, ner2idx, pos2idx),
                        max_sen_len=counter.max_sen_len,
                        max_word_len=counter.max_word_len)
    vocabs.save(vocabs_dir)
def main(): for folder in ["data/embedding"]: makedirs(folder, exist_ok=True) counter = Counter() print("read data") sentences, y = read_file("origin_data/data.cln", counter) print(counter) print("read word embeddings") word2idx = read_word_embeddings(counter.vocab_word) print("load position embeddings") dis2idx_1, position_embeddings_1 = pretrain_embedding(counter.distances_1, POSITION_EMBED_SIZE) json.dump(dis2idx_1, open("data/embedding/dis2idx_1.json", "w")) np.save("data/embedding/position_embeddings_1.npy", position_embeddings_1) dis2idx_2, position_embeddings_2 = pretrain_embedding(counter.distances_2, POSITION_EMBED_SIZE) json.dump(dis2idx_2, open("data/embedding/dis2idx_2.json", "w")) np.save("data/embedding/position_embeddings_2.npy", position_embeddings_2) encoder = Encoder(word2idx, dis2idx_1, dis2idx_2) print("saving data") words, pos1, pos2, e1, e2, e1context, e2context, segments = zip(*[s.generate_features(encoder) for s in sentences]) data = make_dict(words, pos1, pos2, e1, e2, e1context, e2context, segments, y) numpy_save_many(data) print(encoder)
def main(n, fpath):
    lines = [line.rstrip("\n") for line in open(fpath, encoding="utf8")]
    words = []
    data = {}
    word_freq = {}
    for line in lines[1:]:
        try:
            qid, question = get_qid_question(line)
            data[qid] = question
        except ValueError:
            continue
    x = 0.6
    s = matrix(data, x)
    make_dict(s, n)
def dataprocessing(task):
    # =============================================
    # DATA PREPARATION
    # =============================================
    print("======================================================")
    print("Data Loading..")
    patient_data, train, test = utils.load_data(parameters.patient_data_path,
                                                parameters.train_data_path,
                                                parameters.test_data_path,
                                                task)
    patient_header, patient_data = patient_data[0], patient_data[1:]

    print("Make Dictionary..")
    drug2id, id2drug, patient_data = utils.make_dict(train, test, patient_data)

    print("Make Input..")
    train, test = utils.make_input(train, test, patient_data, drug2id, task)

    print("======================================================")
    print("DATA STATISTICS\n")
    print("NUMBER OF PATIENTS: {}".format(len(patient_data)))
    print("TRAIN DATA: {}".format(len(train)))
    print("TEST DATA: {}".format(len(test)))
    print("NUMBER OF DRUGS: {}".format(len(drug2id)))
    return patient_data, train, test, drug2id
def reset(self):
    timeStep, obs, isTerminal = self.env.reset()
    self.start_time = pd.datetime(year=self.env.start_year,
                                  month=self.env.start_mon,
                                  day=self.env.start_day)
    self.cur_time = self.start_time
    obs_dict = make_dict(obs_name, obs)
    x_init = torch.tensor([obs_dict[state] for state in state_name
                           ]).unsqueeze(0).double()  # 1 x n_state
    return x_init, self.cur_time
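# --- Hedged sketch, not part of the original source: in this module,
# make_dict is called as make_dict(obs_name, obs), i.e. with a list of
# names and a list of values, so it presumably just zips them into a dict.
# (Note this differs from the name-keyed make_dict used by the data
# preprocessing scripts above.) Assumption based on the call sites; the
# repository's actual helper may differ.
def make_dict(names, values):
    """Pair each observation name with its value (assumed behavior)."""
    return dict(zip(names, values))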
def forward(self, x_init):
    # Using EnergyPlus default control strategy;
    action = ()
    timeStep, obs, isTerminal = self.env.step(action)
    obs_dict = make_dict(obs_name, obs)
    next_state = torch.tensor([obs_dict[state] for state in state_name
                               ]).unsqueeze(0).double()
    self.cur_time = self.start_time + pd.Timedelta(seconds=timeStep)
    # The action is the difference between Supply Air Temp. and Mixed Air
    # Temp., i.e. the amount of heating from the heating coil.
    action = obs_dict["Sys Out Temp."] - obs_dict["MA Temp."]
    reward = R_func(obs_dict, action, eta)
    return next_state, torch.tensor([action]).double(), reward, obs_dict
def get_file_info(self, fid):
    """
    Get detailed information about a file.

    :param fid: file ID
    :return: a dict holding, in order, the status code ("success" or
        "failed"; if "failed", the remaining fields are omitted), the
        file name, the description, and the storage path
    """
    sql = "SELECT * FROM File WHERE ROWID = %d" % fid
    results = self._query(sql)
    if results:
        results = make_dict(results[0])
        return results
    return {'status': 'failed'}
def findsim(table, n):
    similar_qids = {}
    for qid in ult_minhash:
        similar_qids[qid] = ""
        signatures = ult_minhash[qid]
        common_set = set()
        # finding all qids with same minhash signature for final Jaccard sim check
        for sig, t in zip(signatures, table):
            qids_at_minhash = table[t][sig]
            common_set.update(qids_at_minhash)
        for qid2 in common_set:
            if qid == qid2:
                continue
            sim = jaccard_sim(questions[qid], questions[qid2])
            if sim >= x:
                similar_qids[qid] += str(
                    qid2) if similar_qids[qid] == "" else "," + str(qid2)
    make_dict(similar_qids, n)
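# --- Hedged sketch, not part of the original source: jaccard_sim is used
# above as the exact similarity check after minhash candidate generation.
# A plain token-set Jaccard similarity is assumed; the repository's
# version may tokenize differently.
def jaccard_sim(question1, question2):
    """|A intersect B| / |A union B| over word tokens (assumed behavior)."""
    a, b = set(question1.split()), set(question2.split())
    if not a and not b:
        return 0.0
    return len(a & b) / len(a | b)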
def train(split, x, y, x_index, embeddings, log_dir):
    f1_scores = []
    for i, (train_index, test_index) in enumerate(split):
        fold_dir = "%s/fold_%d" % (log_dir, i + 1)
        os.makedirs(fold_dir, exist_ok=True)
        print("training fold %d" % (i + 1))
        weights_path = "%s/weights.best.h5" % fold_dir
        np.save("%s/train_index.npy" % fold_dir, train_index)
        np.save("%s/test_index.npy" % fold_dir, test_index)
        callbacks = [
            TensorBoard(fold_dir),
            F1score(),
            ModelCheckpoint(weights_path, monitor='f1', verbose=1,
                            save_best_only=True, save_weights_only=True,
                            mode='max'),
            EarlyStopping(patience=5, monitor='f1', mode='max')
        ]
        x_train = [d[x_index[train_index]] for d in x]
        y_train = y[train_index]
        x_test = [d[x_index[test_index]] for d in x]
        y_test = y[test_index]
        model = build_model(embeddings)
        model.fit(x_train, y_train,
                  batch_size=BATCH_SIZE,
                  epochs=NB_EPOCHS,
                  verbose=2,
                  callbacks=callbacks,
                  validation_data=(x_test, y_test))
        print("testing fold %d" % (i + 1))
        model.load_weights(weights_path)
        scores = model.predict(x_test, verbose=False)
        predictions = scores.argmax(-1)
        f1 = evaluate(y_test, predictions, "%s/result.json" % fold_dir)
        print("f1_score: %.2f" % f1)
        f1_scores.append(f1)
    f1_avg = np.average(f1_scores)
    max_f1 = max(f1_scores)
    best_fold = int(np.argmax(f1_scores)) + 1
    best_weights = "%s/fold_%d/weights.best.h5" % (log_dir, best_fold)
    result = make_dict(f1_avg, max_f1, best_fold, best_weights)
    print(result)
    # The caller assigns and json-dumps this value, so the result must be
    # returned (the original fell off the end of the function).
    return result
def main(*args):
    assert len(args) >= 2

    word_embeddings = np.load("embedding/word_embeddings.npy")
    position_embeddings_1 = np.load("embedding/position_embeddings_1.npy")
    position_embeddings_2 = np.load("embedding/position_embeddings_2.npy")
    embeddings = make_dict(word_embeddings, position_embeddings_1,
                           position_embeddings_2)

    from models import build_model
    model = build_model(embeddings)
    weights_path = args[0]
    model.load_weights(weights_path)

    dis2idx_1 = json_load("embedding/dis2idx_1.json")
    dis2idx_2 = json_load("embedding/dis2idx_2.json")
    word2idx = json_load("embedding/word2idx.json")
    encoder = Encoder(word2idx, dis2idx_1, dis2idx_2)

    input_file = args[1]
    sentences, y = read_input(input_file)
    data = list(map(list,
                    zip(*[s.generate_features(encoder) for s in sentences])))
    scores = model.predict(data, verbose=False)
    predictions = scores.argmax(-1)

    idx2relation = read_relations("origin_data/relations.txt")
    outputs = ["{} {}".format(prediction, idx2relation[prediction])
               for prediction in predictions]
    print("\n".join(outputs))

    timestamp = int(datetime.now().timestamp())
    output_folder = "output/test/%d" % timestamp
    os.makedirs(output_folder, exist_ok=True)
    print("output folder: %s" % output_folder)
    output_file = os.path.join(output_folder, 'output.txt')
    error_list_file = os.path.join(output_folder, 'error_list.txt')
    error_predictions_file = os.path.join(output_folder,
                                          'error_predictions.txt')
    write_lines(output_file, outputs)

    error_list = []
    error_predictions = []
    for sentence, label, prediction in zip(sentences, y, predictions):
        if label != prediction:
            error_list.append('{} {}'.format(label, str(sentence)))
            error_predictions.append('{} {}'.format(
                prediction, idx2relation[prediction]))
    write_lines(error_list_file, error_list)
    write_lines(error_predictions_file, error_predictions)
def main(): print("load data") x = [ np.load("data/%s.npy" % name) for name in [ "words", "pos1", "pos2", "e1", "e2", "e1context", "e2context", "segments" ] ] y = np.load("data/y.npy") x_index = np.arange(len(y)) skf = StratifiedKFold(n_splits=K_FOLD) print("load embeddings") word_embeddings = np.load("data/embedding/word_embeddings.npy") position_embeddings_1 = np.load("data/embedding/position_embeddings_1.npy") position_embeddings_2 = np.load("data/embedding/position_embeddings_2.npy") embeddings = make_dict(word_embeddings, position_embeddings_1, position_embeddings_2) print("training") config = K.tf.ConfigProto(log_device_placement=False, allow_soft_placement=True) sess = K.tf.Session(config=config) K.set_session(sess) timestamp = int(datetime.now().timestamp()) log_dir = "output/train/%d" % timestamp print("log_dir = %s" % log_dir) split = skf.split(x_index, y) split = list(split) log_result = train(split, x, y, x_index, embeddings, log_dir) json.dump(log_result, open("%s/result.json" % log_dir, "w", encoding="utf8"), ensure_ascii=False, indent=4)
def waterfall():
    filepath = '../../original/processed_data/'
    minmaxdatapath = '../../original/minmaxdata/'
    filenamepath = '../../scratch/bd_lstm/filenames/testfiles.txt'
    weightpath = '../../scratch/bd_lstm/trainstats/weights_middle.pth'
    parampath = './conf_model.cfg'

    filenames = read_names(filenamepath)
    filenamedict = make_dict(filenames)
    vels = ascendingorder_wf(filenames)
    num_files = len(vels)
    params = read_params(parampath)

    model = LSTM_layers(input_size=int(params['input_size']),
                        hidden_size=int(params['hidden_size']),
                        num_layers=int(params['n_layers']),
                        dropout=float(params['dropout']),
                        output_size=int(params['output_size']),
                        batch_first=True,
                        bidirectional=True)
    model.load_state_dict(torch.load(weightpath, map_location='cpu'))
    model.to(device)
    model.eval()

    arr = None
    hack_idx = 0
    for velocity in vels:
        filename, velocity = find_closest(filenamedict, velocity)
        files = [filename]
        dataset = DataSet(root_dir=filepath,
                          files=files,
                          normalize=False,
                          seq_len=seq_len,
                          stride=max_stride)
        loader = DataLoader(dataset,
                            batch_size=int(params['batch_size']),
                            shuffle=True)
        for idx, sample in enumerate(loader):
            y = sample[:, :, :2].clone().detach().requires_grad_(True).to(
                device)
            x = sample[:, :, 2:].clone().detach().requires_grad_(True).to(
                device)
            h0 = model.init_hidden(int(params['batch_size']), None).to(device)
            c0 = model.init_cell(int(params['batch_size'])).to(device)
            # compute
            output = model.forward(x, (h0, c0))
            frq_pred, Y_pred, frq_true, Y_true = fft(output, y, velocity,
                                                     seq_len, filename,
                                                     minmaxdatapath)
            vel_pred = np.ones(len(frq_pred)) * velocity
            break
        if hack_idx == 0:
            arr_pred = np.vstack((vel_pred, frq_pred, Y_pred))
            arr_true = np.vstack((vel_pred, frq_true, Y_true))
        else:
            arr2_pred = np.vstack((vel_pred, frq_pred, Y_pred))
            arr2_true = np.vstack((vel_pred, frq_true, Y_true))
            arr_pred = np.hstack((arr_pred, arr2_pred))
            arr_true = np.hstack((arr_true, arr2_true))
        if hack_idx > limit:
            break
        else:
            hack_idx += 1
            print(velocity, hack_idx, '/', num_files)
    return arr_pred, arr_true
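# --- Hedged sketch, not part of the original source: find_closest is used
# above as find_closest(filenamedict, velocity) -> (filename, velocity).
# Assuming filenamedict maps a numeric rotational velocity to a filename
# (which is what make_dict(filenames) appears to build in this module),
# the nearest key wins. The repository's actual helper may differ.
def find_closest(filenamedict, velocity):
    best = min(filenamedict, key=lambda v: abs(v - velocity))
    return filenamedict[best], best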
def make_lists_for_words_py(n_ers):
    gutted = gut_words(n_ers)
    dict = make_dict(gutted, n_ers)
    print()
def main():
    # Create Simulation Environment
    env = gym.make('5Zone-control_TMY3-v0')

    # Modify here: Outputs from EnergyPlus; Match the variables.cfg file.
    obs_name = ["Outdoor Temp.", "Outdoor RH", "Wind Speed", "Wind Direction",
                "Diff. Solar Rad.", "Direct Solar Rad.", "Htg SP", "Clg SP",
                "Indoor Temp.", "Indoor Temp. Setpoint", "PPD",
                "Occupancy Flag", "Coil Power", "HVAC Power", "Sys In Temp.",
                "Sys In Mdot", "OA Temp.", "OA Mdot", "MA Temp.", "MA Mdot",
                "Sys Out Temp.", "Sys Out Mdot"]

    # Modify here: Change based on the specific control problem
    state_name = ["Indoor Temp."]
    dist_name = ["Outdoor Temp.", "Outdoor RH", "Wind Speed",
                 "Wind Direction", "Diff. Solar Rad.", "Direct Solar Rad.",
                 "Occupancy Flag"]
    ctrl_name = ["SA Temp Setpoint"]
    target_name = ["Indoor Temp. Setpoint"]

    n_state = len(state_name)
    n_ctrl = len(ctrl_name)

    eta = [0.1, args.eta]  # eta: Weight for comfort during unoccupied and occupied mode
    step = args.step  # step: Timestep; Unit in seconds
    T = args.T  # T: Number of timesteps in the planning horizon
    tol_eps = 90  # tol_eps: Total number of episodes; Each episode is a natural day
    u_upper = 5
    u_lower = 0

    # Read Information on Weather, Occupancy, and Target Setpoint
    obs = pd.read_pickle("results/Dist-TMY3.pkl")
    target = obs[target_name]
    disturbance = obs[dist_name]
    # Min-Max Normalization
    disturbance = (disturbance - disturbance.min()) / (
        disturbance.max() - disturbance.min())

    torch.manual_seed(args.seed)
    memory = Replay_Memory()

    # From Imitation Learning
    F_hat = np.array([[0.9406, 0.2915]])
    Bd_hat = np.array([[0.0578, 0.4390, 0.2087, 0.5389, 0.5080, 0.1035,
                        0.4162]])
    agent = PPO(memory, T, n_ctrl, n_state, target, disturbance, eta,
                u_upper, u_lower, F_hat=F_hat, Bd_hat=Bd_hat)

    dir = 'results'
    if not os.path.exists(dir):
        os.mkdir(dir)

    perf = []
    multiplier = 10  # Normalize the reward for better training performance
    n_step = 96  # timesteps per day

    timeStep, obs, isTerminal = env.reset()
    start_time = pd.datetime(year=env.start_year, month=env.start_mon,
                             day=env.start_day)
    cur_time = start_time
    print(cur_time)
    obs_dict = make_dict(obs_name, obs)
    state = torch.tensor([obs_dict[name] for name in state_name
                          ]).unsqueeze(0).double()  # 1 x n_state

    # Save for record
    timeStamp = [start_time]
    observations = [obs]
    actions_taken = []

    for i_episode in range(tol_eps):
        log_probs = []
        rewards = []
        real_rewards = []
        old_log_probs = []
        states = [state]
        disturbance = []
        actions = []
        # Save for Parameter Updates
        CC = []
        cc = []
        sigma = 1 - 0.9 * i_episode / tol_eps

        for t in range(n_step):
            dt = np.array(agent.dist[cur_time:cur_time + pd.Timedelta(
                seconds=(agent.T - 2) * agent.step)])  # T-1 x n_dist
            dt = torch.tensor(dt).transpose(0, 1)  # n_dist x T-1
            ft = agent.Dist_func(dt)  # T-1 x 1 x n_state
            C, c = agent.Cost_function(cur_time)
            opt_states, opt_actions = agent.forward(
                state, ft, C, c, current=False)  # x, u: T x 1 x Dim.
            action, old_log_prob = agent.select_action(opt_actions[0], sigma)

            # Modify here based on the specific control problem.
            # Caveat: I send the Supply Air Temp. Setpoint to the Gym-Eplus
            # interface. But, the RL agent controls the difference between
            # Supply Air Temp. and Mixed Air Temp., i.e. the amount of
            # heating from the heating coil.
            SAT_stpt = obs_dict["MA Temp."] + max(0, action.item())
            if action.item() < 0:
                action = torch.zeros_like(action)
            # If the room gets too warm during occupied period, uses outdoor
            # air for free cooling.
            if (obs_dict["Indoor Temp."] > obs_dict["Indoor Temp. Setpoint"]) \
                    & (obs_dict["Occupancy Flag"] == 1):
                SAT_stpt = obs_dict["Outdoor Temp."]
            timeStep, obs, isTerminal = env.step([SAT_stpt])
            obs_dict = make_dict(obs_name, obs)
            cur_time = start_time + pd.Timedelta(seconds=timeStep)

            reward = R_func(obs_dict, action, eta)

            # Per episode
            real_rewards.append(reward)
            rewards.append(reward.double() / multiplier)
            state = torch.tensor([obs_dict[name] for name in state_name
                                  ]).unsqueeze(0).double()
            actions.append(action)
            old_log_probs.append(old_log_prob)
            states.append(state)
            disturbance.append(dt)
            CC.append(C.squeeze())
            cc.append(c.squeeze())

            # Save for record
            timeStamp.append(cur_time)
            observations.append(obs)
            actions_taken.append([action.item(), SAT_stpt])
            print("{}, Action: {}, SAT Setpoint: {}, Actual SAT:{}, State: {}, "
                  "Target: {}, Occupied: {}, Reward: {}".format(
                      cur_time, action.item(), SAT_stpt,
                      obs_dict["Sys Out Temp."], obs_dict["Indoor Temp."],
                      obs_dict["Indoor Temp. Setpoint"],
                      obs_dict["Occupancy Flag"], reward))

        advantages = Advantage_func(rewards, args.gamma)
        old_log_probs = torch.stack(old_log_probs).squeeze().detach().clone()
        next_states = torch.stack(states[1:]).squeeze(1)
        states = torch.stack(states[:-1]).squeeze(1)
        actions = torch.stack(actions).squeeze(1).detach().clone()
        CC = torch.stack(CC).squeeze()  # n_batch x T x (m+n) x (m+n)
        cc = torch.stack(cc).squeeze()  # n_batch x T x (m+n)
        disturbance = torch.stack(disturbance)  # n_batch x T x n_dist
        agent.memory.append(states, actions, next_states, advantages,
                            old_log_probs, disturbance, CC, cc)

        # if -1, do not update parameters
        if args.update_episode == -1:
            print("Pass")
            pass
        elif (agent.memory.len >= args.update_episode) & (
                i_episode % args.update_episode == 0):
            (batch_states, batch_actions, b_next_states, batch_dist,
             batch_rewards, batch_old_logprobs, batch_CC,
             batch_cc) = agent.memory.sample_batch(args.update_episode)
            batch_set = Dataset(batch_states, batch_actions, b_next_states,
                                batch_dist, batch_rewards, batch_old_logprobs,
                                batch_CC, batch_cc)
            batch_loader = data.DataLoader(batch_set, batch_size=48,
                                           shuffle=True, num_workers=2)
            agent.update_parameters(batch_loader, sigma)

        perf.append([np.mean(real_rewards), np.std(real_rewards)])
        print("{}, reward: {}".format(cur_time, np.mean(real_rewards)))

    save_name = args.save_name
    obs_df = pd.DataFrame(np.array(observations),
                          index=np.array(timeStamp), columns=obs_name)
    action_df = pd.DataFrame(np.array(actions_taken),
                             index=np.array(timeStamp[:-1]),
                             columns=["Delta T", "Supply Air Temp. Setpoint"])
    obs_df.to_pickle("results/perf_" + save_name + "_obs.pkl")
    action_df.to_pickle("results/perf_" + save_name + "_actions.pkl")
    pickle.dump(np.array(perf),
                open("results/perf_" + save_name + ".npy", "wb"))
        # Apparently the tail of a preceding pooling layer's call(),
        # truncated in the extract above.
        return K.max(inputs, axis=2)


class PiecewiseMaxPool(Layer):
    def compute_output_shape(self, input_shape):
        return None, PCNN_OUTPUT_SIZE

    def call(self, inputs, **kwargs):
        inputs, segments = inputs
        seg1 = inputs * K.expand_dims(segments[:, :, 0])
        seg2 = inputs * K.expand_dims(segments[:, :, 1])
        seg3 = inputs * K.expand_dims(segments[:, :, 2])
        output1 = K.expand_dims(K.max(seg1, 1))
        output2 = K.expand_dims(K.max(seg2, 1))
        output3 = K.expand_dims(K.max(seg3, 1))
        output = K.concatenate([output1, output2, output3])
        output = K.reshape(output, [-1, PCNN_OUTPUT_SIZE])
        return output


if __name__ == "__main__":
    word_embeddings = np.random.random([10, WORD_EMBED_SIZE])
    position_embeddings_1 = np.random.random([10, POSITION_EMBED_SIZE])
    position_embeddings_2 = np.random.random([10, POSITION_EMBED_SIZE])
    char_embeddings = np.random.random([10, CHAR_EMBED_SIZE])
    tag_embeddings = np.random.random([10, TAG_EMBED_SIZE])
    embeddings = make_dict(word_embeddings, position_embeddings_1,
                           position_embeddings_2, char_embeddings,
                           tag_embeddings)
    build_model(embeddings)
obs_name = [
    "Outdoor Temp.", "Outdoor RH", "Wind Speed", "Wind Direction",
    "Diff. Solar Rad.", "Direct Solar Rad.", "Htg SP", "Clg SP",
    "Indoor Temp.", "Indoor Temp. Setpoint", "PPD", "Occupancy Flag",
    "Coil Power", "HVAC Power", "Sys In Temp.", "Sys In Mdot", "OA Temp.",
    "OA Mdot", "MA Temp.", "MA Mdot", "Sys Out Temp.", "Sys Out Mdot"
]
dist_name = [
    "Outdoor Temp.", "Outdoor RH", "Wind Speed", "Wind Direction",
    "Diff. Solar Rad.", "Direct Solar Rad.", "Indoor Temp. Setpoint",
    "Occupancy Flag"
]

# Reset the env (create the EnergyPlus subprocess)
timeStep, obs, isTerminal = env.reset()
obs_dict = make_dict(obs_name, obs)
start_time = pd.datetime(year=env.start_year, month=env.start_mon,
                         day=env.start_day)
print(start_time)

timeStamp = [start_time]
observations = [obs]
actions = []

for i in range(91 * 96):
    # Using EnergyPlus default control strategy;
    action = ()
    timeStep, obs, isTerminal = env.step(action)
    obs_dict = make_dict(obs_name, obs)
    cur_time = start_time + pd.Timedelta(seconds=timeStep)
def evaluate_any_file():
    # os.system(scp )
    filepath = '../../original/processed_data/'
    weightpath = '../../scratch/bd_lstm/trainstats/weights_middle.pth'
    demoweights = '../../scratch/bd_lstm/trainstats/demoweights.pth'
    weightpath = demoweights
    parampath = '../../code/bdrnn/conf_model.cfg'
    filenamepath = '../../scratch/bd_lstm/filenames/testfiles.txt'
    minmaxdatapath = '../../original/minmaxdata/'

    # get best file
    filenames = read_names(filenamepath)
    print(len(filenames))
    filenamedict = make_dict(filenames)
    velocity = float(
        input('Give rotational velocity between 4Hz and 18Hz and the '
              'closest one is used at evaluation.\n'))
    filename, velocity = find_closest(filenamedict, velocity)
    files = [filename]

    # read parameters
    params = read_params(parampath)

    # init dataset with the file we selected and model
    dataset = DataSet(root_dir=filepath,
                      files=files,
                      normalize=False,
                      seq_len=params['slice_size'],
                      stride=1000)
    loader = DataLoader(dataset,
                        batch_size=int(params['batch_size']),
                        shuffle=True)
    model = LSTM_layers(input_size=int(params['input_size']),
                        hidden_size=int(params['hidden_size']),
                        num_layers=int(params['n_layers']),
                        dropout=float(params['dropout']),
                        output_size=int(params['output_size']),
                        batch_first=True,
                        bidirectional=True)
    # RuntimeError: Attempting to deserialize object on a CUDA device but
    # torch.cuda.is_available() is False. If you are running on a CPU-only
    # machine, please use torch.load with map_location='cpu' to map your
    # storages to the CPU.
    model.load_state_dict(torch.load(weightpath, map_location='cpu'))
    model.to(device)
    model.eval()

    losses = []
    for idx, sample in enumerate(loader):
        y = sample[:, :, :2].clone().detach().requires_grad_(True).to(device)
        x = sample[:, :, 2:].clone().detach().requires_grad_(True).to(device)
        h0 = model.init_hidden(int(params['batch_size']), None).to(device)
        c0 = model.init_cell(int(params['batch_size'])).to(device)
        # compute
        output = model.forward(x, (h0, c0))
        loss = F.mse_loss(output, y)
        losses.append(loss.item())
        output, y = scale_seqs(output, y, filename, minmaxdatapath)
        if (idx % 3) == 0:
            save_this_plot(0, 2763, output[0], y[0], loss.item(), velocity)
    print("Avg loss:", np.mean(losses))
def main(): print("load train data") x_train = [ np.load("data/train/%s.npy" % name) for name in [ "words", "pos1", "pos2", "e1", "e2", "e1context", "e2context", "tags", "chars", "segments" ] ] y_train = np.load("data/train/y.npy") print("load test data") x_test = [ np.load("data/test/%s.npy" % name) for name in [ "words", "pos1", "pos2", "e1", "e2", "e1context", "e2context", "tags", "chars", "segments" ] ] y_test = np.load("data/test/y.npy") print("load embeddings") word_embeddings = np.load("data/embedding/word_embeddings.npy") position_embeddings_1 = np.load("data/embedding/position_embeddings_1.npy") position_embeddings_2 = np.load("data/embedding/position_embeddings_2.npy") char_embeddings = np.load("data/embedding/char_embeddings.npy") tag_embeddings = np.load("data/embedding/tag_embeddings.npy") embeddings = make_dict(word_embeddings, position_embeddings_1, position_embeddings_2, char_embeddings, tag_embeddings) print("training") config = ConfigProto() config.log_device_placement = False config.gpu_options.allow_growth = True sess = Session(config=config) K.set_session(sess) if not os.path.exists("model"): os.makedirs("model") filepath = "model/weights.best.hdf5" checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max') model = build_model(embeddings) model.fit(x_train, y_train, batch_size=BATCH_SIZE, epochs=NB_EPOCHS, verbose=True, callbacks=[checkpoint], validation_data=[x_test, y_test]) print("testing") model.load_weights("model/weights.best.hdf5") scores = model.predict(x_test, verbose=False) predictions = scores.argmax(-1) meta = """ POS tagging: yes characted-level: yes attention input: yes PCNN: yes """ gen_answer_key(predictions, meta=meta)
RNN_STEP = 5
EPOCHS = 1000
BATCH_SIZE = 2000
LEARNING_RATE = 0.001
SAMPLE_NUM = 8

# =============================================
## DATA PREPARATION
print("========================================")
print("Data Loading..")
train, validation, test = utils.load_data(TRAIN_DATA_PATH,
                                          VALIDATAION_DATA_PATH,
                                          TEST_DATA_PATH)
print("Make Dictionary..")
train, validation, test, user2id, id2user, venue2id, id2venue, venue_frequency = utils.make_dict(
    train, validation, test)
print("Make Input..")
train, validation, test = utils.make_input(train, validation, test, RNN_STEP)
# =============================================


def get_eval_score(candidate, rank):
    _mrr = .0
    # range, not Python 2's xrange, to match the Python 3 idioms used
    # elsewhere in this codebase
    for i in range(len(candidate)):
        _rank = np.where(rank[i] == candidate[i])
        _mrr += (1.0 / (_rank[0] + 1))
    return _mrr
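# --- Hedged usage sketch, not part of the original source: get_eval_score
# sums reciprocal ranks over the candidates, so dividing by the number of
# candidates gives MRR. The toy data below is illustrative only.
if __name__ == "__main__":
    candidate = [3, 7]                    # ground-truth venue ids
    rank = [np.array([3, 1, 2]),          # 3 found at rank 1 -> RR = 1.0
            np.array([5, 7, 9])]          # 7 found at rank 2 -> RR = 0.5
    total_rr = get_eval_score(candidate, rank)
    # prints [0.75]: np.where returns an index array, so the sum is an array
    print(total_rr / len(candidate))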