Example #1
def main():
    train_dir = "origin_data/Doi_song_train.muc"
    dev_dir = "origin_data/Doi_song_dev.muc"
    test_dir = "origin_data/Doi_song_test.muc"

    counter = Counter()

    print("read train file")
    sentences_train = read_file(train_dir, counter)

    print("read dev file")
    sentences_dev = read_file(dev_dir, counter)

    print("read test file")
    sentences_test = read_file(test_dir, counter)

    print(counter)
    print("longest sentence: %s" % str(counter.longest_sen))
    print("longest word: %s" % counter.longest_word())

    word2idx = read_word_embedding()

    char2idx = tags2idx(counter.char_vocab)
    pos2idx = tags2idx(counter.pos_tags)
    chunk2idx = tags2idx(counter.chunk_tags)
    ner2idx = tags2idx(counter.ner_tags)
    utils.json_dump(char2idx, "embedding/char2idx.json")
    utils.json_dump(pos2idx, "embedding/pos2idx.json")
    utils.json_dump(chunk2idx, "embedding/chunk2idx.json")
    utils.json_dump(ner2idx, "embedding/ner2idx.json")

    print("encoding data")
    encoder = {
        "max_sen_len": counter.max_sen_len,
        "max_word_len": counter.max_word_len,
        **utils.make_dict(word2idx, char2idx, pos2idx, chunk2idx, ner2idx)
    }
    encoder = utils.dict_to_object(encoder)
    chars_train, words_train, pos_train, chunk_train, ner_train = encode_sens(
        sentences_train, encoder)
    chars_dev, words_dev, pos_dev, chunk_dev, ner_dev = encode_sens(
        sentences_dev, encoder)
    chars_test, words_test, pos_test, chunk_test, ner_test = encode_sens(
        sentences_test, encoder)

    print("saving data")
    data = utils.make_dict(chars_train, words_train, pos_train, chunk_train,
                           ner_train, chars_dev, words_dev, pos_dev, chunk_dev,
                           ner_dev, chars_test, words_test, pos_test,
                           chunk_test, ner_test)
    os.makedirs("data", exist_ok=True)
    for k, d in data.items():
        np.save("data/%s.npy" % k, d)
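
Note: several of these examples build dictionaries with a make_dict helper whose keys end up matching the names of the variables passed in (the keys used when saving data/<name>.npy match the argument names). The helper itself is not shown; a minimal sketch, assuming it inspects the caller's local variables, might look like this (the real utils.make_dict may differ):

import inspect


def make_dict(*args):
    # Map each argument to the variable name it was passed under by
    # looking the object up in the caller's local namespace.
    caller_locals = inspect.currentframe().f_back.f_locals
    result = {}
    for value in args:
        for name, local_value in caller_locals.items():
            if local_value is value and name not in result:
                result[name] = value
                break
    return result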
Example #2
def main():
    for folder in ["data/train", "data/test", "data/embedding"]:
        makedirs(folder, exist_ok=True)

    counter = Counter()

    print("read train file")
    sentences_train, y_train = read_file("origin_data/train.cln", counter)

    print("read test file")
    sentences_test, y_test = read_file("origin_data/test.cln", counter)

    print(counter)

    print("read word embeddings")
    word2idx = read_word_embeddings(counter.vocab_word)

    print("read char embeddings")
    char2idx = read_char_embeddings(counter.vocab_char)

    print("pretrain position embeddings")
    dis2idx_1, position_embeddings_1 = pretrain_embedding(
        counter.distances_1, POSITION_EMBED_SIZE)
    dis2idx_2, position_embeddings_2 = pretrain_embedding(
        counter.distances_2, POSITION_EMBED_SIZE)
    np.save("data/embedding/position_embeddings_1.npy", position_embeddings_1)
    np.save("data/embedding/position_embeddings_2.npy", position_embeddings_2)

    print("pretrain pos_tag embeddings")
    tag2idx, tag_embeddings = pretrain_embedding(counter.tags, TAG_EMBED_SIZE,
                                                 True)
    np.save("data/embedding/tag_embeddings.npy", tag_embeddings)

    encoder = Encoder(word2idx, char2idx, dis2idx_1, dis2idx_2, tag2idx)

    print("saving train data")
    words_train, chars_train, pos1_train, pos2_train, e1_train, e2_train, tags_train, e1context_train, e2context_train, segments_train = zip(
        *[s.generate_features(encoder) for s in sentences_train])
    data_train = make_dict(words_train, chars_train, pos1_train, pos2_train,
                           e1_train, e2_train, tags_train, e1context_train,
                           e2context_train, segments_train, y_train)
    numpy_save_many(data_train)

    print("saving test data")
    words_test, chars_test, pos1_test, pos2_test, e1_test, e2_test, tags_test, e1context_test, e2context_test, segments_test = zip(
        *[s.generate_features(encoder) for s in sentences_test])
    data_test = make_dict(words_test, chars_test, pos1_test, pos2_test,
                          e1_test, e2_test, tags_test, e1context_test,
                          e2context_test, segments_test, y_test)
    numpy_save_many(data_test)

    print(encoder)
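
Note: pretrain_embedding is not shown in this example; from its usage it returns a value-to-index mapping together with an embedding matrix that gets saved as .npy. A rough sketch under that assumption (random initialisation, index 0 reserved for padding; the optional third argument is assumed to reserve an extra row, e.g. for unknown values):

import numpy as np


def pretrain_embedding(values, embed_size, add_unknown=False, seed=42):
    # Index every distinct value (0 is kept for padding) and pair the
    # mapping with a randomly initialised embedding matrix.
    rng = np.random.RandomState(seed)
    value2idx = {v: i + 1 for i, v in enumerate(sorted(values, key=str))}
    n_rows = len(value2idx) + 1 + (1 if add_unknown else 0)
    embeddings = rng.uniform(-0.25, 0.25, size=(n_rows, embed_size))
    embeddings[0] = 0.0  # padding row stays at zero
    return value2idx, embeddings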
Example #3
def main(train_dir, dev_dir, test_dir):
    # input_dir = "data/train/*.muc"
    vocabs_dir = "embedding/vocabs.json"

    counter = Counter()

    # num_sens = 0
    read_file(train_dir, counter, update_kb=True)
    read_file(dev_dir, counter)
    read_file(test_dir, counter)


    print(counter)
    # print("Num sent train: %s" % num_sens)
    print("longest sentence: %s" % str(counter.longest_sen))
    print("longest word: %s" % counter.longest_word())

    kb_words = {k: list(v) for k, v in counter.kb_words.items()}
    json_dump(kb_words, "embedding/kb_words.json")

    word2idx = construct_word_embeddings(counter.word_vocab)
    char2idx = construct_char_embeddings(counter.char_vocab)
    pos2idx = construct_pos_embeddings(counter.pos_tags)
    ner2idx = construct_ner_embeddings(counter.ner_tags)

    vocabs = ObjectDict(make_dict(word2idx, char2idx, ner2idx, pos2idx), max_sen_len=counter.max_sen_len, max_word_len=counter.max_word_len)
    vocabs.save(vocabs_dir)
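
Note: ObjectDict appears to be a dict with attribute access plus a JSON save method (it wraps the vocab mappings and the sentence/word length limits and writes them to embedding/vocabs.json). A possible minimal version, offered only as an assumption about its interface:

import json


class ObjectDict(dict):
    # Dictionary whose items are also reachable as attributes,
    # e.g. vocabs.max_sen_len instead of vocabs["max_sen_len"].
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.__dict__ = self

    def save(self, path):
        with open(path, "w", encoding="utf8") as f:
            json.dump(self, f, ensure_ascii=False, indent=2)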
Example #4
def main():
    for folder in ["data/embedding"]:
        makedirs(folder, exist_ok=True)

    counter = Counter()

    print("read data")
    sentences, y = read_file("origin_data/data.cln", counter)

    print(counter)

    print("read word embeddings")
    word2idx = read_word_embeddings(counter.vocab_word)

    print("load position embeddings")
    dis2idx_1, position_embeddings_1 = pretrain_embedding(counter.distances_1, POSITION_EMBED_SIZE)
    json.dump(dis2idx_1, open("data/embedding/dis2idx_1.json", "w"))
    np.save("data/embedding/position_embeddings_1.npy", position_embeddings_1)
    dis2idx_2, position_embeddings_2 = pretrain_embedding(counter.distances_2, POSITION_EMBED_SIZE)
    json.dump(dis2idx_2, open("data/embedding/dis2idx_2.json", "w"))
    np.save("data/embedding/position_embeddings_2.npy", position_embeddings_2)

    encoder = Encoder(word2idx, dis2idx_1, dis2idx_2)

    print("saving data")
    words, pos1, pos2, e1, e2, e1context, e2context, segments = zip(*[s.generate_features(encoder) for s in sentences])
    data = make_dict(words, pos1, pos2, e1, e2, e1context, e2context, segments, y)
    numpy_save_many(data)

    print(encoder)
Example #5
def main(n, fpath):
    lines = [line.rstrip("\n") for line in open(fpath, encoding="utf8")]
    
    words = []
    data = {}
    word_freq = {}
    for line in lines[1:]:
        try:
            qid, question = get_qid_question(line)
            data[qid] = question
        except ValueError:
            continue
    
    x = 0.6
    s = matrix(data, x)
    make_dict(s, n)
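
Note: get_qid_question is assumed here to split a line of the input file into a question id and its text, raising ValueError on malformed rows (which is why the loop above skips them). A sketch under that assumption, for a tab-separated file:

def get_qid_question(line):
    # Expect "<qid>\t<question text>"; a line without both fields raises
    # ValueError so the caller can skip it.
    qid, question = line.split("\t", 1)
    if not qid or not question:
        raise ValueError("malformed line: %r" % line)
    return qid, question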
Example #6
def dataprocessing(task):
    # =============================================
    # DATA PREPARATION
    # =============================================
    print("======================================================")
    print("Data Loading..")
    patient_data, train, test = utils.load_data(parameters.patient_data_path,
                                                parameters.train_data_path,
                                                parameters.test_data_path,
                                                task)

    patient_header, patient_data = patient_data[0], patient_data[1:]

    print("Make Dictionary..")
    drug2id, id2drug, patient_data = utils.make_dict(train, test, patient_data)

    print("Make Input..")
    train, test = utils.make_input(train, test, patient_data, drug2id, task)

    print("======================================================")
    print("DATA STATISTICS\n")
    print("NUMBER OF PATIENTS: {}".format(len(patient_data)))
    print("TRAIN DATA: {}".format(len(train)))
    print("TEST DATA: {}".format(len(test)))
    print("NUMBER OF DRUGS: {}".format(len(drug2id)))

    return patient_data, train, test, drug2id
Example #7
def reset(self):
    timeStep, obs, isTerminal = self.env.reset()
    self.start_time = pd.datetime(year=self.env.start_year,
                                  month=self.env.start_mon,
                                  day=self.env.start_day)
    self.cur_time = self.start_time
    obs_dict = make_dict(obs_name, obs)
    x_init = torch.tensor([obs_dict[state] for state in state_name
                           ]).unsqueeze(0).double()  # 1 x n_state
    return x_init, self.cur_time
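
Note: in the EnergyPlus/Gym-Eplus examples, make_dict pairs the list of observation names with the list of values returned by the environment, so a minimal equivalent is simply:

def make_dict(names, values):
    # Pair each observation name with the value EnergyPlus returned at
    # the same position, e.g. {"Outdoor Temp.": 3.2, ...}.
    return dict(zip(names, values))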
Example #8
def forward(self, x_init):
    # Using the EnergyPlus default control strategy.
    action = ()
    timeStep, obs, isTerminal = self.env.step(action)
    obs_dict = make_dict(obs_name, obs)
    next_state = torch.tensor([obs_dict[state] for state in state_name
                               ]).unsqueeze(0).double()
    self.cur_time = self.start_time + pd.Timedelta(seconds=timeStep)
    # The action is the difference between Supply Air Temp. and Mixed Air Temp.,
    # i.e. the amount of heating from the heating coil.
    action = obs_dict["Sys Out Temp."] - obs_dict["MA Temp."]
    reward = R_func(obs_dict, action, eta)
    return next_state, torch.tensor([action]).double(), reward, obs_dict
Example #9
def get_file_info(self, fid):
    """
    Get detailed information about a file.
    :param fid: file ID
    :return: a dict holding, in order, a status code ("success" or "failed";
             if "failed", the remaining fields are omitted), the file name,
             the description, and the storage path.
    """
    sql = "SELECT * FROM File WHERE ROWID = %d" % fid
    results = self._query(sql)
    if results:
        results = make_dict(results[0])
        return results
    return {'status': 'failed'}
Example #10
def findsim(table, n):
    similar_qids = {}

    for qid in ult_minhash:
        similar_qids[qid] = ""
        signatures = ult_minhash[qid]
        common_set = set()

        # finding all qids with same minhash signature for final Jaccard sim check
        for sig, t in zip(signatures, table):
            qids_at_minhash = table[t][sig]
            common_set.update(qids_at_minhash)

        for qid2 in common_set:
            if qid == qid2:
                continue

            sim = jaccard_sim(questions[qid], questions[qid2])
            if sim >= x:
                similar_qids[qid] += str(
                    qid2) if similar_qids[qid] == "" else "," + str(qid2)

    make_dict(similar_qids, n)
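
Note: jaccard_sim is not defined in this snippet; the final check presumably compares the two questions' word sets. A sketch of a token-level Jaccard similarity, as an assumption:

def jaccard_sim(question1, question2):
    # Jaccard similarity of the two word sets: |intersection| / |union|.
    set1, set2 = set(question1.split()), set(question2.split())
    if not set1 and not set2:
        return 0.0
    return len(set1 & set2) / len(set1 | set2)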
Example #11
def train(split, x, y, x_index, embeddings, log_dir):
    f1_scores = []
    for i, (train_index, test_index) in enumerate(split):
        fold_dir = "%s/fold_%d" % (log_dir, i + 1)
        os.makedirs(fold_dir, exist_ok=True)
        print("training fold %d" % (i + 1))
        weights_path = "%s/weights.best.h5" % fold_dir

        np.save("%s/train_index.npy" % fold_dir, train_index)
        np.save("%s/test_index.npy" % fold_dir, test_index)

        callbacks = [
            TensorBoard(fold_dir),
            F1score(),
            ModelCheckpoint(weights_path,
                            monitor='f1',
                            verbose=1,
                            save_best_only=True,
                            save_weights_only=True,
                            mode='max'),
            EarlyStopping(patience=5, monitor='f1', mode='max')
        ]

        x_train = [d[x_index[train_index]] for d in x]
        y_train = y[train_index]
        x_test = [d[x_index[test_index]] for d in x]
        y_test = y[test_index]
        model = build_model(embeddings)
        model.fit(x_train,
                  y_train,
                  batch_size=BATCH_SIZE,
                  epochs=NB_EPOCHS,
                  verbose=2,
                  callbacks=callbacks,
                  validation_data=[x_test, y_test])

        print("testing fold %d" % (i + 1))
        model.load_weights(weights_path)
        scores = model.predict(x_test, verbose=False)
        predictions = scores.argmax(-1)
        f1 = evaluate(y_test, predictions, "%s/result.json" % fold_dir)
        print("f1_score: %.2f" % f1)
        f1_scores.append(f1)
    f1_avg = np.average(f1_scores)
    max_f1 = max(f1_scores)
    best_fold = int(np.argmax(f1_scores)) + 1
    best_weights = "%s/fold_%d/weights.best.h5" % (log_dir, best_fold)
    result = make_dict(f1_avg, max_f1, best_fold, best_weights)
    print(result)
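
Note: ModelCheckpoint and EarlyStopping above monitor an "f1" metric, so the custom F1score callback presumably computes it at the end of each epoch and writes it into logs. A rough sketch of such a callback (assuming Keras and scikit-learn; the original F1score() takes no arguments and likely reads the validation data Keras provides, whereas this sketch passes the arrays in explicitly):

from keras.callbacks import Callback
from sklearn.metrics import f1_score


class F1score(Callback):
    # Compute macro F1 on held-out data after each epoch and expose it
    # as logs["f1"] so ModelCheckpoint/EarlyStopping can monitor it.
    def __init__(self, x_val=None, y_val=None):
        super().__init__()
        self.x_val, self.y_val = x_val, y_val

    def on_epoch_end(self, epoch, logs=None):
        if self.x_val is None or logs is None:
            return
        predictions = self.model.predict(self.x_val).argmax(-1)
        logs["f1"] = f1_score(self.y_val, predictions, average="macro")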
Example #12
def main(*args):
    assert len(args) >= 2

    word_embeddings = np.load("embedding/word_embeddings.npy")
    position_embeddings_1 = np.load("embedding/position_embeddings_1.npy")
    position_embeddings_2 = np.load("embedding/position_embeddings_2.npy")
    embeddings = make_dict(word_embeddings, position_embeddings_1, position_embeddings_2)

    from models import build_model
    model = build_model(embeddings)
    weights_path = args[0]
    model.load_weights(weights_path)

    dis2idx_1 = json_load("embedding/dis2idx_1.json")
    dis2idx_2 = json_load("embedding/dis2idx_2.json")
    word2idx = json_load("embedding/word2idx.json")
    encoder = Encoder(word2idx, dis2idx_1, dis2idx_2)

    input_file = args[1]
    sentences, y = read_input(input_file)
    data = list(map(list, zip(*[s.generate_features(encoder) for s in sentences])))

    scores = model.predict(data, verbose=False)
    predictions = scores.argmax(-1)
    idx2relation = read_relations("origin_data/relations.txt")
    outputs = ["{} {}".format(prediction, idx2relation[prediction]) for prediction in predictions]

    print("\n".join(outputs))

    timestamp = int(datetime.now().timestamp())
    output_folder = "output/test/%d" % timestamp
    os.makedirs(output_folder, exist_ok=True)
    print("output folder: %s" % output_folder)
    output_file = os.path.join(output_folder, 'output.txt')
    error_list_file = os.path.join(output_folder, 'error_list.txt')
    error_predictions_file = os.path.join(output_folder, 'error_predictions.txt')

    write_lines(output_file, outputs)

    error_list = []
    error_predictions = []
    for sentence, label, prediction in zip(sentences, y, predictions):
        if label != prediction:
            error_list.append('{} {}'.format(label, str(sentence)))
            error_predictions.append('{} {}'.format(prediction, idx2relation[prediction]))

    write_lines(error_list_file, error_list)
    write_lines(error_predictions_file, error_predictions)
Example #13
def main():
    print("load data")
    x = [
        np.load("data/%s.npy" % name) for name in [
            "words", "pos1", "pos2", "e1", "e2", "e1context", "e2context",
            "segments"
        ]
    ]
    y = np.load("data/y.npy")
    x_index = np.arange(len(y))
    skf = StratifiedKFold(n_splits=K_FOLD)

    print("load embeddings")
    word_embeddings = np.load("data/embedding/word_embeddings.npy")
    position_embeddings_1 = np.load("data/embedding/position_embeddings_1.npy")
    position_embeddings_2 = np.load("data/embedding/position_embeddings_2.npy")
    embeddings = make_dict(word_embeddings, position_embeddings_1,
                           position_embeddings_2)

    print("training")
    config = K.tf.ConfigProto(log_device_placement=False,
                              allow_soft_placement=True)
    sess = K.tf.Session(config=config)
    K.set_session(sess)

    timestamp = int(datetime.now().timestamp())
    log_dir = "output/train/%d" % timestamp
    print("log_dir = %s" % log_dir)
    split = skf.split(x_index, y)
    split = list(split)

    log_result = train(split, x, y, x_index, embeddings, log_dir)
    json.dump(log_result,
              open("%s/result.json" % log_dir, "w", encoding="utf8"),
              ensure_ascii=False,
              indent=4)
Example #14
def waterfall():
    filepath = '../../original/processed_data/'
    minmaxdatapath = '../../original/minmaxdata/'
    filenamepath = '../../scratch/bd_lstm/filenames/testfiles.txt'
    weightpath = '../../scratch/bd_lstm/trainstats/weights_middle.pth'
    parampath = './conf_model.cfg'

    filenames = read_names(filenamepath)

    filenamedict = make_dict(filenames)

    vels = ascendingorder_wf(filenames)
    num_files = len(vels)

    params = read_params(parampath)
    model = LSTM_layers(input_size=int(params['input_size']),
                        hidden_size=int(params['hidden_size']),
                        num_layers=int(params['n_layers']),
                        dropout=float(params['dropout']),
                        output_size=int(params['output_size']),
                        batch_first=True,
                        bidirectional=True)

    model.load_state_dict(torch.load(weightpath, map_location='cpu'))
    model.to(device)
    model.eval()
    arr = None
    hack_idx = 0
    for velocity in vels:
        filename, velocity = find_closest(filenamedict, velocity)

        files = [filename]
        dataset = DataSet(root_dir=filepath,
                          files=files,
                          normalize=False,
                          seq_len=seq_len,
                          stride=max_stride)
        loader = DataLoader(dataset,
                            batch_size=int(params['batch_size']),
                            shuffle=True)
        for idx, sample in enumerate(loader):
            y = sample[:, :, :2].clone().detach().requires_grad_(True).to(
                device)
            x = sample[:, :,
                       2:].clone().detach().requires_grad_(True).to(device)
            h0 = model.init_hidden(int(params['batch_size']), None).to(device)
            c0 = model.init_cell(int(params['batch_size'])).to(device)

            #compute
            output = model.forward(x, (h0, c0))
            frq_pred, Y_pred, frq_true, Y_true = fft(output, y, velocity,
                                                     seq_len, filename,
                                                     minmaxdatapath)
            vel_pred = np.ones(len(frq_pred)) * velocity
            break
        if hack_idx == 0:
            arr_pred = np.vstack((vel_pred, frq_pred, Y_pred))
            arr_true = np.vstack((vel_pred, frq_true, Y_true))
        else:
            arr2_pred = np.vstack((vel_pred, frq_pred, Y_pred))
            arr2_true = np.vstack((vel_pred, frq_true, Y_true))
            arr_pred = np.hstack((arr_pred, arr2_pred))
            arr_true = np.hstack((arr_true, arr2_true))
        if hack_idx > limit:
            break
        else:
            hack_idx += 1
        print(velocity, hack_idx, '/', num_files)
    return arr_pred, arr_true
Example #15
def make_lists_for_words_py(n_ers):
    gutted = gut_words(n_ers)
    dict = make_dict(gutted, n_ers)
    print()
Example #16
def main():
    # Create Simulation Environment
    env = gym.make('5Zone-control_TMY3-v0')

    # Modify here: Outputs from EnergyPlus; Match the variables.cfg file.
    obs_name = ["Outdoor Temp.", "Outdoor RH", "Wind Speed", "Wind Direction", "Diff. Solar Rad.", "Direct Solar Rad.", "Htg SP", "Clg SP", "Indoor Temp.", "Indoor Temp. Setpoint", "PPD", "Occupancy Flag", "Coil Power", "HVAC Power", "Sys In Temp.", "Sys In Mdot", "OA Temp.", "OA Mdot", "MA Temp.", "MA Mdot", "Sys Out Temp.", "Sys Out Mdot"]

    # Modify here: Change based on the specific control problem
    state_name = ["Indoor Temp."]
    dist_name = ["Outdoor Temp.", "Outdoor RH", "Wind Speed", "Wind Direction", "Diff. Solar Rad.", "Direct Solar Rad.", "Occupancy Flag"]
    ctrl_name = ["SA Temp Setpoint"]
    target_name = ["Indoor Temp. Setpoint"]
    
    n_state = len(state_name)
    n_ctrl = len(ctrl_name)

    eta = [0.1, args.eta] # eta: Weight for comfort during unoccupied and occupied mode
    step = args.step # step: Timestep; Unit in seconds
    T = args.T # T: Number of timesteps in the planning horizon
    tol_eps = 90 # tol_eps: Total number of episodes; Each episode is a natural day

    u_upper = 5
    u_lower = 0

    # Read Information on Weather, Occupancy, and Target Setpoint
    obs = pd.read_pickle("results/Dist-TMY3.pkl")
    target = obs[target_name]
    disturbance = obs[dist_name]
    
    # Min-Max Normalization
    disturbance = (disturbance - disturbance.min())/(disturbance.max() - disturbance.min())

    torch.manual_seed(args.seed)
    memory = Replay_Memory()
    
    # From Imitation Learning
    F_hat = np.array([[0.9406, 0.2915]])
    Bd_hat = np.array([[0.0578, 0.4390, 0.2087, 0.5389, 0.5080, 0.1035, 0.4162]])
    agent = PPO(memory, T, n_ctrl, n_state, target, disturbance, eta, u_upper, u_lower, F_hat = F_hat, Bd_hat = Bd_hat)

    dir = 'results'
    if not os.path.exists(dir):
        os.mkdir(dir)
    
    perf = []
    multiplier = 10 # Normalize the reward for better training performance
    n_step = 96 #timesteps per day
    
    timeStep, obs, isTerminal = env.reset()
    start_time = pd.datetime(year = env.start_year, month = env.start_mon, day = env.start_day)
    cur_time = start_time
    print(cur_time)
    obs_dict = make_dict(obs_name, obs)
    state = torch.tensor([obs_dict[name] for name in state_name]).unsqueeze(0).double() # 1 x n_state
    
    # Save for record
    timeStamp = [start_time]
    observations = [obs]
    actions_taken = []

    for i_episode in range(tol_eps):
        log_probs = []
        rewards = []
        real_rewards = []
        old_log_probs = []
        states = [state]
        disturbance = []
        actions = [] # Save for Parameter Updates
        CC = []
        cc = []
        sigma = 1 - 0.9*i_episode/tol_eps

        for t in range(n_step):
            dt = np.array(agent.dist[cur_time : cur_time + pd.Timedelta(seconds = (agent.T-2) * agent.step)]) # T-1 x n_dist
            dt = torch.tensor(dt).transpose(0, 1) # n_dist x T-1
            ft = agent.Dist_func(dt) # T-1 x 1 x n_state
            C, c = agent.Cost_function(cur_time)
            opt_states, opt_actions = agent.forward(state, ft, C, c, current = False) # x, u: T x 1 x Dim.
            action, old_log_prob = agent.select_action(opt_actions[0], sigma)

            # Modify here based on the specific control problem.
            # Caveat: I send the Supply Air Temp. Setpoint to the Gym-Eplus interface. But, the RL agent controls the difference between Supply Air Temp. and Mixed Air Temp., i.e. the amount of heating from the heating coil.
            SAT_stpt = obs_dict["MA Temp."] + max(0, action.item())
            if action.item() < 0:
                action = torch.zeros_like(action)
            # If the room gets too warm during an occupied period, use outdoor air for free cooling.
            if (obs_dict["Indoor Temp."] > obs_dict["Indoor Temp. Setpoint"]) & (obs_dict["Occupancy Flag"] == 1):
                SAT_stpt = obs_dict["Outdoor Temp."]
            timeStep, obs, isTerminal = env.step([SAT_stpt])
            
            obs_dict = make_dict(obs_name, obs)
            cur_time = start_time + pd.Timedelta(seconds = timeStep)
            reward = R_func(obs_dict, action, eta)
            
            # Per episode
            real_rewards.append(reward)
            rewards.append(reward.double() / multiplier)
            state = torch.tensor([obs_dict[name] for name in state_name]).unsqueeze(0).double()
            actions.append(action)
            old_log_probs.append(old_log_prob)
            states.append(state)
            disturbance.append(dt)
            CC.append(C.squeeze())
            cc.append(c.squeeze())
            
            # Save for record
            timeStamp.append(cur_time)
            observations.append(obs)
            actions_taken.append([action.item(), SAT_stpt])
            print("{}, Action: {}, SAT Setpoint: {}, Actual SAT:{}, State: {}, Target: {}, Occupied: {}, Reward: {}".format(cur_time,
                action.item(), SAT_stpt, obs_dict["Sys Out Temp."], obs_dict["Indoor Temp."], obs_dict["Indoor Temp. Setpoint"], obs_dict["Occupancy Flag"], reward))

        advantages = Advantage_func(rewards, args.gamma)
        old_log_probs = torch.stack(old_log_probs).squeeze().detach().clone()
        next_states = torch.stack(states[1:]).squeeze(1)
        states = torch.stack(states[:-1]).squeeze(1)
        actions = torch.stack(actions).squeeze(1).detach().clone()
        CC = torch.stack(CC).squeeze() # n_batch x T x (m+n) x (m+n)
        cc = torch.stack(cc).squeeze() # n_batch x T x (m+n)
        disturbance = torch.stack(disturbance) # n_batch x T x n_dist
        agent.memory.append(states, actions, next_states, advantages, old_log_probs, disturbance, CC, cc)

        # if -1, do not update parameters
        if args.update_episode == -1:
            print("Pass")
            pass
        elif (agent.memory.len >= args.update_episode) & (i_episode % args.update_episode == 0):
            batch_states, batch_actions, b_next_states, batch_dist, batch_rewards, batch_old_logprobs, batch_CC, batch_cc = agent.memory.sample_batch(args.update_episode)
            batch_set = Dataset(batch_states, batch_actions, b_next_states, batch_dist, batch_rewards, batch_old_logprobs, batch_CC, batch_cc)
            batch_loader = data.DataLoader(batch_set, batch_size=48, shuffle=True, num_workers=2)
            agent.update_parameters(batch_loader, sigma)
        
        perf.append([np.mean(real_rewards), np.std(real_rewards)])
        print("{}, reward: {}".format(cur_time, np.mean(real_rewards)))

        save_name = args.save_name
        obs_df = pd.DataFrame(np.array(observations), index = np.array(timeStamp), columns = obs_name)
        action_df = pd.DataFrame(np.array(actions_taken), index = np.array(timeStamp[:-1]), columns = ["Delta T", "Supply Air Temp. Setpoint"])
        obs_df.to_pickle("results/perf_"+save_name+"_obs.pkl")
        action_df.to_pickle("results/perf_"+save_name+"_actions.pkl")
        pickle.dump(np.array(perf), open("results/perf_"+save_name+".npy", "wb"))
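
Note: Advantage_func turns the per-step rewards into advantages for the PPO update; a common choice, and the assumption made in this sketch, is discounted returns that are then standardised:

import torch


def Advantage_func(rewards, gamma):
    # Discounted return at each timestep, computed backwards, then
    # standardised so the PPO update sees roughly zero-mean advantages.
    R = 0.0
    returns = []
    for r in reversed(rewards):
        R = float(r) + gamma * R
        returns.insert(0, R)
    returns = torch.tensor(returns, dtype=torch.double)
    return (returns - returns.mean()) / (returns.std() + 1e-8)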
Example #17
        return K.max(inputs, axis=2)


class PiecewiseMaxPool(Layer):
    def compute_output_shape(self, input_shape):
        return None, PCNN_OUTPUT_SIZE

    def call(self, inputs, **kwargs):
        inputs, segments = inputs

        seg1 = inputs * K.expand_dims(segments[:, :, 0])
        seg2 = inputs * K.expand_dims(segments[:, :, 1])
        seg3 = inputs * K.expand_dims(segments[:, :, 2])

        output1 = K.expand_dims(K.max(seg1, 1))
        output2 = K.expand_dims(K.max(seg2, 1))
        output3 = K.expand_dims(K.max(seg3, 1))
        output = K.concatenate([output1, output2, output3])
        output = K.reshape(output, [-1, PCNN_OUTPUT_SIZE])
        return output


if __name__ == "__main__":
    word_embeddings = np.random.random([10, WORD_EMBED_SIZE])
    position_embeddings_1 = np.random.random([10, POSITION_EMBED_SIZE])
    position_embeddings_2 = np.random.random([10, POSITION_EMBED_SIZE])
    char_embeddings = np.random.random([10, CHAR_EMBED_SIZE])
    tag_embeddings = np.random.random([10, TAG_EMBED_SIZE])
    embeddings = make_dict(word_embeddings, position_embeddings_1, position_embeddings_2, char_embeddings, tag_embeddings)
    build_model(embeddings)
Example #18
obs_name = [
    "Outdoor Temp.", "Outdoor RH", "Wind Speed", "Wind Direction",
    "Diff. Solar Rad.", "Direct Solar Rad.", "Htg SP", "Clg SP",
    "Indoor Temp.", "Indoor Temp. Setpoint", "PPD", "Occupancy Flag",
    "Coil Power", "HVAC Power", "Sys In Temp.", "Sys In Mdot", "OA Temp.",
    "OA Mdot", "MA Temp.", "MA Mdot", "Sys Out Temp.", "Sys Out Mdot"
]
dist_name = [
    "Outdoor Temp.", "Outdoor RH", "Wind Speed", "Wind Direction",
    "Diff. Solar Rad.", "Direct Solar Rad.", "Indoor Temp. Setpoint",
    "Occupancy Flag"
]

# Reset the env (create the EnergyPlus subprocess)
timeStep, obs, isTerminal = env.reset()
obs_dict = make_dict(obs_name, obs)
start_time = pd.datetime(year=env.start_year,
                         month=env.start_mon,
                         day=env.start_day)
print(start_time)

timeStamp = [start_time]
observations = [obs]
actions = []

for i in range(91 * 96):
    # Using EnergyPlus default control strategy;
    action = ()
    timeStep, obs, isTerminal = env.step(action)
    obs_dict = make_dict(obs_name, obs)
    cur_time = start_time + pd.Timedelta(seconds=timeStep)
Example #19
def evaluate_any_file():
    #os.system(scp )
    filepath = '../../original/processed_data/'
    weightpath = '../../scratch/bd_lstm/trainstats/weights_middle.pth'
    demoweights = '../../scratch/bd_lstm/trainstats/demoweights.pth'
    weightpath = demoweights
    parampath = '../../code/bdrnn/conf_model.cfg'
    filenamepath = '../../scratch/bd_lstm/filenames/testfiles.txt'
    minmaxdatapath = '../../original/minmaxdata/'

    #get best file
    filenames = read_names(filenamepath)
    print(len(filenames))
    filenamedict = make_dict(filenames)
    velocity = float(
        input(
            'Give rotational velocity between 4Hz and 18Hz and the closest one is used at evaluation.\n'
        ))
    filename, velocity = find_closest(filenamedict, velocity)
    files = [filename]

    #read parameters
    params = read_params(parampath)

    #init dataset with the file we selected and model
    dataset = DataSet(root_dir=filepath,
                      files=files,
                      normalize=False,
                      seq_len=params['slice_size'],
                      stride=1000)

    loader = DataLoader(dataset,
                        batch_size=int(params['batch_size']),
                        shuffle=True)

    model = LSTM_layers(input_size=int(params['input_size']),
                        hidden_size=int(params['hidden_size']),
                        num_layers=int(params['n_layers']),
                        dropout=float(params['dropout']),
                        output_size=int(params['output_size']),
                        batch_first=True,
                        bidirectional=True)
    #RuntimeError: Attempting to deserialize object on a
    #CUDA device but torch.cuda.is_available() is False.
    #If you are running on a CPU-only machine,
    #please use torch.load with map_location='cpu' to map your storages to the CPU.

    model.load_state_dict(torch.load(weightpath, map_location='cpu'))
    model.to(device)
    model.eval()
    losses = []

    for idx, sample in enumerate(loader):
        y = sample[:, :, :2].clone().detach().requires_grad_(True).to(device)
        x = sample[:, :, 2:].clone().detach().requires_grad_(True).to(device)
        h0 = model.init_hidden(int(params['batch_size']), None).to(device)
        c0 = model.init_cell(int(params['batch_size'])).to(device)

        #compute
        output = model.forward(x, (h0, c0))
        loss = F.mse_loss(output, y)
        losses.append(loss.item())

        output, y = scale_seqs(output, y, filename, minmaxdatapath)

        if (idx % 3) == 0:
            save_this_plot(0, 2763, output[0], y[0], loss.item(), velocity)
    print("Avg loss:", np.mean(losses))
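
Note: make_dict(filenames) and find_closest are assumed here to map each test file to the rotational velocity encoded in its name and then pick the file whose velocity is nearest to the requested one. A sketch of find_closest under that assumption:

def find_closest(filenamedict, velocity):
    # filenamedict is assumed to map velocity (Hz) -> filename; return
    # the entry whose velocity is closest to the requested value.
    closest_vel = min(filenamedict, key=lambda v: abs(v - velocity))
    return filenamedict[closest_vel], closest_vel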
Example #20
def main():
    print("load train data")
    x_train = [
        np.load("data/train/%s.npy" % name) for name in [
            "words", "pos1", "pos2", "e1", "e2", "e1context", "e2context",
            "tags", "chars", "segments"
        ]
    ]
    y_train = np.load("data/train/y.npy")

    print("load test data")
    x_test = [
        np.load("data/test/%s.npy" % name) for name in [
            "words", "pos1", "pos2", "e1", "e2", "e1context", "e2context",
            "tags", "chars", "segments"
        ]
    ]
    y_test = np.load("data/test/y.npy")

    print("load embeddings")
    word_embeddings = np.load("data/embedding/word_embeddings.npy")
    position_embeddings_1 = np.load("data/embedding/position_embeddings_1.npy")
    position_embeddings_2 = np.load("data/embedding/position_embeddings_2.npy")
    char_embeddings = np.load("data/embedding/char_embeddings.npy")
    tag_embeddings = np.load("data/embedding/tag_embeddings.npy")
    embeddings = make_dict(word_embeddings, position_embeddings_1,
                           position_embeddings_2, char_embeddings,
                           tag_embeddings)

    print("training")
    config = ConfigProto()
    config.log_device_placement = False
    config.gpu_options.allow_growth = True
    sess = Session(config=config)
    K.set_session(sess)

    if not os.path.exists("model"):
        os.makedirs("model")
    filepath = "model/weights.best.hdf5"
    checkpoint = ModelCheckpoint(filepath,
                                 monitor='val_acc',
                                 verbose=1,
                                 save_best_only=True,
                                 mode='max')
    model = build_model(embeddings)
    model.fit(x_train,
              y_train,
              batch_size=BATCH_SIZE,
              epochs=NB_EPOCHS,
              verbose=True,
              callbacks=[checkpoint],
              validation_data=[x_test, y_test])

    print("testing")
    model.load_weights("model/weights.best.hdf5")
    scores = model.predict(x_test, verbose=False)
    predictions = scores.argmax(-1)
    meta = """
POS tagging: yes
character-level: yes
attention input: yes
PCNN: yes
"""
    gen_answer_key(predictions, meta=meta)
Example #21
RNN_STEP = 5
EPOCHS = 1000
BATCH_SIZE = 2000
LEARNING_RATE = 0.001
SAMPLE_NUM = 8

# =============================================
## DATA PREPARATION
print("========================================")
print("Data Loading..")
train, validation, test = utils.load_data(TRAIN_DATA_PATH,
                                          VALIDATAION_DATA_PATH,
                                          TEST_DATA_PATH)

print("Make Dictionary..")
train, validation, test, user2id, id2user, venue2id, id2venue, venue_frequency = utils.make_dict(
    train, validation, test)

print("Make Input..")
train, validation, test = utils.make_input(train, validation, test, RNN_STEP)


# =============================================
def get_eval_score(candidate, rank):
    _mrr = .0

    for i in xrange(len(candidate)):
        _rank = np.where(rank[i] == candidate[i])
        _mrr += (1.0 / (_rank[0] + 1))

    return _mrr
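
Note: get_eval_score returns the sum of reciprocal ranks over the candidates, so the mean reciprocal rank (MRR) is obtained by dividing by their number, e.g. (assuming candidate and rank are the arrays passed above):

mrr = float(get_eval_score(candidate, rank)) / len(candidate)
print("MRR: {:.4f}".format(mrr))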