# --- Reload artifacts and run prediction on the held-out split ---

# Reload the scaler that was fitted during training for series i, so the
# inverse transform below uses identical statistics.
scaler = joblib.load(os.path.join("data", "scaler%s.pkl" % i))

# Load the raw series; cap rows in debug mode to keep iteration fast.
raw_data = pd.read_csv(
    os.path.join("data_foreach", "all.csv"),
    nrows=100 if debug else None,
)
targ_cols = ("11",)  # name(s) of the target column(s) to predict

# Use the same 70/30 split as training so the "test" portion of the data
# matches what the saved model has never seen.
train_num = int(len(raw_data) * 0.7)
data = preprocess_data(raw_data, targ_cols, scaler, train_num)

# Restore the hyper-parameters (including window length T) the model was
# trained with.
with open(os.path.join("data", "da_rnn_kwargs.json"), "r") as fi:
    da_rnn_kwargs = json.load(fi)

final_y_pred = predict(enc.cuda(), dec.cuda(), data, **da_rnn_kwargs)

# Plot predicted vs. true targets. The first T-1 targets are skipped
# because the model needs a full window of history before it can predict.
plt.figure()
plt.plot(data.targs[(da_rnn_kwargs["T"] - 1):], label="True")
plt.plot(final_y_pred, label="Predicted")
plt.legend(loc="upper left")
utils.save_or_show_plot("final_predicted_reloaded.png", save_plots)

# Inverse-transform the predictions back to the original scale by
# splicing them into the scaler's full feature layout.
# NOTE(review): assumes the target sits at column index 4 of that layout
# and that final_y_pred is a 2-D column — confirm against preprocess_data.
X1 = scaler.inverse_transform(
    np.concatenate(
        (
            data.feats[(da_rnn_kwargs["T"] - 1):, :4],
            final_y_pred,
            data.feats[(da_rnn_kwargs["T"] - 1):, 4:],
        ),
        axis=1,
    )
)
final_y_pred_1 = X1[:, 4]  # predictions in original (unscaled) units

# Keep only the held-out portion of the raw data for evaluation.
raw_data = raw_data[train_num:]
# --- Build the EDU prediction model on top of the pretrained encoder ---

print(enc_model)
print('Load pretrained encoder ok')

# Read the training corpus and mask EDU words for the pretraining task.
train_data = read_corpus(config.train_file, config.min_edu_num, config.max_edu_num)
masked_word_counter, train_data = mask_edu(train_data, config, tok)
print("Training doc: ", len(train_data))

vocab = creatVocab(train_data, config, masked_word_counter)

# Assemble the model components.
pwordEnc = PretrainedWordEncoder(config, enc_model, enc_model.bert_hidden_size, enc_model.layer_num)
wordLSTM = WordLSTM(vocab, config)
sent2span = Sent2Span(vocab, config)
# Bug fix: the original bound the instance to the name `EDULSTM`, shadowing
# the EDULSTM class itself; use a distinct name for the instance.
edu_lstm = EDULSTM(vocab, config)
dec = Decoder(vocab, config)

# Bug fix: persist the vocab via a context manager so the file handle is
# flushed and closed deterministically (the original leaked the handle
# returned by open()).
with open(config.save_vocab_path, 'wb') as vocab_file:
    pickle.dump(vocab, vocab_file)

# Move all components to GPU if configured.
if config.use_cuda:
    pwordEnc.cuda()
    wordLSTM.cuda()
    sent2span.cuda()
    edu_lstm.cuda()
    dec.cuda()

edupred = EDUPred(pwordEnc, wordLSTM, sent2span, edu_lstm, dec, config)
train(train_data, edupred, vocab, config, tok)