# Experiment script: builds one wv.SkipGramModel per batch size in BATCH_SIZE.
# NOTE(review): relies on `os`, `sys`, `inspect` and `np` being imported
# earlier in the file -- those imports are not visible in this fragment.

# Resolve the directory containing this file and put its parent directory at
# the front of sys.path.
currentdir = os.path.dirname(
    os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(currentdir)
sys.path.insert(0, parentdir)

# Keep these imports below the sys.path change above; presumably the modules
# live in `parentdir` -- confirm against the repository layout.
from datareader import DataReader
import word2vec as wv
import util

# Input text file: <parentdir>/data/Wiki.txt
file_path = os.path.join(parentdir, "data")
file_path = os.path.join(file_path, "Wiki.txt")

# <parentdir>/evaluation/questions-words-ptbr.txt (not used in the visible
# part of the script).
eval_path = os.path.join(parentdir, "evaluation")
eval_path = os.path.join(eval_path, "questions-words-ptbr.txt")

# Read the data and keep the two vocabulary mappings exposed by the reader.
my_data = DataReader(file_path)
my_data.get_data()
word2index = my_data.word2index
index2word = my_data.index2word

# Batch sizes to try: 10, 20, ..., 160.
BATCH_SIZE = np.array(range(1, 17)) * 10
number_of_exp = len(BATCH_SIZE)
results = []  # nothing is appended to this within the visible loop body
info = []  # one list of "name: value" strings per configuration

for i, bs in enumerate(BATCH_SIZE):
    # 1-based progress marker.
    print("\n ({0} of {1})".format(i + 1, number_of_exp))
    config = wv.Config(batch_size=bs)
    # Record every attribute of the config object as a "name: value" string.
    attrs = vars(config)
    config_info = ["%s: %s" % item for item in attrs.items()]
    info.append(config_info)
    my_model = wv.SkipGramModel(config)
# NOTE(review): the statements down to `return error` are the tail of a method
# whose `def` line (and enclosing class) lie outside this view; the 8-space
# indentation used for them is an assumption -- confirm against the full file.
        # Draw the actual values (testY) and the predictions (preds) against
        # the integer positions 0..len(preds)-1, show the figure, and return
        # the previously computed error.
        # NOTE(review): "Actul" in the title is a typo for "Actual"; it is
        # left unchanged here because it is a runtime string.
        plt.title("Actul vs Predicted Load ({}) - {} Window size {}".format(
            location, "SVR_" + self.kernel_type, window_size))
        plt.plot(list(range(len(preds))), testY, label='Actual Load')
        plt.plot(list(range(len(preds))), preds, label='Predicted Load')
        plt.xlabel('Time')
        plt.ylabel('Power Consumption (MW)')
        plt.legend()
        plt.show()
        return error


if __name__ == '__main__':
    # Script entry point: read the data, build windowed inputs, fit an SVR
    # with a polynomial kernel and print the resulting loss.
    fname = "data/household.csv"  # Works for household. Boosting does not.
    # File name without its directory, cut at the first "." ("household").
    location = os.path.split(fname)[1].split(".")[0]
    datareader = DataReader(fname, sample_size=10000, encoding='Cosine')
    features, Y = datareader.get_data()
    window_size = 7
    # 7 does not predict higher extreme values
    # 28 does not predict lower extreme values
    # Drop the last `window_size` entries of `features`; the concatenation
    # below needs the same number of rows as the output of window() --
    # presumably window() yields len(Y) - window_size rows, TODO confirm.
    features = features[:-window_size]
    X, Y = window(Y, window_size)
    # Append the remaining features as extra columns of the windowed inputs.
    X = np.concatenate((X, features), axis=1)
    svm_poly = SVRRegression(kernel_type='poly')
    loss = svm_poly.fit_predict(X, Y, location)
    print("Loss : ", "%.2f" % loss)
class RegressionDataset(Dataset):
    """Expose parallel ``inputs`` / ``labels`` collections as one dataset.

    Indexing with ``k`` yields the tuple ``(inputs[k], labels[k])``; the
    reported length is the length of ``inputs``.
    """

    def __init__(self, inputs, labels):
        self.inputs, self.labels = inputs, labels

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, id):
        return (self.inputs[id], self.labels[id])


if __name__ == '__main__':
    # Smoke run: wrap the reader output in a DataLoader and print the first
    # three batches.
    fname = "data/AEP_hourly.csv"
    X, Y = DataReader(fname).get_data()
    dataset_loader = DataLoader(
        RegressionDataset(inputs=X, labels=Y),
        batch_size=8,
        shuffle=False,
        num_workers=2,
    )
    for batch_no, (features, target) in enumerate(dataset_loader):
        print(features)
        print(target)
        print()
        # Stop once batches 0, 1 and 2 have been printed.
        if batch_no >= 2:
            break
def test_run():
    """Exercise the reading, plotting and statistics utilities end to end.

    Steps, in order:

    1. Read every file listed in ``<base_dir>/BitcoinData`` for a fixed date
       range via ``DataReader.get_data``, normalize the result with
       ``Utility.normalize_data`` and plot it.
    2. Read ``bitcoin-market-price.csv`` via ``DataReader.get_btc`` and plot
       the price together with its 20-period rolling mean and the bands
       returned by ``Statistics.get_bollinger_bands``.
    3. Plot the daily returns and their histogram, marking the mean and
       +/- one standard deviation.
    4. Scatter ``hash_rate`` against ``market_cap`` with a degree-1 fit.
    5. Print the Pearson correlation between ``avg_block_size`` and ``n_tx``.

    Takes no arguments and returns ``None``; the output is the displayed
    figures and the printed correlation.
    """
    # Define a date range
    dates = pd.date_range('2015-04-02', '2016-04-01')

    # Choose feature symbols to read: one per entry in the data directory
    location = os.path.join(base_dir, "BitcoinData")
    symbols = os.listdir(location)

    # Build dataframe consisting of all features
    dfreader = DataReader()
    util = Utility()
    df = dfreader.get_data(location, symbols, dates)
    df = util.normalize_data(df)

    # NOTE(review): str.strip('.csv') removes any of the characters '.', 'c',
    # 's', 'v' from both ends, not the ".csv" suffix, so a name that starts or
    # ends with one of those letters is mangled. Kept as is because the column
    # labels of `df` are produced by DataReader.get_data, which is not visible
    # here -- confirm how it names columns before switching to
    # os.path.splitext.
    symbols = [symbol.strip('.csv') for symbol in symbols]

    plotter = DataPlotting()
    # Plot dataframe in selected range and given features list
    plotter.plot_selected(df, symbols, '2015-05-01', '2015-06-01')
    # Plot dataframe for all given data
    plotter.plot_data(df, "Bitcoin")

    dates = pd.date_range('2010-01-01', '2016-01-01')
    btc_file = "bitcoin-market-price.csv"
    location = os.path.join(base_dir, btc_file)
    df_btc = dfreader.get_btc(location, btc_file, dates)

    stats = Statistics(df)
    # Plain column selection replaces DataFrame.ix, which was removed in
    # pandas 1.0.
    price = df_btc['bitcoin-market-price']
    rmean = stats.get_rolling_mean(price, window=20)
    rstd = stats.get_rolling_std(price, window=20)
    upper_band, lower_band = stats.get_bollinger_bands(rmean, rstd)

    # Plot raw values, rolling mean and Bollinger Bands
    ax = price.plot(title="Bollinger Bands", label='bitcoin-market-price')
    rmean.plot(label='Rolling mean', ax=ax)
    upper_band.plot(label='upper band', ax=ax)
    lower_band.plot(label='lower band', ax=ax)

    # Add axis labels and legend
    ax.set_xlabel("Date")
    ax.set_ylabel("Price")
    ax.legend(loc='upper left')
    plt.show()

    # Compute daily returns
    daily_returns = stats.compute_daily_returns(df_btc)
    plotter.plot_data(daily_returns, title="Daily returns",
                      ylabel="Daily returns")
    # Infinite returns would break the histogram; np.nan is used because the
    # np.NaN alias was removed in NumPy 2.0.
    daily_returns.replace(to_replace=np.inf, value=np.nan, inplace=True)

    # Plot a histogram
    daily_returns.hist(bins=21)

    # Get mean and standard deviation
    mean = daily_returns.mean()
    std = daily_returns.std()
    # .iloc[0] takes the first column's statistic by position; plain [0] on a
    # label-indexed Series is deprecated in recent pandas.
    plt.axvline(mean.iloc[0], color='w', linestyle='dashed', linewidth=2)
    plt.axvline(std.iloc[0], color='r', linestyle='dashed', linewidth=2)
    plt.axvline(-std.iloc[0], color='r', linestyle='dashed', linewidth=2)
    plt.show()

    # Scatterplots
    df.plot(kind='scatter', x='hash_rate', y='market_cap')
    # Fit poly degree 1: polyfit returns the slope first, then the intercept.
    slope, intercept = np.polyfit(df['hash_rate'], df['market_cap'], 1)
    # The fitted line is evaluated at the x values (hash_rate); the original
    # evaluated it at the y column, which drew the wrong line.
    plt.plot(df['hash_rate'], slope * df['hash_rate'] + intercept, '-',
             color='r')
    plt.show()

    # Calculate correlation coefficient
    correlation = df['avg_block_size'].corr(df['n_tx'], method='pearson')
    # Function-call form works on both Python 2 and Python 3.
    print(correlation)