Example #1
0
# Put the directory above this script's folder first on sys.path. The three
# imports that follow must stay below this line; presumably they are resolved
# through that entry.
currentdir = os.path.dirname(os.path.abspath(
    inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(currentdir)
sys.path.insert(0, parentdir)

from datareader import DataReader
import word2vec as wv
import util

# Input files are addressed relative to the parent directory.
file_path = os.path.join(parentdir, "data", "Wiki.txt")
eval_path = os.path.join(parentdir, "evaluation", "questions-words-ptbr.txt")

# Run the reader once and expose its two lookup tables at module level.
my_data = DataReader(file_path)
my_data.get_data()
word2index, index2word = my_data.word2index, my_data.index2word

# Batch sizes under test: 10, 20, ..., 160.
BATCH_SIZE = np.arange(1, 17) * 10
number_of_exp = len(BATCH_SIZE)
results, info = [], []

for i, bs in enumerate(BATCH_SIZE):
    print("\n ({0} of {1})".format(i + 1, number_of_exp))
    config = wv.Config(batch_size=bs)
    attrs = vars(config)
    # One "name: value" entry per attribute stored on the config object.
    config_info = ["%s: %s" % (key, value) for key, value in attrs.items()]
    info.append(config_info)
    my_model = wv.SkipGramModel(config)
Example #2
0
        plt.title("Actul vs Predicted Load ({}) - {}  Window size {}".format(
            location, "SVR_" + self.kernel_type, window_size))
        plt.plot(list(range(len(preds))), testY, label='Actual Load')
        plt.plot(list(range(len(preds))), preds, label='Predicted Load')
        plt.xlabel('Time')
        plt.ylabel('Power Consumption (MW)')
        plt.legend()
        plt.show()

        return error


if __name__ == '__main__':
    # Works for household. Boosting does not.
    fname = "data/household.csv"
    # Label for the run: the file's base name up to its first dot.
    location = os.path.basename(fname).split(".")[0]
    datareader = DataReader(fname, sample_size=10000, encoding='Cosine')
    features, Y = datareader.get_data()

    # 7 does not predict higher extreme values
    # 28 does not predict lower extreme values
    window_size = 7

    # Drop the last `window_size` feature rows before joining them to the
    # windowed targets column-wise (presumably so the row counts line up
    # with what window() returns -- confirm against window()).
    features = features[:-window_size]
    X, Y = window(Y, window_size)
    X = np.concatenate([X, features], axis=1)

    svm_poly = SVRRegression(kernel_type='poly')
    loss = svm_poly.fit_predict(X, Y, location)

    print("Loss : ", "%.2f" % loss)
Example #3
0
class RegressionDataset(Dataset):
    """Dataset that serves ``(input, label)`` pairs looked up by index.

    ``inputs`` and ``labels`` are stored as given; both are indexed with the
    same key, so they are expected to be aligned position by position.
    """

    def __init__(self, inputs, labels):
        self.inputs, self.labels = inputs, labels

    def __len__(self):
        # The size is taken from the inputs alone; labels are not checked.
        return len(self.inputs)

    def __getitem__(self, id):
        # Parameter name kept as ``id`` so the signature stays unchanged.
        return (self.inputs[id], self.labels[id])


if __name__ == '__main__':
    fname = "data/AEP_hourly.csv"
    datareader = DataReader(fname)
    X, Y = datareader.get_data()

    dataset = RegressionDataset(X, Y)
    dataset_loader = DataLoader(
        dataset, batch_size=8, shuffle=False, num_workers=2)

    # Smoke check: print the first three batches, then stop iterating.
    for i, (batch_inputs, batch_labels) in enumerate(dataset_loader):
        print(batch_inputs)
        print(batch_labels)
        print()
        if i == 2:
            break
def test_run():
    """Exercise the reader, normalisation, statistics and plotting utilities.

    Reads every file listed in ``<base_dir>/BitcoinData`` into one dataframe,
    normalises and plots it, then reads ``bitcoin-market-price.csv`` to draw
    Bollinger Bands and daily returns. Finally it fits a straight line through
    ``hash_rate`` vs. ``market_cap`` and prints the Pearson correlation
    between ``avg_block_size`` and ``n_tx``.

    Takes no arguments and returns nothing; it calls ``plt.show()`` several
    times and prints one value.
    """
    price_col = 'bitcoin-market-price'

    # Define a date range
    dates = pd.date_range('2015-04-02', '2016-04-01')

    # Choose feature symbols to read: every entry of the data folder
    location = os.path.join(base_dir, "BitcoinData")
    symbols = os.listdir(location)

    # Build dataframe consisting of all features
    dfreader = DataReader()
    utility = Utility()
    df = dfreader.get_data(location, symbols, dates)
    df = utility.normalize_data(df)

    # Turn file names into feature names by dropping the extension.
    # str.strip('.csv') removes any of the characters '.', 'c', 's', 'v'
    # from both ends (e.g. "total_bitcoins.csv" -> "total_bitcoin"), so the
    # extension is split off explicitly instead.
    # NOTE(review): assumes the dataframe columns are the file names without
    # extension, as the column names used further down suggest -- confirm
    # against DataReader.get_data.
    symbols = [os.path.splitext(name)[0] for name in symbols]

    plotter = DataPlotting()
    # Plot dataframe in selected range and given features list
    plotter.plot_selected(df, symbols, '2015-05-01', '2015-06-01')
    # Plot dataframe for all given data
    plotter.plot_data(df, "Bitcoin")

    dates = pd.date_range('2010-01-01', '2016-01-01')
    btc_file = "bitcoin-market-price.csv"
    location = os.path.join(base_dir, btc_file)
    df_btc = dfreader.get_btc(location, btc_file, dates)

    stats = Statistics(df)
    # Plain column selection replaces the removed DataFrame.ix accessor.
    rmean = stats.get_rolling_mean(df_btc[price_col], window=20)
    rstd = stats.get_rolling_std(df_btc[price_col], window=20)
    upper_band, lower_band = stats.get_bollinger_bands(rmean, rstd)

    # Plot raw values, rolling mean and Bollinger Bands
    ax = df_btc[price_col].plot(title="Bollinger Bands", label=price_col)
    rmean.plot(label='Rolling mean', ax=ax)
    upper_band.plot(label='upper band', ax=ax)
    lower_band.plot(label='lower band', ax=ax)

    # Add axis labels and legend
    ax.set_xlabel("Date")
    ax.set_ylabel("Price")
    ax.legend(loc='upper left')
    plt.show()

    # Compute daily returns
    daily_returns = stats.compute_daily_returns(df_btc)
    plotter.plot_data(daily_returns, title="Daily returns",
                      ylabel="Daily returns")

    # Replace infinite returns with NaN before the histogram and statistics.
    # np.nan is the spelling available in every NumPy release.
    daily_returns.replace(to_replace=np.inf, value=np.nan, inplace=True)
    # Plot a histogram
    daily_returns.hist(bins=21)

    # Get mean and standard deviation
    mean = daily_returns.mean()
    std = daily_returns.std()

    # Mark the mean and +/- one standard deviation of the first column.
    # .iloc makes the positional lookup explicit instead of relying on the
    # deprecated integer fallback of a label-indexed Series.
    plt.axvline(mean.iloc[0], color='w', linestyle='dashed', linewidth=2)
    plt.axvline(std.iloc[0], color='r', linestyle='dashed', linewidth=2)
    plt.axvline(-std.iloc[0], color='r', linestyle='dashed', linewidth=2)
    plt.show()

    # Scatterplot with a degree-1 least-squares fit. polyfit returns the
    # highest power first, i.e. (slope, intercept) for y = slope * x + b.
    df.plot(kind='scatter', x='hash_rate', y='market_cap')
    slope, intercept = np.polyfit(df['hash_rate'], df['market_cap'], 1)
    # The fitted line is evaluated at the x values (hash_rate), not at y.
    plt.plot(df['hash_rate'], slope * df['hash_rate'] + intercept,
             '-', color='r')
    plt.show()

    # Calculate correlation coefficient
    correlation = df['avg_block_size'].corr(df['n_tx'], method='pearson')
    # Function-call form prints the same text on Python 2 and Python 3.
    print(correlation)