コード例 #1
0
import numpy as np
import pandas as pd

import plotly.offline as py
py.init_notebook_mode(connected=True)
import plotly.graph_objs as go
import plotly.tools as tls

from kaggle.competitions import twosigmanews

# Notebook-wide pandas settings: turn off the chained-assignment
# (SettingWithCopy) warning and allow up to 999 columns when a frame is displayed.
pd.options.mode.chained_assignment = None
pd.options.display.max_columns = 999

# Get 2Sigma environment
env = twosigmanews.make_env()
# Get the data: a pair of frames, unpacked as (mt_df, nt_df).
# NOTE(review): presumably market data first, news data second -- confirm
# against the competition API.
mt_df, nt_df = env.get_training_data()
# Bare expressions below (head, dtypes, isna().sum(), nunique()) only compute
# a value; they show output only where the notebook displays a cell's last
# expression -- TODO confirm the original cell boundaries.
mt_df.head()
# Report the number of rows in the market frame, with thousands separators.
print("We have {:,} market samples in the training dataset.".format(mt_df.shape[0]))
mt_df.dtypes
mt_df.isna().sum()
mt_df.nunique()
# Select a single asset and keep rows whose 'time' lies strictly between
# 2015-01-01 and 2017-01-01 (both bounds exclusive).
asset1Code = 'AAPL.O'
asset1_df = mt_df[(mt_df['assetCode'] == asset1Code) & (mt_df['time'] > '2015-01-01') & (mt_df['time'] < '2017-01-01')]
# Create a trace: 'time' formatted as YYYY-MM-DD strings on x, 'close' on y.
trace1 = go.Scatter(
    x = asset1_df['time'].dt.strftime(date_format='%Y-%m-%d').values,
    y = asset1_df['close'].values
)
コード例 #2
0
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')

import lightgbm as lgb

from itertools import chain

%matplotlib inline

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

import os
# Show which files are available in the input directory.
print(os.listdir("../input"))

# Create the competition environment.
env = twosigmanews.make_env()

# Call get_training_data() once and unpack the (market, news) pair; calling
# it separately for each frame would fetch the whole training set twice.
df_market, df_news = env.get_training_data()
# Any results you write to the current directory are saved as output.

# Per-column count of missing values in the market data.
print(df_market.isnull().sum())

# Fill empty market fields
def fillMarketEmpty(df_market, fill_value=-9999.99):
    """Replace missing market-residual returns with a sentinel value.

    Each of the four ``returns*PrevMktres*`` columns is filled from itself,
    so the close and open columns are never mixed up.

    Args:
        df_market: DataFrame containing the columns
            ``returnsClosePrevMktres1``, ``returnsOpenPrevMktres1``,
            ``returnsClosePrevMktres10`` and ``returnsOpenPrevMktres10``.
            It is modified in place.
        fill_value: Value written wherever one of those columns is NaN.

    Returns:
        The same ``df_market`` object, with the four columns filled.

    Raises:
        KeyError: If any of the four columns is missing from ``df_market``.
    """
    residual_columns = (
        'returnsClosePrevMktres1',
        'returnsOpenPrevMktres1',
        'returnsClosePrevMktres10',
        'returnsOpenPrevMktres10',
    )
    for column in residual_columns:
        # Source and destination are the same column by construction.
        df_market[column] = df_market[column].fillna(fill_value)
    return df_market