Example #1
0
def read_dir(datestr, print_date=False):
    """Read every saved ticker file for one date and concatenate them.

    :param datestr: date string naming the FTSE100 data subdirectory
    :param print_date: when True, echo the date being processed
    :return: single DataFrame of all parsed ticker files for that date
    """
    if print_date:
        print(datestr)
    date_dir = mda.data_dir('FTSE100', datestr)
    frames = []
    for ticker_file in os.listdir(date_dir):
        frames.append(read_file(ticker_file, datestr))
    return pd.concat(frames)
Example #2
0
def get_s3(download_date):
    """Build an S3Proxy pointed at the raw data for one download date.

    :param download_date: date string (YYYYMMDD) selecting both the local
        FTSE100 data directory and the 'raw/<date>/' prefix in the bucket
    :return: configured S3Proxy instance
    """
    local_dir = mda.data_dir('FTSE100', download_date)
    prefix = 'raw/' + download_date + '/'
    return S3Proxy(bucket='ftse100', local_dir=local_dir, prefix=prefix)
def read():
    """Load the 31 daily NYT csv files into one DataFrame.

    Each file contributes a Day column (1..31); all column names are
    lower-cased before returning.
    """
    daily_frames = []
    for day in range(1, 32):
        csv_path = mda.data_dir('doing_data_science', 'dds_datasets',
                                'nyt{}.csv'.format(day))
        daily_frames.append(pd.read_csv(csv_path).assign(Day=day))
    combined = pd.concat(daily_frames)
    combined.columns = [name.lower() for name in combined.columns]
    return combined
Example #4
0
from __future__ import division

import os
import re
import time

import boto3
from tqdm import tqdm

import mda
from mda.io.google_finance import LseReader
from mda.io.s3 import S3Proxy

__author__ = 'mattmcd'

dataLoc = mda.data_dir('FTSE100')


def get_all(do_copy=False):
    """Get last 10 days of 1 minute intraday data from FTSE 100
    Args:
        do_copy: copy downloaded files to S3

    Returns:
        <none> Creates saved text files
    """
    # NOTE(review): this block appears truncated in this view — `reader`
    # and `do_copy` are not used in the visible lines; the download/copy
    # logic presumably follows. Confirm against the full source file.
    reader = LseReader()
    # Today's date (YYYYMMDD) names the per-day download folder.
    download_date = time.strftime("%Y%m%d")
    save_loc = os.path.join(dataLoc, download_date)
    # Create the date directory on first run; assumes dataLoc itself exists.
    if not os.path.isdir(save_loc):
        os.mkdir(save_loc)
Example #5
0
def read_file(ticker_file, datestr):
    """Parse one saved ticker file for the given date.

    :param ticker_file: filename within the date directory; the ticker
        symbol is taken as the part before the first '.'
    :param datestr: date string naming the FTSE100 data subdirectory
    :return: result of parse_text on the file's contents
    """
    file_path = mda.data_dir('FTSE100', datestr, ticker_file)
    ticker = ticker_file.split('.')[0]
    with open(file_path) as fh:
        raw_text = fh.read()
    return parse_text(raw_text, ticker=ticker)
Example #6
0
import mda
import urllib2
import numpy as np
import pandas as pd
import os

ftseFile = mda.data_dir('FTSE100', 'FTSE100.csv')


class LseReader:
    """Reader for intraday FTSE 100 price history from the (legacy)
    Google Finance getprices endpoint.

    NOTE(review): this class appears truncated in this view — read_url
    ends mid-body; confirm against the full source file.
    """

    def __init__(self, interval=60, period=10):
        """Constructor
        :param interval: time in seconds between downloaded values
        :param period: period in days to download
        :return: LseReader
        """

        # Ticker universe loaded from the saved FTSE100 constituents csv.
        self.ftse100 = pd.read_csv(ftseFile)
        # Base URL of the Google Finance intraday price service.
        self.prefixURL = 'https://www.google.com/finance/getprices?'
        # Defaults used by read_url when no override is given.
        self.interval = interval
        self.period = period

    def read_url(self, ticker, interval=None, period=None):
        """Read intraday history for selected ticker on LSE
        :param ticker: ticker symbol to download
        :param interval: time in seconds between downloaded values
        :param period: period in days to download
        :return: txt : downloaded price data as string
        :return: interval : interval in seconds between downloaded prices
        """
        # Fall back to the instance default when no interval is supplied.
        # (Only this fallback is visible here; the URL build/fetch
        # presumably follows in the untruncated source.)
        if not interval:
            interval = self.interval
Example #7
0
import numpy as np
import pandas as pd
import mda
import matplotlib.pyplot as plt
import seaborn as sns

# Load the two-day event dump and compute per-session first/last event
# timestamps plus session duration in seconds.
df = pd.read_pickle(mda.data_dir('Events_20161207_20161208.pkl'))
sessions = df.groupby('session_id')['collector_tstamp'].agg(['min', 'max'])
sessions['duration'] = (sessions['max'] - sessions['min']) / pd.Timedelta(
    1, 's')
# Attach each session's app_id (drop_duplicates gives one row per
# session/app pair; assumes app_id is constant within a session — verify).
sessions = pd.merge(sessions,
                    df[['session_id', 'app_id']].drop_duplicates(),
                    left_index=True,
                    right_on='session_id')
# Map raw app ids onto display names; rows matching neither keep NaN.
sessions.loc[sessions.app_id == 'phone', 'app'] = 'Wanda'
sessions.loc[sessions.app_id.str.contains('mondo'), 'app'] = 'Mondo'

# Duration histograms per app, restricted to 10s-10min sessions.
# NOTE(review): `normed` is the legacy matplotlib hist argument (renamed
# `density` in matplotlib >= 2.1) — confirm the pinned version accepts it.
g = sns.FacetGrid(sessions.query('10 < duration < 600'), col='app')
g.map(plt.hist, 'duration', normed=True)

# Per-event offset `t` (seconds) from the start of its session.
df = pd.merge(df, sessions)
df['t'] = (df['collector_tstamp'] - df['min']) / pd.Timedelta(1, 's')

# Flag "TryItClick" events; non-matching rows keep NaN in `event`.
df.loc[df.se_category.str.contains('TryItClick'), 'event'] = 1

# Scatter of TryItClick occurrences over session time, one panel per app.
g = sns.FacetGrid(df.query('t < 600'), row='app')
g.map(plt.scatter, 't', 'event', alpha=0.05)

# Distribution of TryItClick times within the first 10 minutes.
g = sns.FacetGrid(df.query('event == 1 and t < 600'), row='app')
g.map(plt.hist, 't', normed=True)