Beispiel #1
0
    def __init__(self, sample_buildings=500, modeling_hours=1000,
                 data_dir='/Users/jonathanroth/PycharmProjects/UBEM'):
        """Load the model inputs from CSV and build the scaled archetype tensor.

        :param sample_buildings: number of buildings to sample (stored as-is).
        :param modeling_hours: number of hours to model (stored as-is).
        :param data_dir: directory containing ``M.csv``, ``A.csv``,
            ``Pluto_export.csv``, ``NYC_8784_ElectricityUse.csv`` and
            ``DOE_RefBuildings.csv``. Defaults to the location that used to be
            hard-coded, so existing callers are unaffected.
        """
        # Local import so the block does not depend on a file-level ``import os``.
        import os

        def csv_path(file_name):
            return os.path.join(data_dir, file_name)

        # Inputs
        # random.seed(123)
        self.building_classes = 25  # building class
        self.total_buildings = 815032  # building parcels
        self.sample_buildings = sample_buildings
        self.doe_archetypes = 19  # building archetypes
        self.total_hours = 8784  # hours in a year
        self.modeling_hours = modeling_hours  # hours to model

        # Load files and store as np arrays. The leading column(s) of M and A
        # are dropped before the float conversion.
        self.m = pd.read_csv(csv_path('M.csv')).values[:, 1:].astype(float)  # [self.building_classes x K]
        self.a = pd.read_csv(csv_path('A.csv')).values[:, 2:].astype(float)  # [self.sample_buildings x self.building_classes]
        self.pluto_export = pd.read_csv(csv_path('Pluto_export.csv'))
        self.building_energy = self.pluto_export['Energy_kbtu'].values.astype(float)

        self.nyc_8784_electricity = pd.read_csv(csv_path('NYC_8784_ElectricityUse.csv'))
        self.city_electricity = self.nyc_8784_electricity['Load'].values.astype('float')
        # City load expressed relative to its mean over the first total_hours entries.
        self.city_electricity_scaled = self.city_electricity / np.mean(self.city_electricity[:self.total_hours])

        self.doe_ref_buildings = pd.read_csv(csv_path('DOE_RefBuildings.csv'))
        self.temperature = self.doe_ref_buildings['Temperature'].values.astype(float).reshape((self.total_hours, 1))
        self.cdd = self.doe_ref_buildings['Cooling_Degree_Hour'].values.astype(float).reshape((self.total_hours, 1))
        self.date_time = self.doe_ref_buildings['Date_Time'].values.astype(str).reshape((self.total_hours, 1))

        # Business-day flag (1/0) per row, based on the first 10 characters of
        # the timestamp string. The holiday calendar is built once instead of
        # once per row.
        us_holidays = holidays.US()
        self.bday = np.array([isbday(stamp[0:10], holidays=us_holidays)
                              for stamp in self.date_time[:, 0]]).astype(int)  # TODO

        # MANUAL SCALING and X matrix
        self.sf = 0.5
        self.x = np.zeros([self.doe_archetypes, self.doe_archetypes, self.total_hours]).astype(float)

        for k in range(self.doe_archetypes):
            # Archetype k is read from column k + 2 of DOE_RefBuildings and
            # stored on the diagonal entry [k, k, :].
            self.x[k, k, :] = self.doe_ref_buildings.values[:, k + 2]
            # Blend the mean-normalised profile with a constant: sf * x / mean(x) + (1 - sf).
            self.x[k, k, :] = self.sf * (self.x[k, k, :]) / np.mean(self.x[k, k, :self.total_hours]) + (1 - self.sf)
        print('Shape of X: ', np.shape(self.x))
def ReplaceWith(x,y):
    """Return the position whose value should replace the value at ``x``.

    ``x`` is the position of the data point and ``y`` is its date.

    * position 1 maps to itself;
    * positions 2..95 map to the previous position (``x - 1``);
    * positions 96..96*7 map to ``x - 96``;
    * any other position maps to ``x - 96 * n``, where ``n`` is the number of
      days back to the closest earlier day of the same type as ``y``
      (business day vs. non-business day, using the US holiday calendar).

    NOTE(review): ``x == 96`` yields 0; if positions are 1-based this may be
    an off-by-one at the end of the first day - confirm with the caller.
    """
    if x == 1:
        return 1
    if 1 < x < 96:
        return x - 1
    if 96 <= x <= 96 * 7:
        return x - 96

    # Walk back one day at a time until the day type matches that of ``y``.
    one_day = datetime.timedelta(days=1)
    day_type = isbday(y, holidays=holidays.US())
    days_back = 1
    while isbday(y - days_back * one_day, holidays=holidays.US()) != day_type:
        days_back += 1
    return x - 96 * days_back
def get_earliest_available_apt_datetime(now_date_time):
    """
    Return the earliest business datetime at or after the given datetime.

    * On a non-business day, or after END_OF_BUSINESS_TIMESTAMP on a business
      day: the next business day at 15:00:00.
    * Before START_OF_BUSINESS_TIMESTAMP on a business day: the same day at
      15:00:00.
    * Otherwise: the given datetime unchanged.

    :param now_date_time: datetime to start from
    :return: earliest available datetime
    """

    def _at_opening(moment):
        # 15:00:00.000000 on the same calendar day as ``moment``.
        return moment.replace(hour=15, minute=0, second=0, microsecond=0)

    if not isbday(now_date_time):
        return _at_opening(now_date_time + BDay(1))

    time_of_day = datetime.time(hour=now_date_time.hour,
                                minute=now_date_time.minute,
                                second=now_date_time.second,
                                microsecond=now_date_time.microsecond)

    if time_of_day > END_OF_BUSINESS_TIMESTAMP:
        return _at_opening(now_date_time + BDay(1))
    if time_of_day < START_OF_BUSINESS_TIMESTAMP:
        return _at_opening(now_date_time)
    return now_date_time
    def get_start_date(self, past_months):
        """
        Returns the date user wants to start collecting information.

        The first key of ``self.stock_dates`` is taken as the most current
        market day; ``past_months`` months are subtracted from it and, if the
        result is not a US business day, it is moved to the next business day.

        :param past_months: number of months back user wants to research
        :return: start date as a ``YYYY-MM-DD`` string
        """

        # Gets most current day of the stock market (a 'YYYY-MM-DD...' string).
        current_date = list(self.stock_dates.keys())[0]

        year = int(current_date[:4])
        month = int(current_date[5:7])
        day = int(current_date[8:10])

        begin_date = date(year, month, day) - relativedelta(months=past_months)

        # Checks to make sure start date is a business day, if not moves to
        # next business day. Working on the date object avoids adding a
        # relativedelta to a string.
        us_holidays = holidays.US()
        if not isbday(begin_date, holidays=us_holidays):
            begin_date = begin_date + relativedelta(bdays=+1,
                                                    holidays=us_holidays)

        # Zero-pad month and day explicitly instead of comparing string
        # lengths with ``is``.
        return "{}-{:02d}-{:02d}".format(begin_date.year, begin_date.month,
                                         begin_date.day)
def generate_date_based_cols(data, date_col):
    """Add calendar feature columns derived from a ``%Y%m%d`` date column.

    Adds ``DayOfMonth``, ``DayOfWeek`` (ISO weekday, Monday=1), ``Month`` and
    ``isBusinessDay`` (1/0) to ``data`` in place and returns ``data``.

    :param data: table supporting ``data[col]`` lookup and assignment
    :param date_col: name of the column holding dates formatted as YYYYMMDD
    :return: the same ``data`` object with the new columns
    """
    parsed = [datetime.strptime(str(raw), '%Y%m%d') for raw in data[date_col]]
    data['DayOfMonth'] = [moment.day for moment in parsed]
    data['DayOfWeek'] = [moment.isoweekday() for moment in parsed]
    data['Month'] = [moment.month for moment in parsed]
    # Multiplying by 1 turns the boolean into an int flag.
    data['isBusinessDay'] = [1 * isbday(moment) for moment in parsed]
    return data
    def updTime(self, inc):
        '''
        Increases the time by inc amount
        Changes self.time, self.dataStore.time and self.nextLogTime

        If the incremented time (interpreted via datetime.fromtimestamp) has
        an hour outside [9, 16) or falls on a non-business day, the clock is
        not simply incremented: self.time is set to 03:30:00 of its own date,
        after first being moved forward one business day when the incremented
        hour is >= 16 or the incremented date is not a business day.
        Otherwise all three timestamps are increased by inc.

        :param inc: amount added to the timestamps (they are passed to
            datetime.fromtimestamp, so presumably seconds)
        '''

        # Business day support
        # NOTE: prevDate and secInDay are assigned but not used in this method.
        prevDate = datetime.fromtimestamp(self.time).date()
        nextTimeHour = datetime.fromtimestamp(self.time + inc).time().hour
        nextDate = datetime.fromtimestamp(self.time + inc).date()
        secInDay = 86400

        # ``debug`` is read from an enclosing scope (not defined in this method).
        if (debug):
            print 'Time before:', self.time, self.dataStore.time
            print 'Hour after increment:', nextTimeHour
            print 'Time right now:', datetime.fromtimestamp(self.time).time()

        # Incremented time is outside the 9-16 hour window or on a
        # non-business day: jump instead of incrementing.
        if (not (nextTimeHour >= 9 and nextTimeHour < 16)
                or (not isbday(nextDate))):
            # Late in the day or on a non-business day: advance the current
            # time (not the incremented one) by one business day first.
            if ((nextTimeHour >= 16) or (not isbday(nextDate))):
                self.time = dt2ut(
                    datetime.fromtimestamp(self.time) + (1) * BDay())
            # Reset the time of day to 03:30:00.
            # NOTE(review): 03:30 lies outside the 9-16 window checked above;
            # presumably a timezone offset is involved - confirm.
            self.time = dt2ut(
                datetime.fromtimestamp(self.time).replace(hour=3,
                                                          minute=30,
                                                          second=0,
                                                          microsecond=0))
            self.dataStore.time = self.time
            self.nextLogTime = self.time
            if (debug):
                print(
                    datetime.fromtimestamp(self.time).date(),
                    datetime.fromtimestamp(self.time).time())
        else:
            # Normal case: advance all three timestamps by inc.
            self.time += inc
            self.dataStore.time += inc
            self.nextLogTime += inc
            if (debug):
                print(
                    datetime.fromtimestamp(self.time).date(),
                    datetime.fromtimestamp(self.time).time())

        if (debug):
            print 'Time After:', self.time, self.dataStore.time
Beispiel #7
0
def callback3():
    """Freeze the date inputs and list each last-day event with its date.

    Disables the date entry, submit button and case-number widgets, creates
    the two list boxes (stored in the globals ``list2`` and ``list3``), fills
    ``list2`` from ``last_day_events`` and fills ``list3`` with one date per
    entry of ``num_days``:

    * offsets below 7 are counted in business days;
    * offsets of 7 or more are counted in calendar days;
    * a resulting date that is not a US business day is rolled forward to the
      next business day, now skipping US holidays as well as weekends.
    """
    global last_day_events, list2, list3

    date_entry.config(state='disabled')
    date_submit.config(state='disabled')
    case_num.config(state='disabled')
    new_lbl = tk.Label(root, text="Last Day Event(s): ")
    new_lbl.grid(row=17, column=1)

    list2 = tk.Listbox(root, height=0, width=75, selectmode='SINGLE')
    list2.bind(
        '<<ListboxSelect>>',
        on_select1,
    )
    list2.grid(row=18, column=1)
    list3_lbl = tk.Label(root, text="Last Day Date")
    list3_lbl.grid(row=17, column=2)
    list3 = tk.Listbox(root, height=0, width=20, selectmode='SINGLE')
    list3.bind('<<ListboxSelect>>', on_select2)
    list3.grid(row=18, column=2)
    for item in last_day_events:
        list2.insert('end', item)

    # Build the holiday calendar and parse the entered date once.
    us_holidays = holidays.UnitedStates()
    start = datetime.datetime.strptime(date_entry_var.get(), '%m/%d/%Y')
    for item in num_days:
        if item < 7:
            d = start + relativedelta(bdays=+item)
        else:
            d = start + datetime.timedelta(days=item)

        if not isbday(d, holidays=us_holidays):
            # Pass the calendar to the roll as well, otherwise the "next
            # business day" could itself be a holiday (e.g. a Monday holiday
            # after a weekend).
            d = d + relativedelta(bdays=+1, holidays=us_holidays)
        list3.insert('end', d.strftime('%m/%d/%Y'))
    def is_quote_delayed(self, last_bar_time):
        """Tell whether the latest bar is older than the allowed delay.

        Only evaluated on US business days while the current hour is strictly
        between 8 and 13; outside that window the quote is never reported as
        delayed.

        :param last_bar_time: datetime of the most recent bar
        :return: True when the whole-minute gap between now and
            ``last_bar_time`` exceeds ``self.args.delay``, else False
        """
        dtnow = self.date_now()

        if not bdateutil.isbday(dtnow, holidays=holidays.US()):
            return False
        if not (8 < dtnow.hour < 13):
            return False

        gap_minutes = int(abs((dtnow - last_bar_time).total_seconds() / 60.0))
        if gap_minutes > self.args.delay:
            return True
        return False
def dealWith_business_day(Date):
    """
    Parse a 'YYYY-MM-DD' string and move it off a non-business day.

    If the parsed day is not a business day, 2 days are added to it;
    otherwise it is returned unchanged.

    NOTE(review): adding a fixed 2 days maps a Saturday to Monday but a
    Sunday to Tuesday - confirm this is intended.

    :param Date: date string formatted as '%Y-%m-%d'
    :return: datetime object
    """
    parsed = dt.datetime.strptime(Date, '%Y-%m-%d')
    if isbday(parsed):
        return parsed
    return parsed + dt.timedelta(2)
Beispiel #10
0
def findLastClose(df):
    """Return the ``close`` value of the most recent US business day before today.

    :param df: DataFrame with a ``stamp`` column holding 'YYYY-MM-DD' strings
        and a ``close`` column
    :return: ``close`` of the first row whose ``stamp`` equals that day
    :raises KeyError: if ``df`` has no row for that day
    """
    # Capture "now" once so the search loop and the formatted date cannot
    # disagree if the clock passes midnight in between.
    now = datetime.now()
    us_holidays = holidays.US()

    # Find the last business day
    dBefore = 1
    while not isbday(now - timedelta(dBefore), holidays=us_holidays):
        dBefore += 1

    # Select that day's rows and renumber them from 0
    datestr = (now - timedelta(dBefore)).strftime("%Y-%m-%d")
    day_rows = df.loc[df["stamp"] == datestr]
    day_rows.index = pd.RangeIndex(len(day_rows.index))

    return day_rows.at[0, "close"]
Beispiel #11
0
    def run(self):
        """
        Application main()

        On US business days, copies every document of ``accounts_positions``
        into ``accounts_positions_archive`` with ``date_now`` truncated to
        midnight, upserting on (``date_now``, ``name``), then reports the
        outcome through ``self.signal_app``. Does nothing on non-business
        days or when there are no positions to archive.

        :return: None
        """
        if not bdateutil.isbday(datetime.datetime.now(), holidays=holidays.US()):
            self.log.info("Run is skipped due to non business day")
            return

        # Populating account positions
        operations = []
        update_date = 'N/A'

        for acc_pos_dict in self.mongo_db['accounts_positions'].find({}):
            # Shrinking time of the timestamp
            update_date = datetime.datetime.combine(acc_pos_dict['date_now'].date(), datetime.time(0,0,0))
            # 'date_now' - main timestamp of collection
            acc_pos_dict['date_now'] = update_date

            # Drop the source _id so the archive assigns its own.
            del acc_pos_dict['_id']

            # Add MongoDB bulk operation
            operations.append(ReplaceOne(
                {'date_now': update_date, 'name': acc_pos_dict['name']},
                acc_pos_dict,
                upsert=True,
            ))
        self.log.info("Last collection update date: {0}".format(update_date))

        if not operations:
            # bulk_write() rejects an empty request list with
            # InvalidOperation, which the BulkWriteError handler below would
            # not catch.
            self.log.info("No account positions found, nothing to archive")
            return

        # Execute bulk upsert to Mongo
        pp = pprint.PrettyPrinter(indent=4)
        try:
            bulk_result = self.mongo_db['accounts_positions_archive'].bulk_write(operations, ordered=False)
            self.log.info("Bulk write result succeed: \n{0}".format(pp.pformat(bulk_result.bulk_api_result)))

            self.signal_app.send(MsgStatus("OK",
                                           "Positions archive created",
                                           notify=True,
                                           )
                                 )
        except BulkWriteError as exc:
            self.log.error("Bulk write error occured: {0}".format(pp.pformat(exc.details)))
            self.signal_app.send(MsgStatus("ERROR",
                                           "Positions archive error while writing to MongoDB",
                                           notify=True,
                                           )
                                 )
Beispiel #12
0
    def Homework1(self,
                  tickerList,
                  weight,
                  notional,
                  startDate=None,
                  endDate=None,
                  historicalWindow=252,
                  intervals=(0.995, 0.99, 0.98, 0.975, 0.95)):
        """Compute parametric and historical VaR for a weighted portfolio.

        :param tickerList: tickers passed to ``self.dataSource.getPriceTable``
        :param weight: portfolio weights (converted to a numpy array)
        :param notional: market value passed to the VaR calculations
        :param startDate: first date of the price history (object with
            ``isoformat()``); when both dates are None the window ends on the
            latest US business day before today and spans
            ``historicalWindow`` business days
        :param endDate: last date of the price history
        :param historicalWindow: minimum number of price rows required, also
            the window of the historical VaR
        :param intervals: confidence levels to evaluate
        :return: dict with keys ``'Parametric'`` and ``'Historical'`` (each a
            dict keyed by confidence level) and ``'Cov-Var Matrix'``
        """
        if startDate is None and endDate is None:
            today = dt.date.today()
            today = today - dt.timedelta(days=1)
            # Get the latest business day
            us_holidays = holidays.US()
            while not isbday(today, holidays=us_holidays):
                today = today - dt.timedelta(days=1)

            endDate = today
            startDate = today + relativedelta(bdays=-historicalWindow)

        result = {}
        paraDict = {}
        hisDict = {}
        weight = np.array(weight)
        priceTabel = self.dataSource.getPriceTable(tickerList,
                                                   startDate.isoformat(),
                                                   endDate.isoformat())
        # Extend the start date backwards by the number of missing rows until
        # the table holds at least historicalWindow rows.
        while (len(priceTabel) < historicalWindow):
            startDate = startDate - dt.timedelta(historicalWindow -
                                                 len(priceTabel))
            priceTabel = self.dataSource.getPriceTable(tickerList,
                                                       startDate.isoformat(),
                                                       endDate.isoformat())
        # ``.values`` replaces DataFrame.as_matrix(), which was removed from pandas.
        prices = priceTabel.values
        parametric = ValueAtRisk(0.95, prices, weight)
        historical = HistoricalVaR(0.95, prices, weight)
        for interval in intervals:
            parametric.setCI(interval)
            historical.setCI(interval)
            key = interval
            # the 1 here means daily var
            paraDict[key] = parametric.var(marketValue=notional, window=1)
            hisDict[key] = historical.var(marketValue=notional,
                                          window=historicalWindow)
        result['Parametric'] = paraDict
        result['Historical'] = hisDict
        result['Cov-Var Matrix'] = parametric.covMatrix()
        return result
def main():
    """Update the portfolio exports and plot them, on business days only.

    On a business day: refreshes the portfolio, writes
    ``export_stocks.csv`` and ``export_account.csv`` and plots the portfolio
    against the indexes, exiting with status 1 if plotting fails. On any
    other day: prints a message and exits with status 0.
    """
    if isbday(datetime.datetime.now()):
        initial = 17231.72
        cash = 10.03
        stocks = ['SMTPC.PA', 'DG.PA', 'NXI.PA', 'COX.PA', 'ABCA.PA', 'IPH.PA']
        pru = [33.64054, 65.31130, 47.38463, 11.37695, 6.03034, 10.81]
        nbaction = [148, 61, 84, 46, 673, 64]
        my_port = {'Stock': stocks, 'PRU': pru, 'Nb Action': nbaction}
        my_port = updatePortfolio(initial, my_port)
        updateStocksCsv('export_stocks.csv', my_port)
        account = computePortfolioYield(my_port, cash, initial)
        updateAccountCsv('export_account.csv', account)
        try:
            plotPortfolioVsIndexes('export_account.csv')
        except Exception:
            # Narrowed from a bare ``except`` so KeyboardInterrupt and
            # SystemExit are no longer reported as a plotting error.
            print("Erreur")
            exit(1)
    else:
        print("Jour non-ouvré")
        exit(0)
Beispiel #14
0
def featureCreation(feed,
                    window,
                    h,
                    grouper,
                    dataDir,
                    apiDic,
                    r_id=None,
                    longestfeed=False):
    """Build the feature matrix and response for a feed time series.

    Adds calendar columns to ``feed``, loads and cleans the weather file that
    matches the feed's location, trims both to a common time range, reduces
    the weather columns with PCA and concatenates everything into one array.

    :param feed: time series data; converted to a DataFrame. Its index is
        accessed through ``.hour``, ``.minute``, ``.dayofweek``, ``.month``
        and ``.date`` (presumably a DatetimeIndex - TODO confirm).
    :param window: window length forwarded to ``mlf.ts_to_mimo`` and
        ``mlf.weather_to_mimo``
    :param h: horizon forwarded to the same helpers; also the number of
        columns reserved per PCA component
    :param grouper: pandas frequency string; used both as the date_range
        frequency and as the name of the slot-of-day column
    :param dataDir: directory containing the ``WEATHER_DATA`` folder
    :param apiDic: table with ``id`` and ``lat_long`` columns
    :param r_id: id looked up in ``apiDic``; passed to ``int()``, so the
        default of None raises TypeError
    :param longestfeed: not used in this function
    :return: tuple ``(features, response)``

    NOTE(review): ``np``, ``os``, ``preprocessing``, ``mlf`` and ``evalDate``
    are not imported here and must come from the enclosing module.
    NOTE(review): ``DataFrame.from_csv`` and ``.ix`` have been removed from
    current pandas releases; this function needs an old pandas version.
    """
    import pandas as pd
    from bdateutil import isbday
    import holidays
    from sklearn.decomposition import PCA

    feed = pd.DataFrame(feed)
    # lat_long of the rows whose id matches r_id; the entry with index label 1
    # is taken from that selection.
    r_lat_long = apiDic.loc[(apiDic['id'] == int(r_id)), 'lat_long'][1]

    # Quarter of hour
    # Number the slots of the day (one per ``grouper`` step from 00:00 to
    # 23:45) and store the slot number in the column named by ``grouper``.
    # NOTE: ``array`` is filled but not used afterwards.
    counter = 0
    array = []
    for i in pd.date_range('00:00', '23:45', freq=grouper):
        feed.loc[(feed.index.hour == i.hour) & (feed.index.minute == i.minute),
                 grouper] = counter
        array.append(feed.loc[feed[grouper] == counter].values)
        counter += 1

    # Hour of day
    feed['hourofday'] = feed.index.hour

    # Day of week
    feed['dayofweek'] = feed.index.dayofweek

    # Month
    feed['month'] = feed.index.month

    # Working day
    # Business-day flag per index date, using UK holidays for 2013-2017.
    f = np.vectorize(lambda x: isbday(
        x, holidays=holidays.UK(years=[2013, 2014, 2015, 2016, 2017])))
    feed['isworkingday'] = f(feed.index.date)

    # Weather data
    # File name is the lat_long string with spaces removed, plus ".csv".
    weather = pd.DataFrame.from_csv(
        os.path.join(dataDir, 'WEATHER_DATA',
                     '%s.csv' % r_lat_long.replace(" ", "")))

    # Converting text date into datetime
    weather['cleandate'] = weather['utcdate'].apply(lambda x: evalDate(x))

    weather.index = weather['cleandate']

    # Deleting irrelevant columns
    if 'date' in weather.columns:
        del weather['date']

    if 'date.1' in weather.columns:
        del weather['date.1']

    if 'utcdate' in weather.columns:
        del weather['utcdate']

    if 'Unnamed: 0' in weather.columns:
        del weather['Unnamed: 0']

    # Droping duplicates
    weather = weather.drop_duplicates(subset='cleandate')

    # Put the weather on a regular ``grouper`` grid between its first and last
    # timestamp; timestamps missing from the data become NaN rows.
    weather = weather.reindex(
        pd.date_range(weather['cleandate'].min(),
                      weather['cleandate'].max(),
                      freq=grouper))  #, method='backfill')

    # Keep only the weather variables used as features.
    weather = weather.loc[:,
                          ('conds', 'dewptm', 'fog', 'hail', 'hum', 'precipm',
                           'pressurem', 'rain', 'snow', 'tempm', 'thunder',
                           'wdire', 'wgustm', 'windchillm', 'wspdm')]

    weather.loc[:, 'conds'] = weather.loc[:, 'conds'].fillna('Unknown')
    weather.loc[:, 'wdire'] = weather.loc[:, 'wdire'].fillna('Variable')

    # Encode the two text columns as integers. ``le`` and ``le2`` are the
    # same encoder object; it is refit on 'wdire' after 'conds' has been
    # transformed.
    le = le2 = preprocessing.LabelEncoder()
    le.fit(weather['conds'])
    weather['conds'] = le.transform(weather['conds'])
    le2.fit(weather['wdire'])
    weather['wdire'] = le2.transform(weather['wdire'])

    # -9999 / -999 are treated as missing-value markers.
    weather.replace([-9999.0, -999.0], [np.nan, np.nan], inplace=True)

    weather.loc[:, ('precipm', 'wgustm')] = weather.loc[:,
                                                        ('precipm',
                                                         'wgustm')].fillna(0)

    # Missing wind chill falls back to the temperature.
    weather.windchillm = weather.windchillm.fillna(weather.tempm)

    # Fill the remaining gaps by interpolation.
    weather = weather.interpolate()

    # Trim feed and weather according to which one starts/ends first.
    if (weather.index.min() < feed.index.min()):
        if (weather.index.max() < feed.index.max()):
            weather = weather.ix[feed.index.min():, :]
            feed = feed.ix[:weather.index.max(), :]
        else:
            weather = weather.ix[feed.index.min():feed.index.max(), :]
    else:
        if (weather.index.max() < feed.index.max()):
            feed = feed.ix[weather.index.min():weather.index.max(), :]
        else:
            feed = feed.ix[weather.index.min():, :]
            weather = weather.ix[:feed.index.max(), :]

    # Lagged features / response built from the first feed column.
    features, response = mlf.ts_to_mimo(feed.ix[:, 0], window, h)

    # Increase the number of PCA components until at least 99% of the
    # weather variance is explained.
    n_factors = 1
    pca = PCA(n_components=n_factors)
    pca.fit(weather)
    while (pca.explained_variance_ratio_.sum() < 0.99):
        n_factors += 1
        pca = PCA(n_components=n_factors)
        pca.fit(weather)

    reduced = pd.DataFrame(pca.transform(weather))

    # One block of h columns per PCA component.
    c = np.zeros((features.shape[0], (h * len(reduced.columns))))

    for column in range(len(reduced.columns)):
        c[:, (column * h):((1 + column) * h)] = mlf.weather_to_mimo(
            reduced.ix[:, column], window, h)

    # Final layout: calendar columns (from row window + h - 1 onwards),
    # weather blocks, then the lagged feed features.
    features = np.concatenate((feed.ix[(window + h - 1):,
                                       ('isworkingday', grouper, 'hourofday',
                                        'dayofweek', 'month')], c, features),
                              axis=1)

    print('Features created')

    return (features, response)
def verify_price(symbol_id, y_data, q_data):
    """
    Verifies the OHLCAV data from Quandl and Yahoo for a given company, accounting
    for missing dates of data, missing entries of data (e.g. No closing price), and differences in
    precision. If one of the vendors is missing a date, we use the other vendor's
    data. If both vendors have different values for the same data point, we
    assign it zero. All zero entries will be reevaluated in the spike script.

    Only US business days between the earliest and latest date of the two
    datasets are processed. Written for Python 2 (print statements).

    NOTE(review): ``timestamp`` is not defined in this function and must be
    provided by the enclosing module.

    :param symbol_id: Used to identify the company
    :param y_data: OHLCAV Dataframe from Yahoo Finance
    :param q_data: OHLCAV Dataframe from Quandl
    :return: list ``[verified_prices, number_of_data_points,
        invalid_data_points, number_of_days]`` where ``verified_prices`` is the
        Dataframe of the cross referenced prices from Yahoo and Quandl
    """
    number_of_data_points = 0
    number_of_days = 0
    invalid_data_points = 0
    # NOTE: 'symbol_id' is not among these initial columns but is part of
    # every appended row.
    verified_prices = pd.DataFrame(columns=['price_date',
                                            'created_date', 'last_updated_date', 'open_price', 'high_price',
                                            'low_price', 'close_price', 'adj_close_price', 'volume'])

    # Gets the earliest and latest date. Used to handle the edge case
    # where we might have one vendors range of data differ from the others
    start_date, last_date = get_date_ranges(y_data, q_data)

    # Iterate through the dates, ignoring non-business days
    for date in pd.bdate_range(start_date, last_date):
        # Make sure to exclude American holidays
        if isbday(date, holidays=holidays.US()):
            zero_counter = 0
            # Four data points are counted per processed day.
            number_of_data_points += 4
            number_of_days += 1
            print "Processing prices for: " + date.strftime("%Y-%m-%d")
            # Row prefix: symbol_id, price_date, created_date, last_updated_date
            new_data_row = [symbol_id, date, timestamp, timestamp]
            y_daily_price = None
            q_daily_price = None
            # Convert Pandas Timestamp to a Date (YYYY-mm-DD)
            date = date.to_datetime().date()

            # Only pull the OHLCAV data if it is exists in the Dataframe
            if isinstance(y_data, pd.DataFrame):
                if not (y_data.loc[y_data['price_date'] == date]).empty:
                    y_daily_price = y_data.loc[y_data['price_date'] == date]

            if isinstance(q_data, pd.DataFrame):
                if not (q_data.loc[q_data['price_date'] == date]).empty:
                    q_daily_price = q_data.loc[q_data['price_date'] == date]

            # Determine if either vendor is missing data
            temp_prices = check_date(date, y_daily_price, q_daily_price)

            # Case 1: Only one vendor had data for the specific date, so we will use that vendors data
            # (the day is still counted as 4 invalid data points)
            if isinstance(temp_prices, pd.DataFrame):
                ohlcav_data_row = format_data(temp_prices)
                new_data_row.extend(ohlcav_data_row)
                zero_counter = 4

            # Case 2: Neither vendor had data, so we assign zeros to all values for this date
            elif not temp_prices:
                print "Neither Vendor had data for " + date.strftime(
                    "%Y-%m-%d") + " so we will fill the days price data with zeros"
                new_data_row.extend([0, 0, 0, 0, 0, 0])
                # Excluding Volume and Adjusted Close
                zero_counter = 4

            # Case 3: Compare the OHLCAV data
            else:
                yOHLCAV = format_data(y_daily_price)
                qOHLCAV = format_data(q_daily_price)

                data_row, zero_counter = compare_price_data(yOHLCAV, qOHLCAV)
                new_data_row.extend(data_row)

            # Create dataframe row from list of values
            price_df = pd.DataFrame(data=[new_data_row], columns=['symbol_id', 'price_date',
                                                                  'created_date', 'last_updated_date', 'open_price',
                                                                  'high_price', 'low_price',
                                                                  'close_price', 'adj_close_price', 'volume'])

            # Add the new price data for the specific date to our dataframe
            verified_prices = verified_prices.append(price_df, ignore_index=True)

            # Accumulate this day's zeroed / unverified data points.
            invalid_data_points += zero_counter
        else:
            print str(date.strftime("%Y-%m-%d")) + " is a US holiday!"

    stats_and_data = [verified_prices, number_of_data_points, invalid_data_points, number_of_days]
    return stats_and_data
    total_tickers = 0
    total_days = 0

    """Parameters to use to gather price data over a period of time """
    # Format: 'YYYY-MM-DD'
    start = '1998-01-01'
    end = '2016-10-13'


    """Parameters to use to gather the most recent days price data """
    #start = datetime.date.today().strftime("%Y-%m-%d")
    #end = datetime.date.today().strftime("%Y-%m-%d")

    # When just updating the most recent day, end the script if it is not a business day
    if start == end:
        if not isbday(start, holidays=holidays.US()):
            print "Not a business day, ending program."
            sys.exit()

    tickers = retrieve_db_tickers(con)
    # Collect data for each company
    for t in tickers:
        total_tickers += 1
        print "Cleaning price data for ticker: " + t[1]

        # Gather initial datasets from both vendors
        yahoo_data = retrieve_price_data(con, t[1], 1, start, end)
        quandl_data = retrieve_price_data(con, t[1], 2, start, end)

        if isinstance(yahoo_data, pd.DataFrame) or isinstance(quandl_data, pd.DataFrame):
            # Clean data and add to the database
Beispiel #17
0
 def test_isbday(self):
     """isbday honours weekends, explicit holidays and the global default."""
     # 2014-01-04 is a Saturday; 2014-01-01 is a weekday that is only a
     # non-business day when a holiday calendar is supplied.
     self.assertFalse(isbday(date(2014, 1, 4)))
     self.assertFalse(isbday("2014-01-04"))
     self.assertTrue(isbday(date(2014, 1, 1)))
     self.assertTrue(isbday("2014-01-01"))
     self.assertFalse(isbday(date(2014, 1, 1), holidays=holidays.US()))
     self.assertTrue(isbday(datetime(2014, 1, 1, 16, 30)))
     self.assertTrue(isbday(datetime(2014, 1, 1, 17, 30)))
     self.assertFalse(isbday(datetime(2014, 1, 1, 16, 30),
                      holidays=holidays.US()))
     self.assertFalse(isbday(datetime(2014, 1, 1, 17, 30),
                      holidays=holidays.US()))
     # The module-level default calendar applies when no ``holidays``
     # argument is given; an explicit argument overrides it.
     bdateutil.HOLIDAYS = holidays.Canada()
     try:
         self.assertFalse(isbday(date(2014, 7, 1)))
         self.assertTrue(isbday(date(2014, 7, 4)))
         self.assertFalse(isbday(date(2014, 1, 1)))
         self.assertTrue(isbday(date(2014, 7, 1), holidays=holidays.US()))
         self.assertFalse(isbday(date(2014, 7, 4), holidays=holidays.US()))
     finally:
         # Always restore the global default so a failing assertion cannot
         # leak the Canadian calendar into other tests.
         bdateutil.HOLIDAYS = []
Beispiel #18
0
    def getPriceTable(self,
                      tickerList,
                      startDate=None,
                      endDate=None,
                      localCheck=None,
                      dateAscending=True,
                      update=False):
        """Get the price series for multiple tickers.

        :param tickerList: ticker name, or list of ticker names
        :param startDate: start date of the price series, 'YYYY-MM-DD';
            defaults to 252 US business days before ``endDate``
        :param endDate: end date of the price series, 'YYYY-MM-DD'; defaults
            to the previous US business day
        :param localCheck: path of a local csv file (with a ``date`` index
            column) that is checked first; only the missing range is then
            retrieved through ``self.getPrice``
        :param dateAscending: accepted for compatibility; the series are
            always requested in ascending date order
        :param update: whether to write the merged data back to ``localCheck``
        :return: pandas DataFrame indexed by date with one column block per
            ticker
        :raises Exception: if ``startDate`` is later than ``endDate``, the
            local file cannot be read, or it lacks a requested ticker column
        """
        us_holidays = holidays.US()

        if endDate is None:
            endDate = dt.date.today() + relativedelta(bdays=-1,
                                                      holidays=us_holidays)
        else:
            endDate = dt.datetime.strptime(endDate, '%Y-%m-%d').date()
        if startDate is None:
            startDate = endDate + relativedelta(bdays=-252,
                                                holidays=us_holidays)
        else:
            startDate = dt.datetime.strptime(startDate, '%Y-%m-%d').date()

        if startDate > endDate:
            raise Exception('startDate is later than endDate')

        if isinstance(tickerList, str):
            tickerList = [
                tickerList,
            ]

        if localCheck is not None:
            try:
                localFile = pd.read_csv(localCheck,
                                        index_col='date',
                                        parse_dates=True)
            except Exception:
                # Narrowed from a bare ``except`` so KeyboardInterrupt and
                # SystemExit are not converted into a read error.
                raise Exception('Read Local File Error')

            if not all(ticker in localFile.columns for ticker in tickerList):
                raise Exception(
                    '''Local File Columns Doesn't Match Ticker List''')

            # Make sure it's business day: roll the boundaries inwards to the
            # nearest US business day (holidays included) before looking them
            # up in the local index.
            if not isbday(startDate, holidays=us_holidays):
                startDateCheck = startDate + relativedelta(
                    bdays=1, holidays=us_holidays)
            else:
                startDateCheck = startDate

            if not isbday(endDate, holidays=us_holidays):
                endDateCheck = endDate + relativedelta(
                    bdays=-1, holidays=us_holidays)
            else:
                endDateCheck = endDate

            # Both boundaries are cached: answer from the local file alone.
            if (startDateCheck in localFile.index
                    and endDateCheck in localFile.index):
                return localFile[startDate.isoformat():endDate.isoformat()]

            # Work out which range has to be retrieved.
            if startDate < localFile.index[0].date():
                readStart = startDate
                if endDate <= localFile.index[-1].date():
                    readEnd = localFile.index[0].date()
                else:
                    readEnd = endDate
            else:
                readStart = localFile.index[-1].date()
                readEnd = endDate

            tables = []

            for ticker in tickerList:
                tables.append(
                    self.getPrice(ticker,
                                  readStart.isoformat(),
                                  readEnd.isoformat(),
                                  dateAscending=True))
            missingComponents = pd.concat(tables, axis=1)
            # Merge with the cached rows, keeping the first occurrence of
            # each date.
            localFile = pd.concat([localFile, missingComponents],
                                  axis=0).sort_index(ascending=True)
            localFile = localFile[~localFile.index.duplicated()]
            if update:
                localFile.to_csv(localCheck, index=True)
            return localFile[startDate.isoformat():endDate.isoformat()]

        # No local cache: retrieve the full range for every ticker.
        tables = []
        for ticker in tickerList:
            tables.append(
                self.getPrice(ticker,
                              startDate.isoformat(),
                              endDate.isoformat(),
                              dateAscending=True))

        return pd.concat(tables, axis=1)
# -*- coding: utf-8 -*-
# This script takes the JSON data handed to it from PHP
# then parses it and sends it to IFTTT
# 
# Pat O'Brien 4/25/2016
#

import requests, sys, syslog, time, datetime, json
import holidays
from bdateutil import isbday

# Current local date formatted as MM/DD/YYYY
today = (time.strftime("%m/%d/%Y"))
# True when today is a business day, with US holidays excluded
businessDay = isbday(today, holidays=holidays.US())
now = datetime.datetime.now()
# Placeholders: replace with the real IFTTT event name and secret key
iftttEventName = "YOUR EVENT NAME"
iftttSecretKey = "YOUR KEY"
# The JSON payload is passed as the first command-line argument
data_raw = sys.argv[1]
data = json.loads(data_raw)

def logmsg(msg):
    """Print ``msg`` and write it to syslog at LOG_INFO with a fixed prefix.

    NOTE: uses the Python 2 print statement.
    """
    print msg
    syslog.syslog(syslog.LOG_INFO, 'SmartThings Webhooks Notify: %s' % msg )

# Setup variables based on JSON keys
# ``name`` falls back to an empty string when the payload has no "name" key.
# NOTE(review): dict.has_key() exists only in Python 2.
if data.has_key("name"):
    name = data["name"]
else:
    name = ""
    
if data.has_key("display_name"):
    display_name = data["display_name"]
    d = ast.literal_eval(x)
    df = df.append(d, ignore_index=True)
# Placeholder column combining date and time so the hour can be inspected
df['new'] = df['date'] + " " + df['time']
# Convert the text columns to datetimes
df['date'] = pd.to_datetime(df['date'])
df['new'] = pd.to_datetime(df['new'])
# If after 4 pm add 1 to the date because it is now the next trading day.
# Boolean-mask .loc updates replace the chained indexing
# (df['new'][i] += ...), which pandas does not guarantee to write back to df.
after_close = df['new'].dt.hour >= 16
df.loc[after_close, 'new'] += timedelta(days=1)
df.loc[after_close, 'date'] += timedelta(days=1)
# Check to see if date is on a business day.
# If it's not, i.e. it is on a Saturday (no tweet news on Sundays), add 2 days.
not_business_day = pd.Series(
    [not isbday(date(d.year, d.month, d.day)) for d in df['date']],
    index=df.index,
    dtype=bool)
df.loc[not_business_day, 'date'] += timedelta(days=2)
# Now dropping our placeholder column
df.drop(['new'], axis=1, inplace=True)
df.head()
# Stock data imported from Yahoo for the window 2014-01-01 .. 2019-04-01.
start = datetime.datetime(2014, 1, 1)
end = datetime.datetime(2019, 4, 1)
# NOTE(review): the variable is named `tsla` but the ticker requested is
# 'NFLX' — confirm which symbol is intended.
tsla = web.DataReader('NFLX', 'yahoo', start, end)
# Change of 'Close' relative to the previous row (the first row has no
# predecessor, so it is NaN).
tsla['Returns'] = tsla['Close'].pct_change(1)
# Drop the price/volume columns that are not used; 'Close' and 'Returns' stay.
tsla.drop(['High', 'Low', 'Open', 'Adj Close', 'Volume'], axis=1, inplace=True)
# Copy the index into a regular 'date' column (per the original note, the
# downloaded data carries its dates in the index).
tsla['date'] = tsla.index
 def test_isbday(self):
     """Check isbday() with no calendar, a per-call calendar and a module default."""
     # No holiday calendar: 2014-01-04 is rejected while 2014-01-01 counts as
     # a business day, for date objects and ISO strings alike.
     self.assertFalse(isbday(date(2014, 1, 4)))
     self.assertFalse(isbday("2014-01-04"))
     self.assertTrue(isbday(date(2014, 1, 1)))
     self.assertTrue(isbday("2014-01-01"))
     self.assertFalse(isbday(date(2014, 1, 1), holidays=holidays.US()))
     # datetime inputs give the same answers regardless of the time of day.
     self.assertTrue(isbday(datetime(2014, 1, 1, 16, 30)))
     self.assertTrue(isbday(datetime(2014, 1, 1, 17, 30)))
     self.assertFalse(isbday(datetime(2014, 1, 1, 16, 30),
                      holidays=holidays.US()))
     self.assertFalse(isbday(datetime(2014, 1, 1, 17, 30),
                      holidays=holidays.US()))
     # Module-wide default calendar. The reset lives in `finally` so that a
     # failing assertion cannot leak the Canadian calendar into other tests.
     bdateutil.HOLIDAYS = holidays.Canada()
     try:
         self.assertFalse(isbday(date(2014, 7, 1)))
         self.assertTrue(isbday(date(2014, 7, 4)))
         self.assertFalse(isbday(date(2014, 1, 1)))
         # An explicit holidays argument takes precedence over the default.
         self.assertTrue(isbday(date(2014, 7, 1), holidays=holidays.US()))
         self.assertFalse(isbday(date(2014, 7, 4), holidays=holidays.US()))
     finally:
         bdateutil.HOLIDAYS = []
def excludeCompute(x,y):
    """Return the row's head count, or NaN when its class is excluded.

    x -- mapping/row providing the 'ClassName' and 'HeadCount' entries.
    y -- container of class names to exclude (tested with ``in``).

    A notice is printed for every excluded row.
    """
    if x['ClassName'] in y:
        # One pre-formatted string keeps the output byte-identical on Python 2
        # and Python 3; the comma-separated print statement is a syntax error
        # on Python 3. The doubled spaces reproduce the separators the print
        # statement inserted between its items.
        print('exclude  %s  by setting nan' % (x['ClassName'],))
        return np.nan
    return x['HeadCount']
    
#Data processing

# Calendar month key (year + month), e.g. '201812'.
classAttendance['month'] = classAttendance['StartTimeLocal'].apply(
    lambda x: x.strftime('%Y%m'))
# ISO week key; ISO weeks run Monday-Sunday. %V must be paired with the ISO
# year %G rather than the calendar year %Y: 2018-12-31 belongs to ISO week 1
# of 2019, so '%Y%V' would label it '201801' and merge it with January 2018.
classAttendance['week'] = classAttendance['StartTimeLocal'].apply(
    lambda x: x.strftime('%G%V'))
# Result of isbday() for the class start time, using LOCALHOLIDAYS.
classAttendance['bday'] = classAttendance['StartTimeLocal'].apply(
    lambda x: isbday(x, holidays=LOCALHOLIDAYS))
# Calendar day key, e.g. '20181231'.
classAttendance['date'] = classAttendance['StartTimeLocal'].apply(
    lambda x: x.strftime('%Y%m%d'))

# Grouping keys derived from the start hour and weekday of each class.
classAttendance['HourWeekGrp'] = classAttendance.apply(
    lambda x: getCONSTANT_CLASS_HOURWEEKGROUP(x['StartHour'], x['Weekday']),
    axis=1)
classAttendance['WeekdayGrp'] = classAttendance['Weekday'].apply(getWeekdayGroup)
classAttendance['HourGrp'] = classAttendance['StartHour'].apply(getHourGroup)

# Remove invalid data: drop rows that fall on a non-business day AND have a
# head count of zero; every other row is kept.
non_bday_and_empty = ((classAttendance['bday'] == False)
                      & (classAttendance['HeadCount'] == 0))
classAttendance = classAttendance.loc[~non_bday_and_empty]
Beispiel #23
0
 def test_isbday(self):
     """Check isbday() with no calendar, a per-call calendar and a default one."""
     # No holiday calendar: 2014-01-04 is rejected while 2014-01-01 counts as
     # a business day, for date objects and ISO strings alike.
     self.assertFalse(isbday(date(2014, 1, 4)))
     self.assertFalse(isbday("2014-01-04"))
     self.assertTrue(isbday(date(2014, 1, 1)))
     self.assertTrue(isbday("2014-01-01"))
     self.assertFalse(isbday(date(2014, 1, 1), holidays=holidays.US()))
     # datetime inputs give the same answers regardless of the time of day.
     self.assertTrue(isbday(datetime(2014, 1, 1, 16, 30)))
     self.assertTrue(isbday(datetime(2014, 1, 1, 17, 30)))
     self.assertFalse(
         isbday(datetime(2014, 1, 1, 16, 30), holidays=holidays.US()))
     self.assertFalse(
         isbday(datetime(2014, 1, 1, 17, 30), holidays=holidays.US()))
     # Install US holidays as the default calendar on the isbday function.
     # The previous state (including "attribute not set") is restored in
     # `finally` so the default cannot leak into other tests.
     unset = object()
     previous = getattr(isbday, 'holidays', unset)
     isbday.holidays = holidays.US()
     try:
         self.assertFalse(isbday(date(2014, 1, 1)))
         self.assertFalse(isbday(date(2014, 7, 4)))
         # An explicit holidays argument takes precedence over the default.
         self.assertTrue(isbday(date(2014, 7, 4), holidays=holidays.CA()))
     finally:
         if previous is unset:
             del isbday.holidays
         else:
             isbday.holidays = previous
Beispiel #24
0
 def test_isbday(self):
     """Check isbday() with no calendar, a per-call calendar and a default one."""
     # No holiday calendar: 2014-01-04 is rejected while 2014-01-01 counts as
     # a business day, for date objects and ISO strings alike.
     self.assertFalse(isbday(date(2014, 1, 4)))
     self.assertFalse(isbday("2014-01-04"))
     self.assertTrue(isbday(date(2014, 1, 1)))
     self.assertTrue(isbday("2014-01-01"))
     self.assertFalse(isbday(date(2014, 1, 1), holidays=holidays.US()))
     # datetime inputs give the same answers regardless of the time of day.
     self.assertTrue(isbday(datetime(2014, 1, 1, 16, 30)))
     self.assertTrue(isbday(datetime(2014, 1, 1, 17, 30)))
     self.assertFalse(isbday(datetime(2014, 1, 1, 16, 30),
                      holidays=holidays.US()))
     self.assertFalse(isbday(datetime(2014, 1, 1, 17, 30),
                      holidays=holidays.US()))
     # Install US holidays as the default calendar on the isbday function.
     # The previous state (including "attribute not set") is restored in
     # `finally` so the default cannot leak into other tests.
     unset = object()
     previous = getattr(isbday, 'holidays', unset)
     isbday.holidays = holidays.US()
     try:
         self.assertFalse(isbday(date(2014, 1, 1)))
         self.assertFalse(isbday(date(2014, 7, 4)))
         # An explicit holidays argument takes precedence over the default.
         self.assertTrue(isbday(date(2014, 7, 4), holidays=holidays.CA()))
     finally:
         if previous is unset:
             del isbday.holidays
         else:
             isbday.holidays = previous
Beispiel #25
0
def isBDay():
    """Tell whether the current local date/time falls on a business day.

    The current datetime is handed to isbday() together with the US holiday
    calendar, and isbday()'s result is returned unchanged.
    """
    return isbday(datetime.now(), holidays=holidays.US())
Beispiel #26
0
def recentbday(date):
    """Return the most recent business day strictly before *date*.

    date -- a date/datetime-like value supporting timedelta subtraction and
            strftime().

    Walks backwards one day at a time, starting from the day before *date*,
    until isbday() accepts the day under the US holiday calendar. The result
    is formatted as 'YYYY-MM-DD'.
    """
    # Build the holiday calendar once instead of on every loop iteration.
    us_holidays = holidays.US()
    one_day = timedelta(1)
    date -= one_day  # never return the starting day itself
    while not isbday(date, holidays=us_holidays):
        date -= one_day
    return date.strftime('%Y-%m-%d')