def setUp(self):
     """Build the holiday calendar used by the tests."""
     # NOTE(review): each assignment below overwrites the previous one, so only
     # the final holidays.UK() calendar is actually retained -- the regional
     # calendars are constructed and immediately discarded. Looks like several
     # per-region setUp bodies were merged together; confirm which calendar
     # the tests actually need.
     self.holidays = holidays.England()
     self.holidays = holidays.Wales()
     self.holidays = holidays.Scotland()
     self.holidays = holidays.IsleOfMan()
     self.holidays = holidays.NorthernIreland()
     self.holidays = holidays.UK()
def date_range(start_date, end_date, weekdays=None, exclude_holidays=True):
    """
    Generate a list of all dates within the given period

    Parameters
    ----------
    start_date : datetime.date object
        Starting date of the period (inclusive)
    end_date : datetime.date object
        Ending date of the period (inclusive)
    weekdays : list
        If specified, constrain to these days of the week only, e.g., ['Tuesday', 'Friday']
    exclude_holidays : bool
        If True (the default), skip dates that are UK public holidays

    Returns
    -------
    rng : list
        List of dates in the format of datetime.date
    """
    # Build the holiday calendar once up front: the original constructed
    # holidays.UK() inside the loop, re-creating it for every single day.
    uk_holidays = holidays.UK() if exclude_holidays else None
    day_names = list(calendar.day_name)

    rng = []
    d = start_date
    while d <= end_date:
        if weekdays is None or day_names[d.weekday()] in weekdays:
            if not exclude_holidays or d not in uk_holidays:
                rng.append(d)
        d += timedelta(days=1)
    return rng
Exemple #3
0
def get_dates(df, date_column):
    """Converts a given date to various formats and returns an updated DataFrame.

    Args:
        df: Pandas DataFrame.
        date_column: Name of the datetime64 column to derive the new columns from.

    Returns:
        Original DataFrame with additional date columns.
    """

    dates = df[date_column].dt  # datetime accessor, reused for every derived column

    df['day'] = dates.strftime("%d").astype(int)  # Day of month
    df['month'] = dates.strftime("%m").astype(int)  # Month of year
    df['year'] = dates.strftime("%Y").astype(int)  # Full numeric four digit year
    df['year_month'] = dates.strftime("%Y%m").astype(int)  # Four digit year plus month
    df['week_number'] = dates.strftime("%U").astype(int)  # Week number of year
    df['day_number'] = dates.strftime("%j").astype(int)  # Day number of year
    df['day_name'] = dates.strftime("%A")  # Day name, i.e. Sunday
    df['month_name'] = dates.strftime("%B")  # Month name, i.e. January
    # BUG FIX: the original used "%Y-%d-%m" (year-DAY-month, e.g. 2020-30-01);
    # MySQL's DATE format is YYYY-MM-DD.
    df['mysql_date'] = dates.strftime("%Y-%m-%d")  # MySQL date, i.e. 2020-01-30
    df['quarter'] = dates.quarter.astype(int)  # Quarter of year, 1-4
    df['week_day_number'] = dates.strftime("%w").astype(int)  # 0 = Sunday ... 6 = Saturday
    # BUG FIX: use date_column rather than the hard-coded 'date' column so the
    # function works whatever the datetime column is called.
    df['is_weekend'] = (dates.dayofweek // 5 == 1).astype(int)  # 1 if weekend, 0 if weekday

    uk_holidays = holidays.UK()
    df['is_uk_holiday'] = np.where(df[date_column].isin(uk_holidays), 1, 0).astype(int)  # 1 if a UK holiday

    return df
Exemple #4
0
def contract_expiry(contract_date):
    """Compute the contract expiry date.

    Steps back two working days from *contract_date* via ``workday``,
    treating UK public holidays for 2010-2019 as non-working days.
    """
    uk_calendar = holidays.UK(
        state=None,
        years=[2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019])
    # workday() expects a plain sequence of holiday dates.
    holiday_list = [day for day, _name in sorted(uk_calendar.items())]
    return workday(contract_date, -2, holiday_list)
Exemple #5
0
def make_uk_holidays(start=1991, end=2040) -> pd.DataFrame:
    """return a DataFrame of all uk holidays (including easter sunday)"""
    calendar_uk = holidays.UK(years=list(range(start, end)))

    # The holidays package has no Easter Sunday entry for the UK, so derive it
    # as the day before each Easter Monday.
    easter_sundays = []
    for day, label in calendar_uk.items():
        if 'Easter Monday' in label:
            easter_sundays.append((day - timedelta(days=1), 'Easter Sunday'))

    frame = pd.DataFrame(list(calendar_uk.items()) + easter_sundays,
                         columns=['ds', 'holiday'])
    frame['ds'] = pd.to_datetime(frame['ds'])
    return frame
Exemple #6
0
def add_holiday(x):
    """Attach a per-site public-holiday indicator to *x*.

    Args:
        x: DataFrame with 'site' and 'timestamp' columns.

    Returns:
        *x* left-joined with an 'h0' column: 1 if the timestamp falls on a
        public holiday in the site's country, else 0.
    """
    time_range = pd.date_range(start='2015-12-31', end='2019-01-01', freq='h')
    country_holidays = {'UK': holidays.UK(), 'US': holidays.US(), 'IRL': holidays.Ireland(), 'CAN': holidays.Canada()}

    # Collect per-site frames and concat once (concat inside the loop is quadratic).
    frames = []
    for site in range(16):
        mapping = pd.DataFrame({'site': site, 'timestamp': time_range})
        # NOTE(review): `locate` is assumed to be a module-level site->country
        # lookup -- confirm against the rest of the file.
        calendar_for_site = country_holidays[locate[site]['country']]
        # Lambda parameter renamed `ts` -- the original shadowed the DataFrame
        # argument `x`.
        mapping['h0'] = mapping['timestamp'].apply(
            lambda ts: ts in calendar_for_site).astype(int)
        frames.append(mapping)
    holiday_mapping = pd.concat(frames, axis=0)

    # BUG FIX: pd.merge takes left and right frames as separate positional
    # arguments; the original pd.merge([x, holiday_mapping], ...) raises.
    x = pd.merge(x, holiday_mapping, on=['site', 'timestamp'], how='left')
    return x
Exemple #7
0
def holiday_adjust(trade_date, delta):
    """Roll trade_date + delta forward to the next common business day.

    Advances one day at a time while the candidate date is a public holiday
    in Australia, the US, the UK or Japan, or falls on a weekend.

    Args:
        trade_date: base date.
        delta: timedelta added before adjusting.

    Returns:
        The first date on/after trade_date + delta that is neither a listed
        holiday nor a weekend day.
    """
    forward_date = trade_date + delta
    # BUG FIX: the original recursed but discarded the recursive call's return
    # value, so a date rolled onto another holiday/weekend was returned
    # unchecked. A loop keeps re-checking until the date is clear.
    while (forward_date in hol.Australia() or forward_date in hol.US()
            or forward_date in hol.UK() or forward_date in hol.Japan()
            or forward_date.weekday() >= 5):
        forward_date = forward_date + dt.timedelta(days=1)
    return forward_date
 def test_all_holidays_present(self):
     """Every expected 2015 UK holiday name appears in the generated calendar."""
     uk_2015 = holidays.UK(years=[2015])
     expected_names = (
         "New Year's Day",
         "Good Friday",
         "Easter Monday [England/Wales/Northern Ireland]",
         "May Day",
         "Spring Bank Holiday",
         "Late Summer Bank Holiday [England/Wales/Northern Ireland]",
         "Christmas Day",
         "Boxing Day",
         "St. Patrick's Day [Northern Ireland]",
     )
     generated = uk_2015.values()
     for name in expected_names:
         self.assertIn(name, generated)
    def send_daily_notifications(self):
        """
        Send daily summary mail notification. The opposite of notify_user()
        :param self:
        :return:
        """
        # Bail out on non-working days: we don't want to bother people with
        # emails at the weekend or on statutory holidays.
        today = date.today()

        # First guard: is today a UK public holiday?
        if today in holidays.UK():
            return

        # Second guard: is today a conventional working day? Annoyingly,
        # numpy.is_busday() won't accept objects generated by the holidays
        # module as a holiday calendar (it wants an array-like of datetime),
        # which is why the holiday test above is separate.
        # (In future perhaps allow users to choose their own working-day pattern.)
        if not is_busday(today, holidays=None):
            return

        def _notification_tasks(role_name, dispatcher):
            # One dispatch task per active user holding the given role.
            members = db.session.query(User).filter(
                User.active == True, User.roles.any(Role.name == role_name)).all()
            return group(dispatcher.si(m.id) for m in members if m is not None)

        # Students and faculty are treated slightly differently, so each gets
        # its own dispatcher.
        student_tasks = _notification_tasks('student', dispatch_student_notifications)
        faculty_tasks = _notification_tasks('faculty', dispatch_faculty_notifications)

        raise self.replace(group(student_tasks, faculty_tasks))
Exemple #10
0
def find_good_epics():
    """Screen main_epic_ids and return those with an acceptable quoted spread.

    Builds today's date strings for five market timezones plus a combined
    prefixed holiday list (Germany/UK/US/Australia/Japan) to set per-market
    holiday flags, then queries the trading API for each epic's bid/offer
    snapshot and keeps epics whose spread passes the threshold.

    Returns:
        list: epic ids that passed the spread filter.
    """
    spreads_and_epics = []
    i_count = 0
    pick_from_epics = []
    full_hol_list = []
    ###################################################################
    # "Today" is computed per market timezone and tagged with a market
    # prefix so it can be matched against the holiday list below.
    tz = pytz.timezone('Europe/Berlin')
    todays_date = str(datetime.datetime.now(tz=tz).strftime('%Y-%m-%d'))
    ger_today = str(str("GER_" + str(todays_date)))
    print("Europe/Berlin :- Today's Date is ..." + str(todays_date))
    ###################################################################
    tz = pytz.timezone('Europe/London')
    todays_date = str(datetime.datetime.now(tz=tz).strftime('%Y-%m-%d'))
    gb_today = str(str("GB_" + str(todays_date)))
    print("Europe/London :- Today's Date is ..." + str(todays_date))
    ###################################################################
    tz = pytz.timezone('America/New_York')
    todays_date = str(datetime.datetime.now(tz=tz).strftime('%Y-%m-%d'))
    us_today = str(str("US_" + str(todays_date)))
    print("America/New_York :- Today's Date is ..." + str(todays_date))
    ###################################################################
    tz = pytz.timezone('Australia/Sydney')
    todays_date = str(datetime.datetime.now(tz=tz).strftime('%Y-%m-%d'))
    aus_today = str(str("AUS_" + str(todays_date)))
    print("Australia/Sydney :- Today's Date is ..." + str(todays_date))
    ###################################################################
    tz = pytz.timezone('Asia/Tokyo')
    todays_date = str(datetime.datetime.now(tz=tz).strftime('%Y-%m-%d'))
    jp_today = str(str("JP_" + str(todays_date)))
    print("Asia/Tokyo :- Today's Date is ..." + str(todays_date))
    ###################################################################
    # Per-market "is today a holiday" flags.
    # NOTE(review): these flags are computed but never read anywhere in this
    # version of the function -- confirm whether they should gate trading as
    # in the sibling implementation.
    b_ger_hol = False
    b_uk_hol = False
    b_us_hol = False
    b_aus_hol = False
    b_jp_hol = False

    # Tag each holiday date with its market prefix ("GER_2020-01-01", ...).
    for date, name in sorted(holidays.DE(years=YEAR_var).items()):
        full_hol_list.append(str("GER_" + str(date)))
    for date, name in sorted(holidays.UK(years=YEAR_var).items()):
        full_hol_list.append(str("GB_" + str(date)))
    for date, name in sorted(holidays.US(years=YEAR_var).items()):
        full_hol_list.append(str("US_" + str(date)))
    for date, name in sorted(holidays.AU(years=YEAR_var).items()):
        full_hol_list.append(str("AUS_" + str(date)))
    for date, name in sorted(holidays.JP(years=YEAR_var).items()):
        full_hol_list.append(str("JP_" + str(date)))

    full_hol_list = sorted(full_hol_list)

    # Set the per-market flags when today matches a prefixed holiday entry.
    for d in full_hol_list:
        #print (d)
        if str(d) == ger_today:
            b_ger_hol = True
        if str(d) == gb_today:
            b_uk_hol = True
        if str(d) == us_today:
            b_us_hol = True
        if str(d) == aus_today:
            b_aus_hol = True
        if str(d) == jp_today:
            b_jp_hol = True

    # Fetch a market snapshot per epic and filter on the quoted spread.
    for epic_id in main_epic_ids:
        tmp_lst = []
        base_url = REAL_OR_NO_REAL + '/markets/' + epic_id
        auth_r = requests.get(base_url, headers=authenticated_headers)
        d = json.loads(auth_r.text)

        try:
            i_count = i_count + 1
            # Skip illiquid/exotic currency pairs outright.
            if epic_id.find('MXN') != -1:
                #print("!!DEBUG!!...skipping, FOUND MXN in..." + str(epic_id))
                time.sleep(1)
            elif epic_id.find('SEK') != -1:
                #print("!!DEBUG!!...skipping, FOUND SEK in..." + str(epic_id))
                time.sleep(1)
            elif epic_id.find('NOK') != -1:
                #print("!!DEBUG!!...skipping, FOUND NOK in..." + str(epic_id))
                time.sleep(1)
            elif epic_id.find('CNH') != -1:
                #print("!!DEBUG!!...skipping, FOUND CNH in..." + str(epic_id))
                time.sleep(1)
            else:
                b_TRADE_OK = True
                if b_TRADE_OK:

                    current_bid = d['snapshot']['bid']
                    ask_price = d['snapshot']['offer']
                    # NOTE(review): spread here is bid - offer, which is
                    # normally negative -- hence the negative threshold below.
                    spread = float(current_bid) - float(ask_price)
                    if float(spread) >= -1.51:
                        # tmp_lst.append(epic_id)
                        # spreads_and_epics.append(tmp_lst)
                        pick_from_epics.append(epic_id)
                        # print ("bid : " + str(current_bid))
                        # print ("ask : " + str(ask_price))
                        # print ("-------------------------")
                        # print ("spread : " + str(spread))
                        # print ("-------------------------")
                        print(
                            "!!DEBUG!!...FOUND GOOD EPIC {} spread {}...{}/{}".
                            format(epic_id, spread, i_count,
                                   len(main_epic_ids)))
                        time.sleep(1)
                    else:
                        print(
                            "!!DEBUG!!...skipping, NO GOOD EPIC {} spread {} ....Checking next epic spreads...{}/{}"
                            .format(epic_id, spread, i_count,
                                    len(main_epic_ids)))
                        time.sleep(1)
                        continue
                else:
                    print(
                        "!!DEBUG!!...skipping, NOT CURRENTLY TRADEABLE EPIC {} ....Checking next epic spreads...{}/{}"
                        .format(epic_id, i_count, len(main_epic_ids)))

        except Exception as e:
            # Best-effort loop: a failed snapshot for one epic should not
            # abort the whole scan.
            print(e)
            pass

    return (pick_from_epics)
Exemple #11
0
def featureCreation(feed,
                    window,
                    h,
                    grouper,
                    dataDir,
                    apiDic,
                    r_id=None,
                    longestfeed=False):
    """Build a feature matrix and MIMO response from a time-series feed.

    Adds calendar features (time-of-day bucket, hour, weekday, month,
    working-day flag), merges in per-location weather data (cleaned,
    label-encoded and PCA-reduced), and converts the aligned series into
    windowed (features, response) arrays via ``mlf``.

    NOTE(review): undocumented in the original -- assumptions below are
    inferred and should be confirmed:
      - feed: datetime-indexed series/frame, first column is the target.
      - window / h: look-back window length and forecast horizon for
        mlf.ts_to_mimo / mlf.weather_to_mimo.
      - grouper: pandas frequency string (e.g. '15min') also used as a
        column name for the time-of-day bucket.
      - apiDic: frame mapping feed id -> 'lat_long' used to locate the
        weather CSV. longestfeed is accepted but never used.

    Returns:
        tuple: (features ndarray, response ndarray).
    """
    import pandas as pd
    from bdateutil import isbday
    import holidays
    from sklearn.decomposition import PCA

    feed = pd.DataFrame(feed)
    # NOTE(review): `[1]` is a label-based lookup on the filtered Series --
    # presumably the matching row's label is 1; confirm against apiDic's index.
    r_lat_long = apiDic.loc[(apiDic['id'] == int(r_id)), 'lat_long'][1]

    # Quarter of hour: number each grouper-sized slot of the day (0, 1, ...)
    counter = 0
    array = []
    for i in pd.date_range('00:00', '23:45', freq=grouper):
        feed.loc[(feed.index.hour == i.hour) & (feed.index.minute == i.minute),
                 grouper] = counter
        array.append(feed.loc[feed[grouper] == counter].values)
        counter += 1

    # Hour of day
    feed['hourofday'] = feed.index.hour

    # Day of week
    feed['dayofweek'] = feed.index.dayofweek

    # Month
    feed['month'] = feed.index.month

    # Working day flag (business day excluding UK holidays 2013-2017)
    f = np.vectorize(lambda x: isbday(
        x, holidays=holidays.UK(years=[2013, 2014, 2015, 2016, 2017])))
    feed['isworkingday'] = f(feed.index.date)

    # Weather data for this feed's location.
    # NOTE(review): pd.DataFrame.from_csv was removed in pandas 1.0 -- this
    # code requires an old pandas (same goes for .ix used further down).
    weather = pd.DataFrame.from_csv(
        os.path.join(dataDir, 'WEATHER_DATA',
                     '%s.csv' % r_lat_long.replace(" ", "")))

    # Converting text date into datetime
    weather['cleandate'] = weather['utcdate'].apply(lambda x: evalDate(x))

    weather.index = weather['cleandate']

    # Deleting irrelevant columns
    if 'date' in weather.columns:
        del weather['date']

    if 'date.1' in weather.columns:
        del weather['date.1']

    if 'utcdate' in weather.columns:
        del weather['utcdate']

    if 'Unnamed: 0' in weather.columns:
        del weather['Unnamed: 0']

    # Droping duplicates
    weather = weather.drop_duplicates(subset='cleandate')

    # Re-sample onto a regular grid at the grouper frequency (gaps become NaN).
    weather = weather.reindex(
        pd.date_range(weather['cleandate'].min(),
                      weather['cleandate'].max(),
                      freq=grouper))  #, method='backfill')

    # Keep only the meteorological columns of interest.
    weather = weather.loc[:,
                          ('conds', 'dewptm', 'fog', 'hail', 'hum', 'precipm',
                           'pressurem', 'rain', 'snow', 'tempm', 'thunder',
                           'wdire', 'wgustm', 'windchillm', 'wspdm')]

    weather.loc[:, 'conds'] = weather.loc[:, 'conds'].fillna('Unknown')
    weather.loc[:, 'wdire'] = weather.loc[:, 'wdire'].fillna('Variable')

    # NOTE(review): le and le2 are the SAME LabelEncoder object (chained
    # assignment), so le2.fit() refits the encoder used for 'conds'. This
    # only works because 'conds' is transformed before le2.fit runs --
    # fragile; statement order must not change.
    le = le2 = preprocessing.LabelEncoder()
    le.fit(weather['conds'])
    weather['conds'] = le.transform(weather['conds'])
    le2.fit(weather['wdire'])
    weather['wdire'] = le2.transform(weather['wdire'])

    # Sentinel values used by the weather source for "missing".
    weather.replace([-9999.0, -999.0], [np.nan, np.nan], inplace=True)

    weather.loc[:, ('precipm', 'wgustm')] = weather.loc[:,
                                                        ('precipm',
                                                         'wgustm')].fillna(0)

    # Missing wind chill defaults to the plain temperature.
    weather.windchillm = weather.windchillm.fillna(weather.tempm)

    weather = weather.interpolate()

    # Trim weather and feed to their overlapping date range (four cases
    # depending on which series starts/ends first).
    if (weather.index.min() < feed.index.min()):
        if (weather.index.max() < feed.index.max()):
            weather = weather.ix[feed.index.min():, :]
            feed = feed.ix[:weather.index.max(), :]
        else:
            weather = weather.ix[feed.index.min():feed.index.max(), :]
    else:
        if (weather.index.max() < feed.index.max()):
            feed = feed.ix[weather.index.min():weather.index.max(), :]
        else:
            feed = feed.ix[weather.index.min():, :]
            weather = weather.ix[:feed.index.max(), :]

    # Windowed supervised-learning arrays from the target series (column 0).
    features, response = mlf.ts_to_mimo(feed.ix[:, 0], window, h)

    # PCA-reduce weather: grow component count until 99% variance is kept.
    n_factors = 1
    pca = PCA(n_components=n_factors)
    pca.fit(weather)
    while (pca.explained_variance_ratio_.sum() < 0.99):
        n_factors += 1
        pca = PCA(n_components=n_factors)
        pca.fit(weather)

    reduced = pd.DataFrame(pca.transform(weather))

    # One h-wide slab per retained weather component.
    c = np.zeros((features.shape[0], (h * len(reduced.columns))))

    for column in range(len(reduced.columns)):
        c[:, (column * h):((1 + column) * h)] = mlf.weather_to_mimo(
            reduced.ix[:, column], window, h)

    # Final matrix: calendar features | weather components | lagged target.
    features = np.concatenate((feed.ix[(window + h - 1):,
                                       ('isworkingday', grouper, 'hourofday',
                                        'dayofweek', 'month')], c, features),
                              axis=1)

    print('Features created')

    return (features, response)
Exemple #12
0
    settings["country_last"] = country_last
    settings.flush()
    return country_last


country_holidays = {
    "CA": holidays.CA(),
    "CO": holidays.CO(),
    "MX": holidays.MX(),
    "US": holidays.US(),
    "NZ": holidays.NZ(),
    "AU": holidays.AU(),
    "DE": holidays.DE(),
    "AT": holidays.AT(),
    "DK": holidays.DK(),
    "UK": holidays.UK(),
    "IE": holidays.IE(),
    "ES": holidays.ES(),
    "CZ": holidays.CZ(),
    "SK": holidays.SK(),
    "PL": holidays.PL(),
    "PT": holidays.PT(),
    "NL": holidays.NL(),
    "NO": holidays.NO(),
    "IT": holidays.IT(),
    "SE": holidays.SE(),
    "JP": holidays.JP(),
    "BE": holidays.BE(),
    "ZA": holidays.ZA(),
    "SI": holidays.SI(),
    "FI": holidays.FI(),
Exemple #13
0
def find_good_epics():
    """Screen main_epic_ids and return those tradeable now with a good spread.

    Builds today's date strings for five market timezones plus a combined
    prefixed holiday list (Germany/UK/US/Australia/Japan) to set per-market
    holiday flags. An epic qualifies only when its currency's home market is
    within trading hours AND today is not that market's holiday, and its
    quoted spread passes the threshold.

    Returns:
        list: epic ids that passed the tradeability and spread filters.
    """
    spreads_and_epics = []
    i_count = 0
    pick_from_epics = []
    full_hol_list = []
    ###################################################################
    # "Today" is computed per market timezone and tagged with a market
    # prefix so it can be matched against the holiday list below.
    tz = pytz.timezone('Europe/Berlin')
    todays_date = str(datetime.datetime.now(tz=tz).strftime('%Y-%m-%d'))
    ger_today = str(str("GER_" + str(todays_date)))
    print("Europe/Berlin :- Today's Date is ..." + str(todays_date))
    ###################################################################
    tz = pytz.timezone('Europe/London')
    todays_date = str(datetime.datetime.now(tz=tz).strftime('%Y-%m-%d'))
    gb_today = str(str("GB_" + str(todays_date)))
    print("Europe/London :- Today's Date is ..." + str(todays_date))
    ###################################################################
    tz = pytz.timezone('America/New_York')
    todays_date = str(datetime.datetime.now(tz=tz).strftime('%Y-%m-%d'))
    us_today = str(str("US_" + str(todays_date)))
    print("America/New_York :- Today's Date is ..." + str(todays_date))
    ###################################################################
    tz = pytz.timezone('Australia/Sydney')
    todays_date = str(datetime.datetime.now(tz=tz).strftime('%Y-%m-%d'))
    aus_today = str(str("AUS_" + str(todays_date)))
    print("Australia/Sydney :- Today's Date is ..." + str(todays_date))
    ###################################################################
    tz = pytz.timezone('Asia/Tokyo')
    todays_date = str(datetime.datetime.now(tz=tz).strftime('%Y-%m-%d'))
    jp_today = str(str("JP_" + str(todays_date)))
    print("Asia/Tokyo :- Today's Date is ..." + str(todays_date))
    ###################################################################
    # Per-market "is today a holiday" flags, set from full_hol_list below.
    b_ger_hol = False
    b_uk_hol = False
    b_us_hol = False
    b_aus_hol = False
    b_jp_hol = False

    # Tag each holiday date with its market prefix ("GER_2020-01-01", ...).
    for date, name in sorted(holidays.DE(years=YEAR_var).items()):
        full_hol_list.append(str("GER_" + str(date)))
    for date, name in sorted(holidays.UK(years=YEAR_var).items()):
        full_hol_list.append(str("GB_" + str(date)))
    for date, name in sorted(holidays.US(years=YEAR_var).items()):
        full_hol_list.append(str("US_" + str(date)))
    for date, name in sorted(holidays.AU(years=YEAR_var).items()):
        full_hol_list.append(str("AUS_" + str(date)))
    for date, name in sorted(holidays.JP(years=YEAR_var).items()):
        full_hol_list.append(str("JP_" + str(date)))

    full_hol_list = sorted(full_hol_list)

    # Set the per-market flags when today matches a prefixed holiday entry.
    for d in full_hol_list:
        #print (d)
        if str(d) == ger_today:
            b_ger_hol = True
        if str(d) == gb_today:
            b_uk_hol = True
        if str(d) == us_today:
            b_us_hol = True
        if str(d) == aus_today:
            b_aus_hol = True
        if str(d) == jp_today:
            b_jp_hol = True

    # Fetch a market snapshot per epic and filter on tradeability + spread.
    for epic_id in main_epic_ids:
        tmp_lst = []
        base_url = REAL_OR_NO_REAL + '/markets/' + epic_id
        auth_r = requests.get(base_url, headers=authenticated_headers)
        d = json.loads(auth_r.text)

        try:
            i_count = i_count + 1
            # Skip illiquid/exotic currency pairs outright.
            if epic_id.find('MXN') != -1:
                #print("!!DEBUG!!...skipping, FOUND MXN in..." + str(epic_id))
                time.sleep(1)
            elif epic_id.find('SEK') != -1:
                #print("!!DEBUG!!...skipping, FOUND SEK in..." + str(epic_id))
                time.sleep(1)
            elif epic_id.find('NOK') != -1:
                #print("!!DEBUG!!...skipping, FOUND NOK in..." + str(epic_id))
                time.sleep(1)
            elif epic_id.find('CNH') != -1:
                #print("!!DEBUG!!...skipping, FOUND CNH in..." + str(epic_id))
                time.sleep(1)
            else:
                b_TRADE_OK = False
                # Single-pass "loop" (note the unconditional break at the
                # bottom): checks each market in turn and marks the epic
                # tradeable when its currency's home market is open today
                # and today is not that market's holiday.
                while True:

                    ###################EUROPE############################
                    ###################EUROPE############################
                    ###################EUROPE############################
                    tz = pytz.timezone('Europe/Berlin')
                    now_time = datetime.datetime.now(tz=tz).strftime('%H:%M')
                    #print ("!!DEBUG!! Europe/Berlin:" + str(now_time))
                    if is_between(str(now_time), ("08:00", "16:00")):
                        #print("!!DEBUG!!...FRANKFURT MARKET OPEN!!")
                        time.sleep(1)
                        STR_CHECK = "EUR"
                        if STR_CHECK in epic_id and b_ger_hol == False:
                            b_TRADE_OK = True
                            break
                    ###################LONDON############################
                    ###################LONDON############################
                    ###################LONDON############################
                    tz = pytz.timezone('Europe/London')
                    now_time = datetime.datetime.now(tz=tz).strftime('%H:%M')
                    # Block while the broker's tally-up hour is in progress.
                    while True:
                        if is_between(str(now_time), ("22:00", "22:59")):
                            time.sleep(1)  # Sleeping for the tally up hour
                            print("!!DEBUG!! Tally Up hour:" + str(now_time))
                            now_time = datetime.datetime.now(
                                tz=tz).strftime('%H:%M')
                        else:
                            break
                    #print ("!!DEBUG!! Europe/London:" + str(now_time))
                    if is_between(str(now_time), ("08:00", "16:00")):
                        #print("!!DEBUG!!...LONDON MARKET OPEN!!")
                        time.sleep(1)
                        STR_CHECK = "GBP"
                        if STR_CHECK in epic_id and b_uk_hol == False:
                            b_TRADE_OK = True
                            break
                    ###################NY############################
                    ###################NY############################
                    ###################NY############################
                    tz = pytz.timezone('America/New_York')
                    now_time = datetime.datetime.now(tz=tz).strftime('%H:%M')
                    #print ("!!DEBUG!! America/New_York:" + str(now_time))
                    if is_between(str(now_time), ("08:00", "16:00")):
                        #print("!!DEBUG!!...NEW YORK MARKET OPEN!!")
                        time.sleep(1)
                        STR_CHECK = "USD"
                        if STR_CHECK in epic_id and b_us_hol == False:
                            b_TRADE_OK = True
                            break
                    ###################AUS############################
                    ###################AUS############################
                    ###################AUS############################
                    tz = pytz.timezone('Australia/Sydney')
                    now_time = datetime.datetime.now(tz=tz).strftime('%H:%M')
                    #print ("!!DEBUG!! Australia/Sydney:" + str(now_time))
                    if is_between(str(now_time), ("08:00", "16:00")):
                        #print("!!DEBUG!!...SYDNEY MARKET OPEN!!")
                        time.sleep(1)
                        STR_CHECK = "AUD"
                        if STR_CHECK in epic_id and b_aus_hol == False:
                            b_TRADE_OK = True
                            break
                    ###################TOKYO############################
                    ###################TOKYO############################
                    ###################TOKYO############################
                    tz = pytz.timezone('Asia/Tokyo')
                    now_time = datetime.datetime.now(tz=tz).strftime('%H:%M')
                    #print ("!!DEBUG!! Asia/Tokyo:" + str(now_time))
                    if is_between(str(now_time), ("08:00", "16:00")):
                        #print("!!DEBUG!!...TOKYO MARKET OPEN!!")
                        time.sleep(1)
                        STR_CHECK = "JPY"
                        if STR_CHECK in epic_id and b_jp_hol == False:
                            b_TRADE_OK = True
                            break
                    break

                if b_TRADE_OK:

                    current_bid = d['snapshot']['bid']
                    ask_price = d['snapshot']['offer']
                    # NOTE(review): spread here is bid - offer, which is
                    # normally negative -- hence the negative threshold below.
                    spread = float(current_bid) - float(ask_price)
                    if float(spread) >= -1:
                        # tmp_lst.append(epic_id)
                        # spreads_and_epics.append(tmp_lst)
                        pick_from_epics.append(epic_id)
                        # print ("bid : " + str(current_bid))
                        # print ("ask : " + str(ask_price))
                        # print ("-------------------------")
                        # print ("spread : " + str(spread))
                        # print ("-------------------------")
                        print("!!DEBUG!!...FOUND GOOD EPIC..." + str(i_count) +
                              "/" + str(len(main_epic_ids)))
                        time.sleep(1)
                    else:
                        print(
                            "!!DEBUG!!...skipping, NO GOOD EPIC....Checking next epic spreads..."
                            + str(i_count) + "/" + str(len(main_epic_ids)))
                        time.sleep(1)
                        continue

        except Exception as e:
            # Best-effort loop: a failed snapshot for one epic should not
            # abort the whole scan.
            print(e)
            pass

    return (pick_from_epics)
def get_data():
    """Handle the classify form POST: score with four models and render labels.

    Reads the submitted form fields, derives calendar features (month,
    weekday flag, base-hour flag, UK-holiday flag) and calls the external
    ``process`` scorer once per algorithm.

    Returns:
        Rendered classify.html with one scored label per algorithm.
    """
    data = request.form
    data_type = data['select']
    date = data['datetime']
    time = int(data['time'])
    temp = data['temp']
    humidity = data['humidity']
    dewpoint = data['dewpoint']

    dt = datetime.strptime(date, '%Y-%m-%d')

    # Month (1-12)
    month_value = dt.month

    # Weekday flag: Monday is 0 and Sunday is 6; weekends map to 0, weekdays to 1.
    week_day = 0 if dt.weekday() in (5, 6) else 1

    # Base-hour flag ("true" outside 05:00-21:00): string values are what the
    # downstream process() scorer expects.
    Base_hour_Flag = "false" if 4 < time < 22 else "true"

    # Holiday flag (variable renamed from the misleading `us_holidays` --
    # these are UK holidays).
    uk_holidays = holidays.UK(years=dt.year)
    Holiday = 1 if dt in uk_holidays else 0

    # All four models receive the identical feature payload; build it once
    # instead of repeating the keyword list per call.
    features = dict(Base_hour_Flag=Base_hour_Flag,
                    Holiday=Holiday,
                    week_day=week_day,
                    temp=temp,
                    humidity=humidity,
                    data_type=data_type,
                    dewpoint=dewpoint,
                    month_value=month_value)

    resultGlm = json.loads(process(algo="glm", **features))
    resultForest = json.loads(process(algo="forest", **features))
    resultTree = json.loads(process(algo="tree", **features))
    resultNN = json.loads(process(algo="nn", **features))

    return render_template(
        '/classify.html',
        humidity=humidity,
        date=date,
        hour=time,
        temp=temp,
        dewPoint=dewpoint,
        labelGlm=resultGlm['Results']['output1'][0]['Scored Labels'],
        labelForest=resultForest['Results']['output1'][0]['Scored Labels'],
        labelNN=resultNN['Results']['output1'][0]['Scored Labels'],
        labelTree=resultTree['Results']['output1'][0]['Scored Labels'])
Exemple #15
0
from pycm import *
import seaborn as sns

from imblearn.pipeline import make_pipeline
from imblearn import under_sampling
from imblearn import over_sampling
from imblearn import combine
from imblearn.over_sampling import SMOTE

import matplotlib.cm as cm
from matplotlib.colors import Normalize
import matplotlib as mpl

# Silence pandas chained-assignment warnings and all generic warnings
# for the duration of this script.
pd.options.mode.chained_assignment = None
warnings.filterwarnings('ignore')

# Holiday calendars consulted when engineering date-based features.
us_holidays = holidays.UnitedStates()
ca_holidays = holidays.CA()
uk_holidays = holidays.UK()

# Fix the RNG seed so experiment runs are reproducible.
random_state = 123456
np.random.seed(random_state)

# Make the current working directory importable as a module root.
cwd = str(os.getcwd())
sys.path.append(cwd)
sys.path.insert(0, cwd)

# Registry of scaler classes, keyed by class name, so a scaler can be
# selected by string at runtime.
scaler_fs = [
    MinMaxScaler, MaxAbsScaler, StandardScaler, RobustScaler,
    QuantileTransformer, PowerTransformer
]
scalers = {scaler.__name__: scaler for scaler in scaler_fs}
    def validate(cls, args, data, rules=None, **kwargs):
        """Validate a set of data records and split them into valid/invalid.

        Works as a filter for the calling script: generic validation rules
        are applied first, then any caller-supplied rule functions, and the
        records that fail any rule are returned separately (with the reason)
        instead of being inserted into the database.

        Generic rules:
          - Rule 1: all non-timestamp columns are null or 0. Rows whose date
            is a UK/DE holiday or a weekend are exempt for "price" scripts.
          - Rule 2: the 4000 previously-validated records are identical.
          - Rule 3: the same column was 0/NULL for the last 5 rows.
          - Rule 4: standard columns (constituent name/id, dates) have values
            but every non-standard column is 0/NULL.

        Args:
            args: unused; kept for interface compatibility with callers.
            data (str): JSON string of the form
                '{"data":[{"column1":"value1", ...}, ...]}'.
            rules (list): optional list of callables; each takes a JSON
                string of records and returns a tuple
                (valid_records, invalid_records), where valid_records is a
                list of dicts and invalid_records is a list of (dict, info)
                tuples. Defaults to no extra rules.
            **kwargs:
                script (str): name of the calling script (required).
                google_key_path (str): path to the service account key
                    (optional, used by caller-supplied rules).

        Returns:
            tuple: (valid_list, invalid_list). valid_list is a list of
            record dicts. invalid_list is a list of (record, info) tuples,
            where info is a dict with keys "rule", "reason", "script",
            "date_of_validation" and "last_update_date".
        """
        def rule_1(row, tc, ntc, uk, de, script):
            # Rule 1: a row is invalid when every non-timestamp column is
            # null/0, unless the date is a holiday/weekend for price scripts.
            for c in ntc:
                if row[c] is not None and row[c] != 0:
                    return "valid"

            # All values were 0/None. Check whether the row's date falls on
            # a holiday or weekend, which would explain the missing data.
            for c in tc:
                try:
                    parsed = datetime.strptime(row[c], '%Y-%m-%d %H:%M:%S')
                except Exception:
                    # Column was not a parseable timestamp; try the next one.
                    continue
                parsed_date = str(parsed.date())
                # BUG FIX: the original used `not parsed.weekday()`, which is
                # true only on Mondays; weekday() >= 5 is Saturday/Sunday.
                if parsed_date in uk or parsed_date in de or parsed.weekday() >= 5:
                    if "price" in script:
                        return "valid"
                    else:
                        rule_list.add("rule_1")
                        return "invalid"

            # Not a weekend or holiday: the all-zero row is genuinely invalid.
            rule_list.add("rule_1")
            return "invalid"

        def rule_4(row, nsc):
            # Rule 4: invalid when every non-standard column is 0/None.
            for c in nsc:
                if row[c] != 0 and row[c] is not None:
                    return "valid"

            rule_list.add("rule_4")
            return "invalid"

        # `rules=[]` as a default is a mutable-default pitfall; normalize here.
        if rules is None:
            rules = []

        data_decoded = jsonpickle.decode(data)
        valid_data = []
        invalid_data = []

        # Rules that fired at least once, and their human-readable reasons.
        rule_list = set()
        reasons = {
            "rule_1": "Rule 1: all columns (except timestamp) are null or 0.",
            "rule_2": "Rule 2: the 4000 previously-validated records are identical.",
            "rule_3": "Rule 3: the same column had value of 0 or NULL for the last 5 rows.",
            "rule_4": "Rule 4: standard columns like Constituent name, id, date have values but all others are 0."
        }

        # First, perform generic data validation rules.
        # Rule 1: check if all columns (except timestamp) are null or 0.
        script = kwargs["script"]
        uk_holidays = holidays.UK()
        de_holidays = holidays.Germany()
        df = pd.DataFrame(data_decoded["data"])
        standard_columns = ['constituent_name', "constituent_id", "date", "last_update_date"]
        original_columns = df.columns
        timestamp_columns = [c for c in df.columns if ("date" in c or "time" in c)]
        non_timestamp_columns = [c for c in df.columns if ("date" not in c and "time" not in c)]
        non_standard_columns = [c for c in df.columns if c not in standard_columns + timestamp_columns]

        df["rule_1"] = df.apply(lambda x: rule_1(x, timestamp_columns, non_timestamp_columns,
                                                 uk_holidays, de_holidays, script), axis=1)

        # Rule 2: are the 4000 previously-validated records identical?
        # Slide a 4000-row window over the frame and flag duplicated rows.
        invalid_indices = set()
        if df.shape[0] >= 4000:
            start = 0
            end = 3999
            while end < df.shape[0]:
                df_temp = df.loc[start:end]
                df_duplicates = df_temp[df_temp.duplicated(subset=non_timestamp_columns, keep=False)]
                invalid_indices.update(list(df_duplicates.index))

                start += 1
                end += 1

        df["rule_2"] = "valid"
        df.loc[list(invalid_indices), "rule_2"] = "invalid"

        if len(invalid_indices) > 0:
            rule_list.add("rule_2")

        # Rule 3: has the same column been 0 or NULL for the last 5 rows?
        invalid_indices_2 = set()
        start = 0
        end = 4
        while end < df.shape[0]:
            for c in non_timestamp_columns:
                series = df.loc[start:end][c]
                # `not series.any()` == every value in the window is falsy.
                if not series.any():
                    invalid_indices_2.update(list(series.index))

            start += 1
            end += 1

        df["rule_3"] = "valid"
        df.loc[list(invalid_indices_2), "rule_3"] = "invalid"

        if len(invalid_indices_2) > 0:
            rule_list.add("rule_3")

        # Rule 4: standard columns have values but all others are 0/NULL.
        df["rule_4"] = df.apply(lambda x: rule_4(x, non_standard_columns), axis=1)

        # Placeholder fill makes the column dtype `object`, so per-cell
        # list assignment below (`df.at[i, 'rule'] = row_rules`) works.
        df["rule"] = object

        # Collect, per row, the list of rules it violated.
        invalid_indices = set()

        for i in range(0, df.shape[0]):
            row_rules = []
            row = df.iloc[i]

            for r in ["rule_1", "rule_2", "rule_3", "rule_4"]:
                if row[r] == "invalid":
                    invalid_indices.add(i)
                    row_rules.append(r)

            df.at[i, 'rule'] = row_rules

        # Valid rows are the complement of the invalid ones.
        valid_indices = set(df.index.tolist())
        valid_indices = valid_indices.difference(invalid_indices)

        valid_data += df.loc[list(valid_indices)][original_columns.tolist() + ["rule"]].to_dict(orient='records')
        invalid_data += df.loc[list(invalid_indices)][original_columns.tolist() + ["rule"]].to_dict(orient='records')

        # Apply caller-supplied rules to the records that survived so far;
        # each rule further filters valid_data and accumulates invalids.
        custom_invalid = []
        for func in rules:
            valid, invalid = func(jsonpickle.encode({"data": valid_data}))
            valid_data = valid
            custom_invalid += invalid

        # Format invalid data: attach rule names, reasons and timestamps.
        invalid_data_store = []

        # Rows that failed only rule_1/rule_4 are deliberately not stored.
        now_str = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        for item in invalid_data:
            if "rule_1" in item["rule"] or "rule_4" in item["rule"]:
                continue
            additional_info = {}
            additional_info["rule"] = item["rule"]
            # BUG FIX: the original comprehension used the loop-invariant
            # `isinstance` check as a per-element filter, yielding [] for
            # non-list rule values; branch on the type instead.
            if isinstance(item["rule"], list):
                additional_info["reason"] = [reasons[r] for r in item["rule"]]
            else:
                additional_info["reason"] = [reasons.get(item["rule"], "")]
            additional_info["script"] = script
            additional_info["date_of_validation"] = now_str
            additional_info["last_update_date"] = now_str
            invalid_data_store.append((item, additional_info))

        invalid_data_store += custom_invalid

        return list(valid_data), invalid_data_store