    def create_model(self, vocab_size, max_len):

        # # load saved model architecture
        # json_file = open('model.json', 'r')
        # loaded_model_json = json_file.read()
        # json_file.close()

        # loaded_model = tf.keras.models.model_from_json(loaded_model_json)

        # # load saved model weights
        # loaded_model.load_weights('model.h5')

        # embedding_layer = loaded_model.layers[0]

        # embedding_layer.trainable = False

        self.model = keras.Sequential()
        self.model.add(keras.layers.Embedding(vocab_size,
                                               CONFIG.getint('DEFAULT', 'EMBEDDING_OUTPUT'),
                                               input_length=max_len))
        #self.model.add(embedding_layer)
        self.model.add(keras.layers.GlobalAveragePooling1D())
        #self.model.add(keras.layers.Flatten())
        #self.model.add(keras.layers.Dropout(0.5))
        self.model.add(keras.layers.Dense(CONFIG.getint('DEFAULT', 'HIDDEN'), activation=tf.nn.relu))
        #self.model.add(keras.layers.Dropout(0.5))
        # self.model.add(keras.layers.Dense(32, activation=tf.nn.relu))
        # self.model.add(keras.layers.Dropout(0.5))
        self.model.add(keras.layers.Dense(CONFIG.getint('DEFAULT', 'OUTPUT'), activation=tf.nn.softmax))
Example #2
def get_max_min_rsi(trade):
    """
    Function to calculate the max or min RSI for CandleList slice
    going from trade.start-CONFIG.getint('counter', rsi_period') to trade.start.

    Returns
    -------
    float : The max (if short trade) or min (long trade) rsi value
            in the candlelist
    """
    t_logger.debug("Running set_max_min_rsi")

    ix = CONFIG.getint('counter', 'rsi_period')
    sub_clist = trade.period.data['candles'][-ix:]
    rsi_list = [x['rsi'] for x in sub_clist]
    first = None
    for x in reversed(rsi_list):
        if first is None:
            first = x
        elif trade.type == 'short':
            if x > first:
                first = x
        elif trade.type == 'long':
            if x < first:
                first = x

    t_logger.debug("Done set_max_min_rsi")

    return round(first, 2)
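A minimal usage sketch for get_max_min_rsi(), using a hypothetical trade-like object with the candle layout the code expects (the real Trade and CandleList classes are not shown here):

from types import SimpleNamespace

# hypothetical candles; only the 'rsi' key is read by get_max_min_rsi
candles = [{'rsi': r} for r in [55.0, 61.2, 48.7, 70.3, 66.1]]
trade = SimpleNamespace(type='short',
                        period=SimpleNamespace(data={'candles': candles}))
# with CONFIG 'counter.rsi_period' set to 3, the slice is [48.7, 70.3, 66.1];
# since the trade is 'short', the function would return the maximum: 70.3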
Example #3
def prepare_data():

    num_csv_rows = CONFIG.getint('DEFAULT', 'TRAINING_SIZE')

    max_len = 140

    train = read_csv(num_csv_rows)

    trainX, trainY = train['tweet'].values, train['polarity'].values

    trainY = np.where(trainY == 4, 1, trainY)

    preprocessor = Preprocessor()

    trainX = preprocessor.clean_texts(trainX)

    vocab_size = CONFIG.getint('DEFAULT', 'VOCAB_SIZE')

    # set the number of most frequent words to keep and the token used for any out-of-vocabulary word
    tokenizer = Tokenizer(num_words=vocab_size, oov_token="<UNUSED>")

    tokenizer.fit_on_texts(trainX)

    # 0 reserved for padding, 1 reserved for unknown words
    # 2 reserved for unused words (least frequent)
    tokenizer.word_index = {
        k: (v + 2)
        for k, v in tokenizer.word_index.items()
    }
    tokenizer.word_index["<UNK>"] = 1
    tokenizer.word_index["<UNUSED>"] = 2

    # encode training data as integers
    trainX = tokenizer.texts_to_sequences(trainX)

    # pad with 0's to max_len
    trainX = pad_sequences(trainX, maxlen=max_len, padding='post')

    # create dictionary from tokenizer
    dictionary = tokenizer.word_index

    # save dictionary as json file to be used by predictor.py
    with open('dictionary.json', 'w', encoding='utf-8') as dictionary_file:
        json.dump(dictionary, dictionary_file, ensure_ascii=False)

    return trainX, trainY, vocab_size, max_len
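The index convention above (0 = padding, 1 = "<UNK>", 2 = "<UNUSED>") can be checked with a small decode helper; this is a hypothetical inspection utility, not part of the original code:

def decode_sequence(sequence, word_index):
    # invert the tokenizer's word_index and map padding back to a readable token
    reverse_index = {v: k for k, v in word_index.items()}
    reverse_index[0] = '<PAD>'
    return ' '.join(reverse_index.get(i, '<UNK>') for i in sequence)

# e.g. decode_sequence(trainX[0], tokenizer.word_index) returns the cleaned tweet
# followed by '<PAD>' tokens up to max_len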
Example #4
def gen_integrated_frames(metric):
    tqdm_bar_format = CONFIG.get('default', 'tqdm_bar_format')
    images = get_images_list(metric)
    num_frames = (len(images) - 1) * CONFIG.getint('default',
                                                   'frames_per_day') + 1
    with tqdm(total=num_frames, bar_format=tqdm_bar_format) as pbar:
        for n in range(1, num_frames + 1):
            pbar.set_description(datetime.now().strftime("%Y-%m-%d %H:%M:%S") +
                                 ' - ' + 'generating integrated frames')
            pbar.update(1)
            integrate_frame('%05d' % n, '%05d' % n, metric)
Example #5
def gen_timeline_frames(df):
    tqdm_bar_format = CONFIG.get('default', 'tqdm_bar_format')
    interval = CONFIG.getint('default', 'frames_per_day')
    date_list = df['date'].unique().tolist()
    gen_base_timeline_image(date_list)
    tot_frames = (len(date_list) - 1) * interval
    with tqdm(total=tot_frames, bar_format=tqdm_bar_format) as pbar:
        for idx, date_string in enumerate(date_list):
            if idx == len(date_list) - 1:
                gen_timeline_frame(date_string, tot_frames, tot_frames)
                break
            for n in range((idx * interval), (idx * interval) + interval):
                gen_timeline_frame(date_string, n, tot_frames)
                pbar.set_description(
                    datetime.now().strftime("%Y-%m-%d %H:%M:%S") + ' - ' +
                    'generating timeline frames')
                pbar.update(1)
Example #6
    def get_callbacks(self):
        
        callback_list = []

        early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss',
                                    patience=CONFIG.getint('DEFAULT', 'PATIENCE'),
                                    verbose=1,
                                    mode='min')

        checkpoint = keras.callbacks.ModelCheckpoint(self.checkpoint_filepath,
                                monitor='val_acc',
                                verbose=1,
                                save_best_only=True,
                                mode='max')

        callback_list.append(early_stopping)
        callback_list.append(checkpoint)

        return callback_list
Example #7
def create_csv_dataframe(history, results):
    history_dict = history.history

    df = pd.DataFrame.from_dict(history_dict)
    df['test_loss'] = results[0]
    df['test_acc'] = results[1]
    df['input nodes'] = CONFIG.getint('DEFAULT', 'EMBEDDING_OUTPUT')
    df['hidden nodes'] = CONFIG.getint('DEFAULT', 'HIDDEN')
    df['output nodes'] = CONFIG.getint('DEFAULT', 'OUTPUT')
    df['epochs'] = CONFIG.getint('DEFAULT', 'EPOCHS')
    df['batch size'] = CONFIG.getint('DEFAULT', 'BATCH_SIZE')
    df['patience'] = CONFIG.getint('DEFAULT', 'PATIENCE')
    df = df.round(decimals=4)

    return df
Example #8
def calc_adr(trade):
    """
    Function to calculate the ATR (average true range)
    over the period going from trade.start - CONFIG.getint('trade', 'period_atr') to trade.start

    Parameters
    ----------
    trade : Trade object
            Used for the calculation

    Returns
    -------
    float : ATR for selected period
    """
    delta_period = periodToDelta(CONFIG.getint('trade', 'period_atr'),
                                 trade.timeframe)
    delta_1 = periodToDelta(1, trade.timeframe)
    start = trade.start - delta_period  # get the start datetime
    end = trade.start + delta_1  # increase trade.start by one candle to include trade.start

    c_list = trade.period.slice(start, end)

    return calc_atr(c_list)
Example #9
def get_lasttime(trade):
        '''
        Function to calculate the last time price has been above/below
        a certain HArea

        Parameters
        ----------
        trade : Trade object
                Used for the calculation

        Returns
        -------
        Datetime
        '''
        t_logger.debug("Running get_lasttime")

        # instantiate an HArea object representing trade.SR in order to calculate the last time
        # price has been above/below that S/R level
        resist = HArea(price=trade.SR,
                       pips=CONFIG.getint('harea', 'hr_pips'),
                       instrument=trade.pair,
                       granularity=trade.timeframe)

        return trade.period.get_lasttime(resist)
Example #10
def gen_all_crossfade_frames(metric):
    tqdm_bar_format = CONFIG.get('default', 'tqdm_bar_format')
    images_list = get_images_list(metric)
    prev_image = None
    frames_per_day = CONFIG.getint('default', 'frames_per_day')
    slide_time = CONFIG.get('default', 'slide_time')
    # TODO: potentially more future-proof way of handling conversion to seconds + decimal
    #       (one possible approach is sketched after this function)
    slide_time = '00:00:0' + slide_time
    total_frames = frames_per_day * (len(images_list) - 1)
    with tqdm(total=len(images_list) - 1, bar_format=tqdm_bar_format) as pbar:
        for idx, image in enumerate(images_list):
            pbar.set_description(datetime.now().strftime("%Y-%m-%d %H:%M:%S") +
                                 ' - ' + 'generating crossfade frames')
            file_start = ((idx - 1) * frames_per_day) + 1
            if idx == 0:
                prev_image = image
                continue
            gen_crossfade_frames(prev_image, image, metric, file_start,
                                 slide_time)
            prev_image = image
            pbar.update(1)
    extra_frame_num = total_frames + 1
    copyfile('frames/%s_frame_%05d.png' % (metric, total_frames),
             'frames/%s_frame_%05d.png' % (metric, extra_frame_num))
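One way to make the TODO above more robust is to format the configured seconds value into a full timestamp instead of concatenating strings; a sketch, not the project's actual implementation:

def seconds_to_timestamp(seconds_str):
    # e.g. '2.5' -> '00:00:02.500'; also handles values of 10 seconds or more
    total = float(seconds_str)
    hours, rem = divmod(total, 3600)
    minutes, secs = divmod(rem, 60)
    return '%02d:%02d:%06.3f' % (hours, minutes, secs)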
Example #11
def main():

    #trainX, trainY, vocab_size, max_len = prepare_data()

    # split training data up into training and test sets
    #trainX, testX, trainY, testY = train_test_split(trainX, trainY, test_size=0.1)

    # load Python data object (pkl file)
    file = open('data_01.pkl', 'rb')
    trainX = pickle.load(file)
    trainY = pickle.load(file)
    testX = pickle.load(file)
    testY = pickle.load(file)
    vocab_size = pickle.load(file)
    max_len = 140
    file.close()

    # save training, testing, vocabulary size, and max length of tweet to Python data object (pkl file)
    # file = open('data_01.pkl','wb')
    # pickle.dump(trainX, file)
    # pickle.dump(trainY, file)
    # pickle.dump(testX, file)
    # pickle.dump(testY, file)
    # pickle.dump(vocab_size, file)
    # pickle.dump(max_len, file)
    # file.close()

    # exit()

    # one-hot encode labels
    trainY = to_categorical(trainY)

    testY = to_categorical(testY)

    # initialise and create neural network model
    neural_network = NeuralNetwork()

    neural_network.create_model(vocab_size, max_len)

    # print neural network details to console
    neural_network.model.summary()

    # compile neural network
    neural_network.model.compile(optimizer='adam',
                                 loss='categorical_crossentropy',
                                 metrics=['accuracy'])

    # save the full model (architecture plus weights); note that save_weights()
    # further below writes a weights-only copy to this same 'model.h5' path
    neural_network.model.save('model.h5')

    val_size = CONFIG.getint('DEFAULT', 'VALIDATION_SIZE')

    # split training data
    x_val = trainX[:val_size]
    partial_x_train = trainX[val_size:]

    y_val = trainY[:val_size]
    partial_y_train = trainY[val_size:]

    # fit model
    history = neural_network.fit_model(partial_x_train, partial_y_train, x_val,
                                       y_val)

    # run complete model on test data set and print results
    results = neural_network.model.evaluate(testX, testY)

    print(neural_network.model.metrics_names)

    print(results)

    # save model as json file
    model_json = neural_network.model.to_json()
    with open('model.json', 'w') as json_file:
        json_file.write(model_json)

    # save model weights
    neural_network.model.save_weights('model.h5')

    #plot_graph(history)

    # write results to CSV file in logs dir
    write_csv(create_csv_dataframe(history, results))
Example #12
    def fit_model(self, partial_x_train, partial_y_train, x_val, y_val):
        return self.model.fit(partial_x_train, partial_y_train,
                              epochs=CONFIG.getint('DEFAULT', 'EPOCHS'),
                              batch_size=CONFIG.getint('DEFAULT', 'BATCH_SIZE'),
                              validation_data=(x_val, y_val), verbose=1,
                              callbacks=self.get_callbacks())
Example #13
def calc_SR(clO, outfile):
    '''
    Function to calculate S/R lines

    Parameters
    ----------
    clO: CandleList object
         Used for calculation
    outfile : str
              Output filename for .png file

    Returns
    -------
    HAreaList object
    '''
    PL = clO.get_pivotlist(th_bounces=CONFIG.getfloat('pivots', 'th_bounces'))

    ## now calculate the price range for calculating the S/R
    # add a number of pips to max,min to be sure that we
    # also detect the extreme pivots
    ul = add_pips2price(clO.data['instrument'], clO.get_highest(),
                        CONFIG.getint('trade_bot', 'add_pips'))
    ll = substract_pips2price(clO.data['instrument'], clO.get_lowest(),
                              CONFIG.getint('trade_bot', 'add_pips'))

    cl_logger.debug("Running calc_SR for estimated range: {0}-{1}".format(
        ll, ul))

    prices, bounces, score_per_bounce, tot_score = ([] for i in range(4))

    # the price increment, expressed in pips, is twice the 'i_pips' setting
    prev_p = None
    p = float(ll)

    while p <= float(ul):
        cl_logger.debug("Processing S/R at {0}".format(round(p, 4)))
        # get a PivotList for this particular S/R
        newPL = PL.inarea_pivots(SR=p)
        if len(newPL.plist) == 0:
            mean_pivot = 0
        else:
            mean_pivot = newPL.get_avg_score()

        prices.append(round(p, 5))
        bounces.append(len(newPL.plist))
        tot_score.append(newPL.get_score())
        score_per_bounce.append(mean_pivot)
        # increment price to the next level.
        # Because the increment is made in pips,
        # it does not suffer from the JPY-pairs issue
        p = add_pips2price(clO.data['instrument'], p,
                           2 * CONFIG.getint('candlelist', 'i_pips'))
        if prev_p is None:
            prev_p = p
        else:
            increment_price = round(p - prev_p, 5)
            prev_p = p

    data = {
        'price': prices,
        'bounces': bounces,
        'scores': score_per_bounce,
        'tot_score': tot_score
    }

    df = pd.DataFrame(data=data)

    ### establish the bounces threshold as the 'trade_bot.th' quantile from CONFIG;
    # select only rows with at least one pivot and tot_score > 0,
    # so threshold selection considers only these rows
    # and is not biased when the range of prices is wide
    dfgt1 = df.loc[(df['bounces'] > 0)]
    dfgt2 = df.loc[(df['tot_score'] > 0)]
    bounce_th = dfgt1.bounces.quantile(CONFIG.getfloat('trade_bot', 'th'))
    score_th = dfgt2.tot_score.quantile(CONFIG.getfloat('trade_bot', 'th'))

    print("Selected number of pivot threshold: {0}".format(bounce_th))
    print("Selected tot score threshold: {0}".format(round(score_th, 1)))

    # selecting records over threshold
    dfsel = df.loc[(df['bounces'] > bounce_th) | (df['tot_score'] > score_th)]

    # repeat until no overlap between prices
    ret = calc_diff(dfsel, increment_price)
    dfsel = ret[0]
    tog_seen = ret[1]
    while tog_seen is True:
        ret = calc_diff(dfsel, increment_price)
        dfsel = ret[0]
        tog_seen = ret[1]

    # iterate over DF with selected SR to create a HAreaList
    halist = []
    for index, row in dfsel.iterrows():
        resist = HArea(price=row['price'],
                       pips=CONFIG.getint('pivots', 'hr_pips'),
                       instrument=clO.data['instrument'],
                       granularity=clO.data['granularity'],
                       no_pivots=row['bounces'],
                       tot_score=round(row['tot_score'], 5))
        halist.append(resist)

    halistObj = HAreaList(halist=halist)

    # Plot the HAreaList
    dt_str = clO.data['candles'][-1]['time'].strftime("%d_%m_%Y_%H_%M")

    if CONFIG.getboolean('images', 'plot') is True:
        halistObj.plot(clO=clO, outfile=outfile)

    cl_logger.info("Run done")

    return halistObj
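A toy illustration of the quantile-based threshold selection used above, with made-up numbers and assuming CONFIG 'trade_bot.th' were 0.8:

import pandas as pd

toy = pd.DataFrame({'bounces':   [0, 1, 2, 5, 1, 3],
                    'tot_score': [0.0, 1.5, 3.0, 9.0, 2.0, 4.5]})
bounce_th = toy.loc[toy['bounces'] > 0].bounces.quantile(0.8)
score_th = toy.loc[toy['tot_score'] > 0].tot_score.quantile(0.8)
# only prices that clear either threshold survive as candidate S/R levels
selected = toy.loc[(toy['bounces'] > bounce_th) | (toy['tot_score'] > score_th)]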
Example #14
def gen_image(date, metric, new_df):
    """Create map image for specific date and metric."""
    fips = new_df['fips'][new_df['date'] == date].unique().tolist()
    values = new_df[metric][new_df['date'] == date].tolist()

    colorscale = gen_colorscale()

    max_range = new_df[metric][new_df[metric] > 0].quantile(
        float(CONFIG.get('default', 'upper_end')))
    endpts = list(np.linspace(1, max_range, len(colorscale) - 1))

    endpts = [int(round(i, 0)) for i in endpts]

    fig = ff.create_choropleth(fips=fips,
                               values=values,
                               binning_endpoints=endpts,
                               colorscale=colorscale,
                               show_state_data=True,
                               state_outline={
                                   'color': 'rgb(0, 0, 0)',
                                   'width': .5
                               },
                               show_hover=True,
                               centroid_marker={'opacity': 0},
                               asp=2.9,
                               width=CONFIG.getint('default', 'image_width'),
                               height=CONFIG.getint('default', 'image_height'),
                               title_text=CONFIG.get(metric, 'slide_title'))

    fig.update_layout(
        dict(
            margin={
                'pad': 20,
                't': 80,
                'b': 200
            },
            legend={
                'font': {
                    'size': 30
                },
                'itemsizing': 'constant'
            },
            title={'font': {
                'size': 30
            }},
            geo={'landcolor': 'white'},
        ))

    fig.add_annotation(x=0,
                       y=0,
                       showarrow=False,
                       font=dict(size=30),
                       text="Produced by Bryan Spaulding",
                       xref="paper",
                       yref="paper")
    fig.add_annotation(x=1,
                       y=.09,
                       showarrow=False,
                       font=dict(size=20),
                       text="Sources",
                       xref="paper",
                       yref="paper")
    fig.add_annotation(x=1,
                       y=.02,
                       showarrow=False,
                       font=dict(size=15),
                       text="NY Times",
                       xref="paper",
                       yref="paper")
    fig.add_annotation(
        x=1,
        y=0,
        showarrow=False,
        font=dict(size=15),
        text=
        "https://www.nytimes.com/interactive/2020/us/coronavirus-us-cases.html",
        xref="paper",
        yref="paper")
    fig.add_annotation(x=1,
                       y=.065,
                       showarrow=False,
                       font=dict(size=15),
                       text="Census",
                       xref="paper",
                       yref="paper")
    fig.add_annotation(x=1,
                       y=.045,
                       showarrow=False,
                       font=dict(size=15),
                       text="https://www.census.gov/",
                       xref="paper",
                       yref="paper")

    fig.write_image('images/%s_%s.png' % (metric, date))
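The binning endpoints above just split the range 1..max_range evenly, with one fewer endpoint than there are colours; for instance, with a hypothetical max_range of 500 and a seven-colour scale:

import numpy as np

# hypothetical values: max_range = 500, seven colours -> six endpoints
endpts = [int(round(i, 0)) for i in np.linspace(1, 500, 7 - 1)]
# -> [1, 101, 201, 300, 400, 500]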
Example #15
    def run(self, discard_sat=True):
        """
        Run the bot from self.start

        Parameters
        ----------
        discard_sat : bool
                      If True, the trade is not taken when the IC falls
                      on a Saturday. Default: True

        Returns
        -------
        Trade object or None
        """
        tb_logger.info("Running...")

        conn = Connect(instrument=self.pair, granularity=self.timeframe)

        ser_dir = None
        if CONFIG.has_option('general', 'ser_data_dir'):
            ser_dir = CONFIG.get('general', 'ser_data_dir')

        delta = nhours = None
        if self.timeframe == "D":
            nhours = 24
            delta = timedelta(hours=24)
        else:
            p1 = re.compile('^H')
            m1 = p1.match(self.timeframe)
            if m1:
                nhours = int(self.timeframe.replace('H', ''))
                delta = timedelta(hours=int(nhours))

        # calculate the start datetime for the CList that will be used
        # for calculating the S/R areas
        delta_period = periodToDelta(
            CONFIG.getint('trade_bot', 'period_range'), self.timeframe)

        initc_date = self.start - delta_period
        # Now get a CandleList from 'initc_date' to 'self.start', which is the
        # total time interval for this TradeBot
        res = conn.query(start=initc_date.strftime("%Y-%m-%dT%H:%M:%S"),
                         end=self.start.strftime("%Y-%m-%dT%H:%M:%S"),
                         indir=ser_dir)
        clO = CandleList(res)
        dt_str = self.start.strftime("%d_%m_%Y_%H_%M")
        outfile_png = "{0}/srareas/{1}.{2}.{3}.halist.png".format(
            CONFIG.get("images", "outdir"), self.pair, self.timeframe, dt_str)
        SRlst = calc_SR(clO, outfile=outfile_png)

        # fetch candle for current datetime
        res = conn.query(start=self.start.strftime("%Y-%m-%dT%H:%M:%S"),
                         count=1,
                         indir=ser_dir)

        # this is the current candle that
        # is being checked
        c_candle = Candle(dict_data=res['candles'][0])
        c_candle.time = datetime.strptime(c_candle.time,
                                          '%Y-%m-%dT%H:%M:%S.%fZ')

        # check if there is any HArea overlapping with c_candle
        HAreaSel, sel_ix = SRlst.onArea(candle=c_candle)

        if HAreaSel is not None:
            c_candle.set_candle_features()
            # guess if the trade is 'long' or 'short'
            newCl = clO.slice(start=initc_date, end=c_candle.time)
            type = get_trade_type(c_candle.time, newCl)
            SL = adjust_SL(type, newCl, CONFIG.getint('trade_bot', 'n_SL'))
            prepare = False
            if c_candle.indecision_c(
                    ic_perc=CONFIG.getint('general', 'ic_perc')) is True:
                prepare = True
            elif type == 'short' and c_candle.colour == 'red':
                prepare = True
            elif type == 'long' and c_candle.colour == 'green':
                prepare = True

            # discard if IC falls on a Saturday
            if c_candle.time.weekday() == 5 and discard_sat is True:
                tb_logger.info(
                    "Possible trade at {0} falls on Sat. Skipping...".format(
                        c_candle.time))
                prepare = False

            t = None
            if prepare is True:
                t = prepare_trade(tb_obj=self,
                                  type=type,
                                  SL=SL,
                                  ic=c_candle,
                                  harea_sel=HAreaSel,
                                  delta=delta,
                                  add_pips=CONFIG.getint('trade', 'add_pips'))
                t.tot_SR = len(SRlst.halist)
                t.rank_selSR = sel_ix
                t.SRlst = SRlst
            return t
        tb_logger.info("Run done")
Example #16


# program entry point
if __name__ == '__main__':
    for i in range(CONFIG.getint('DEFAULT', 'RUNS_PER_EPOCH')):
        main()
Example #17
def gen_data():
    """Retrieve data from datasources and merge into dataframe."""

    rolling_days = CONFIG.getint('default', 'rolling_days')

    nytimes_retrieve = retrieve_file(
        CONFIG.get('default', 'nytimes_csv_url'),
        CONFIG.get('default', 'nytimes_file_name'),
    )

    census_retrieve = retrieve_file(
        CONFIG.get('default', 'census_csv_url'),
        CONFIG.get('default', 'census_file_name'),
    )

    data_file = os.path.join(CONFIG.get('default', 'data_dir'),
                             CONFIG.get('default', 'data_file'))

    if os.path.exists(data_file) and nytimes_retrieve and census_retrieve:
        log('data file exists and is current')
        new_df = pd.read_csv(data_file, dtype={'fips': str})
        return

    log('creating base pandas dataframe')
    nytimes_file_name = os.path.join(
        CONFIG.get('default', 'data_dir'),
        CONFIG.get('default', 'nytimes_file_name'))
    dat_df = pd.read_csv(nytimes_file_name, dtype={'fips': str})

    dates = dat_df['date'].unique().tolist()

    log('creating population dataframe')
    census_file_name = os.path.join(CONFIG.get('default', 'data_dir'),
                                    CONFIG.get('default', 'census_file_name'))
    pop_df = pd.read_csv(census_file_name,
                         encoding='ISO-8859-1',
                         dtype={
                             'SUBLEV': str,
                             'REGION': str,
                             'DIVISION': str,
                             'STATE': str,
                             'COUNTY': str
                         })

    pop_df = pop_df[pop_df['COUNTY'] != '000']
    pop_df['fips'] = pop_df['STATE'] + pop_df['COUNTY']
    pop_df = pop_df[['fips', 'POPESTIMATE2019', 'CTYNAME', 'STNAME']]
    pop_df['CTYNAME'] = pop_df['CTYNAME'].apply(
        lambda x: x.replace(' County', ''))
    pop_df.columns = ['fips', 'population', 'county', 'state']

    csv = 'date,fips,population,county,state\n'
    for date in dates:
        for idx, row in pop_df.iterrows():
            csv += '%s,%s,%s,"%s","%s"\n' % (date, row['fips'],
                                             row['population'], row['county'],
                                             row['state'])

    log('merging data frames')
    new_df = pd.read_csv(StringIO(csv), dtype={'fips': str})
    new_df = new_df.merge(dat_df,
                          how='left',
                          left_on=['date', 'fips'],
                          right_on=['date', 'fips'])

    new_df = new_df[[
        'date', 'fips', 'population', 'county_x', 'state_x', 'cases', 'deaths'
    ]]
    new_df.columns = [
        'date', 'fips', 'population', 'county', 'state', 'cases', 'deaths'
    ]
    new_df = new_df.fillna(0)

    log('calculating new columns')
    new_df['cases'] = new_df['cases'].astype(int)
    new_df['deaths'] = new_df['deaths'].astype(int)

    new_df['cases_new'] = new_df.groupby(['fips'])['cases'].diff().fillna(0)
    new_df['deaths_new'] = new_df.groupby('fips')['deaths'].diff().fillna(0)

    new_df['cases_new'] = new_df['cases_new'].apply(lambda x: 0
                                                    if x < 0 else x)
    new_df['deaths_new'] = new_df['deaths_new'].apply(lambda x: 0
                                                      if x < 0 else x)

    new_df['cases_roll'] = new_df.groupby('fips')['cases_new'].rolling(rolling_days).mean() \
                                 .reset_index(0, drop=True).fillna(0)
    new_df['deaths_roll'] = new_df.groupby('fips')['deaths_new'].rolling(rolling_days).mean() \
                                  .reset_index(0, drop=True).fillna(0)

    per_capita_unit = int(CONFIG.get('default', 'per_capita_unit'))
    new_df['cases_pc'] = new_df['cases_roll'] / new_df[
        'population'] * per_capita_unit
    new_df['deaths_pc'] = new_df['deaths_roll'] / new_df[
        'population'] * per_capita_unit

    log('saving merged dataframe')
    new_df.to_csv(data_file, index=False)

    log('done processing dataframe')
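A toy illustration of the per-county rolling average computed above (made-up data, rolling_days = 2):

import pandas as pd

toy = pd.DataFrame({'fips': ['01001'] * 4, 'cases_new': [1, 3, 5, 7]})
toy['cases_roll'] = (toy.groupby('fips')['cases_new']
                        .rolling(2).mean()
                        .reset_index(0, drop=True).fillna(0))
# -> [0.0, 2.0, 4.0, 6.0]; the first row has no full window and is filled with 0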
Example #18
    def run(self, discard_sat=True):
        '''
        This function will run the Bot from start to end,
        one candle at a time

        Parameters
        ----------
        discard_sat : bool
                      If set to True, the Trade will not be taken
                      if the IC falls on a Saturday. Default: True

        Returns
        -------
        TradeList object with the Trades taken. None if no trades
        were taken
        '''
        tb_logger.info("Running...")
        conn = Connect(instrument=self.pair, granularity=self.timeframe)

        ser_dir = None
        if CONFIG.has_option('general', 'ser_data_dir'):
            ser_dir = CONFIG.get('general', 'ser_data_dir')

        delta = nhours = None
        if self.timeframe == "D":
            nhours = 24
            delta = timedelta(hours=24)
        else:
            p1 = re.compile('^H')
            m1 = p1.match(self.timeframe)
            if m1:
                nhours = int(self.timeframe.replace('H', ''))
                delta = timedelta(hours=int(nhours))

        # convert the start and end for this TradeBot to datetime objects
        startO = datetime.strptime(self.start, '%Y-%m-%d %H:%M:%S')
        endO = datetime.strptime(self.end, '%Y-%m-%d %H:%M:%S')

        loop = 0
        tlist = []
        tend = SRlst = None
        # calculate the start datetime for the CList that will be used
        # for calculating the S/R areas
        delta_period = periodToDelta(
            CONFIG.getint('trade_bot', 'period_range'), self.timeframe)
        initc_date = startO - delta_period
        # Now get a CandleList from 'initc_date' to 'endO', which is the
        # total time interval for this TradeBot
        res = conn.query(start=initc_date.isoformat(),
                         end=endO.isoformat(),
                         indir=ser_dir)

        clO = CandleList(res)
        while startO <= endO:

            if tend is not None:
                # this means that there is currently an active trade
                if startO <= tend:
                    startO = startO + delta
                    loop += 1
                    continue
                else:
                    tend = None
            tb_logger.info("Trade bot - analyzing candle: {0}".format(
                startO.isoformat()))
            sub_clO = clO.slice(initc_date, startO)
            dt_str = startO.strftime("%d_%m_%Y_%H_%M")
            if loop == 0:
                outfile_txt = "{0}/srareas/{1}.{2}.{3}.halist.txt".format(
                    CONFIG.get("images", "outdir"), self.pair, self.timeframe,
                    dt_str)
                outfile_png = "{0}/srareas/{1}.{2}.{3}.halist.png".format(
                    CONFIG.get("images", "outdir"), self.pair, self.timeframe,
                    dt_str)
                SRlst = calc_SR(sub_clO, outfile=outfile_png)
                f = open(outfile_txt, 'w')
                res = SRlst.print()
                # print SR report to file
                f.write(res)
                f.close()
                tb_logger.info("Identified HAreaList for time {0}:".format(
                    startO.isoformat()))
                tb_logger.info("{0}".format(res))
            elif loop >= CONFIG.getint('trade_bot', 'period'):
                # An entire cycle has occurred. Invoke .calc_SR
                outfile_txt = "{0}/srareas/{1}.{2}.{3}.halist.txt".format(
                    CONFIG.get("images", "outdir"), self.pair, self.timeframe,
                    dt_str)
                outfile_png = "{0}/srareas/{1}.{2}.{3}.halist.png".format(
                    CONFIG.get("images", "outdir"), self.pair, self.timeframe,
                    dt_str)
                SRlst = calc_SR(sub_clO, outfile=outfile_png)
                f = open(outfile_txt, 'w')
                res = SRlst.print()
                tb_logger.info("Identified HAreaList for time {0}:".format(
                    startO.isoformat()))
                tb_logger.info("{0}".format(res))
                # print SR report to file
                f.write(res)
                f.close()
                loop = 0

            # fetch candle for current datetime
            res = conn.query(start=startO.isoformat(), count=1, indir=ser_dir)

            # this is the current candle that
            # is being checked
            c_candle = Candle(dict_data=res['candles'][0])
            c_candle.time = datetime.strptime(c_candle.time,
                                              '%Y-%m-%dT%H:%M:%S.%fZ')

            # c_candle.time is not equal to startO when, for example,
            # startO falls on a non-working day
            delta1hr = timedelta(hours=1)
            if (c_candle.time != startO) and (abs(c_candle.time - startO) >
                                              delta1hr):
                loop += 1
                tb_logger.info(
                    "Analysed dt {0} is not the same as the API-returned dt {1}."
                    " Skipping...".format(startO, c_candle.time))
                startO = startO + delta
                continue

            #check if there is any HArea overlapping with c_candle
            HAreaSel, sel_ix = SRlst.onArea(candle=c_candle)

            if HAreaSel is not None:
                c_candle.set_candle_features()
                # guess if the trade is 'long' or 'short'
                newCl = clO.slice(start=initc_date, end=c_candle.time)
                type = get_trade_type(c_candle.time, newCl)
                SL = adjust_SL(type, newCl, CONFIG.getint('trade_bot', 'n_SL'))
                prepare = False
                if c_candle.indecision_c(
                        ic_perc=CONFIG.getint('general', 'ic_perc')) is True:
                    prepare = True
                elif type == 'short' and c_candle.colour == 'red':
                    prepare = True
                elif type == 'long' and c_candle.colour == 'green':
                    prepare = True

                # discard if IC falls on a Saturday
                if c_candle.time.weekday() == 5 and discard_sat is True:
                    tb_logger.info(
                        "Possible trade at {0} falls on Sat. Skipping...".
                        format(c_candle.time))
                    prepare = False

                if prepare is True:
                    t = prepare_trade(tb_obj=self,
                                      type=type,
                                      SL=SL,
                                      ic=c_candle,
                                      harea_sel=HAreaSel,
                                      delta=delta,
                                      add_pips=CONFIG.getint(
                                          'trade', 'add_pips'))
                    t.tot_SR = len(SRlst.halist)
                    t.rank_selSR = sel_ix
                    t.SRlst = SRlst
                    tlist.append(t)
            startO = startO + delta
            loop += 1

        tb_logger.info("Run done")

        if len(tlist) == 0:
            return None
        else:
            return tlist
Example #19
import json
import numpy as np
import keras
import tensorflow as tf
from keras.preprocessing.text import Tokenizer, text_to_word_sequence
from keras.models import model_from_json
from keras.preprocessing.sequence import pad_sequences
from config import CONFIG

# silence TensorFlow deprecation warnings and info logging on the console
tf.logging.set_verbosity(tf.logging.ERROR)

tokenizer = Tokenizer(num_words=CONFIG.getint('DEFAULT', 'VOCAB_SIZE'))

# open dictionary file created when training model
dictionary_file = open('dictionary.json', 'r', encoding='utf-8')
dictionary = json.load(dictionary_file)
dictionary_file.close()

# encode all words to utf-8
for key, value in dictionary.items():
    str(key).encode('utf-8')

# create tokenizer from dictionary
tokenizer.word_index = dictionary

# load saved model architecture
json_file = open('model.json', 'r')
loaded_model_json = json_file.read()
json_file.close()