Example #1
def load_feature_map(model, directory):
    r"""Load the feature map from storage. By default, the
    most recent feature map is loaded into memory.

    Parameters
    ----------
    model : alphapy.Model
        The model object to contain the feature map.
    directory : str
        Full directory specification of the feature map's location.

    Returns
    -------
    model : alphapy.Model
        The model object containing the feature map.

    """

    # Locate the feature map and load it

    search_dir = SSEP.join([directory, 'model'])
    try:
        file_name = most_recent_file(search_dir, 'feature_map_*.pkl')
        logger.info("Loading feature map from %s", file_name)
        # load the feature map
        feature_map = joblib.load(file_name)
        model.feature_map = feature_map
    except Exception:
        logger.error("Could not find feature map in %s", search_dir)

    # Return the model with the feature map
    return model
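A minimal usage sketch for this loader; the project path, the pre-built `model` object, and the `from alphapy.model import ...` path are assumptions for illustration, not taken from the example above.

# Hypothetical usage: load the most recent feature map into an existing model.
# 'projects/my_project' is an assumed layout containing a 'model' subdirectory.
from alphapy.model import load_feature_map  # assumed import path

model = load_feature_map(model, 'projects/my_project')
feature_map = getattr(model, 'feature_map', None)
if feature_map is not None:
    print('Feature map loaded with %d entries' % len(feature_map))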
Example #2
def load_predictor(directory):
    r"""Load the model predictor from storage. By default, the
    most recent model is loaded into memory.

    Parameters
    ----------
    directory : str
        Full directory specification of the predictor's location.

    Returns
    -------
    predictor : function
        The scoring function.

    """

    # Locate the model Pickle file

    predictor = None
    search_dir = SSEP.join([directory, 'model'])
    try:
        file_name = most_recent_file(search_dir, 'model_*.pkl')
        logger.info("Loading model predictor from %s", file_name)
        # load the model predictor
        predictor = joblib.load(file_name)
    except Exception:
        logger.error("Could not find model predictor in %s", search_dir)

    # Return the model predictor
    return predictor
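A short usage sketch, assuming the pickled predictor is a fitted scikit-learn-style estimator and that `X_new` already holds transformed features; the path, the import line, and `X_new` are illustrative assumptions.

# Hypothetical usage: score new samples with the most recent pickled model.
from alphapy.model import load_predictor  # assumed import path

predictor = load_predictor('projects/my_project')
predictions = predictor.predict(X_new)  # X_new: pre-transformed feature matrix (assumed)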
Example #3
def load_predictor(directory):
    r"""Load the model predictor from storage. By default, the
    most recent model is loaded into memory.

    Parameters
    ----------
    directory : str
        Full directory specification of the predictor's location.

    Returns
    -------
    predictor : function
        The scoring function.

    """

    # Locate the model Pickle or HDF5 file

    search_dir = SSEP.join([directory, 'model'])
    file_name = most_recent_file(search_dir, 'model_*.*')

    # Load the model from the file

    file_ext = file_name.split(PSEP)[-1]
    if file_ext == 'pkl' or file_ext == 'h5':
        logger.info("Loading model predictor from %s", file_name)
        # load the model predictor
        if file_ext == 'pkl':
            predictor = joblib.load(file_name)
        elif file_ext == 'h5':
            predictor = load_model(file_name)
    else:
        predictor = None
        logger.error("Could not find model predictor in %s", search_dir)

    # Return the model predictor
    return predictor
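Because this variant dispatches on the file extension, a caller may want to guard against the case where no supported model file is found; with the `predictor = None` fallback above, that check is straightforward. A hedged sketch, where the directory path is an assumption.

# Hypothetical usage: load either a pickled scikit-learn model or an HDF5
# deep learning model, then verify that something usable came back.
predictor = load_predictor('projects/my_project')
if predictor is None:
    raise RuntimeError('No model_*.pkl or model_*.h5 file found in the model directory')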
Example #4
def save_predictions(model, tag, partition):
    r"""Save the predictions to disk.

    Parameters
    ----------
    model : alphapy.Model
        The model object to save.
    tag : str
        A unique identifier for the output files, e.g., a date stamp.
    partition : alphapy.Partition
        Reference to the dataset.

    Returns
    -------
    preds : numpy array
        The prediction vector.
    probas : numpy array
        The probability vector.

    """

    # Extract model parameters.

    directory = model.specs['directory']
    extension = model.specs['extension']
    model_type = model.specs['model_type']
    separator = model.specs['separator']

    # Get date stamp to record file creation
    timestamp = get_datestamp()

    # Specify input and output directories

    input_dir = SSEP.join([directory, 'input'])
    output_dir = SSEP.join([directory, 'output'])

    # Read the prediction frame
    file_spec = ''.join([datasets[partition], '*'])
    file_name = most_recent_file(input_dir, file_spec)
    file_name = file_name.split(SSEP)[-1].split(PSEP)[0]
    pf = read_frame(input_dir, file_name, extension, separator)

    # Cull records before the prediction date

    try:
        predict_date = model.specs['predict_date']
        found_pdate = True
    except KeyError:
        found_pdate = False

    if found_pdate:
        pd_indices = pf[pf.date >= predict_date].index.tolist()
        pf = pf.iloc[pd_indices]
    else:
        pd_indices = pf.index.tolist()

    # Save predictions for all projects

    logger.info("Saving Predictions")
    output_file = USEP.join(['predictions', timestamp])
    preds = model.preds[(tag, partition)].squeeze()
    if found_pdate:
        preds = np.take(preds, pd_indices)
    pred_series = pd.Series(preds, index=pd_indices)
    df_pred = pd.DataFrame(pred_series, columns=['prediction'])
    write_frame(df_pred, output_dir, output_file, extension, separator)

    # Save probabilities for classification projects

    probas = None
    if model_type == ModelType.classification:
        logger.info("Saving Probabilities")
        output_file = USEP.join(['probabilities', timestamp])
        probas = model.probas[(tag, partition)].squeeze()
        if found_pdate:
            probas = np.take(probas, pd_indices)
        prob_series = pd.Series(probas, index=pd_indices)
        df_prob = pd.DataFrame(prob_series, columns=['probability'])
        write_frame(df_prob, output_dir, output_file, extension, separator)

    # Save ranked predictions

    logger.info("Saving Ranked Predictions")
    pf['prediction'] = pred_series
    if model_type == ModelType.classification:
        pf['probability'] = prob_series
        pf.sort_values('probability', ascending=False, inplace=True)
    else:
        pf.sort_values('prediction', ascending=False, inplace=True)
    output_file = USEP.join(['rankings', timestamp])
    write_frame(pf, output_dir, output_file, extension, separator)

    # Return predictions and any probabilities
    return preds, probas
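A usage sketch for the prediction writer; the `'best'` tag, the `Partition.test` value, and the import path follow common alphapy conventions but are assumptions here.

# Hypothetical usage: write predictions (and probabilities for classifiers)
# for the test partition, then inspect what came back.
from alphapy.globals import Partition  # assumed import path

preds, probas = save_predictions(model, 'best', Partition.test)
if probas is not None:
    print('Saved %d class probabilities' % len(probas))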
Example #5
def trade_system(model, system, space, intraday, name, quantity):
    r"""Trade the given system.

    Parameters
    ----------
    model : alphapy.Model
        The model object with specifications.
    system : alphapy.System
        The long/short system to run.
    space : alphapy.Space
        Namespace of instrument prices.
    intraday : bool
        If True, then run an intraday system.
    name : str
        The symbol to trade.
    quantity : float
        The amount of the ``name`` to trade, e.g., the number of shares.

    Returns
    -------
    tradelist : list
        List of trade entries and exits.

    Other Parameters
    ----------------
    Frame.frames : dict
        All of the data frames containing price data.

    """

    # Unpack the model data.

    directory = model.specs['directory']
    extension = model.specs['extension']
    separator = model.specs['separator']

    # Unpack the system parameters.

    longentry = system.longentry
    shortentry = system.shortentry
    longexit = system.longexit
    shortexit = system.shortexit
    holdperiod = system.holdperiod
    scale = system.scale

    # Determine whether or not this is a model-driven system.

    entries_and_exits = [longentry, shortentry, longexit, shortexit]
    active_signals = [x for x in entries_and_exits if x is not None]
    use_model = False
    for signal in active_signals:
        if any(x in signal for x in ['phigh', 'plow']):
            use_model = True

    # Read in the price frame
    pf = Frame.frames[frame_name(name, space)].df

    # Use model output probabilities as input to the system

    if use_model:
        # get latest probabilities file
        probs_dir = SSEP.join([directory, 'output'])
        file_path = most_recent_file(probs_dir, 'probabilities*')
        file_name = file_path.split(SSEP)[-1].split('.')[0]
        # read the probabilities frame and trim the price frame
        probs_frame = read_frame(probs_dir, file_name, extension, separator)
        pf = pf[-probs_frame.shape[0]:]
        probs_frame.index = pf.index
        probs_frame.columns = ['probability']
        # add probability column to price frame
        pf = pd.concat([pf, probs_frame], axis=1)

    # Evaluate the long and short events in the price frame

    for signal in active_signals:
        vexec(pf, signal)

    # Initialize trading state variables

    inlong = False
    inshort = False
    h = 0
    p = 0
    q = quantity
    tradelist = []

    # Loop through prices and generate trades

    for dt, row in pf.iterrows():
        # get closing price
        c = row['close']
        if intraday:
            bar_number = row['bar_number']
            end_of_day = row['end_of_day']
        # evaluate entry and exit conditions
        lerow = row[longentry] if longentry else None
        serow = row[shortentry] if shortentry else None
        lxrow = row[longexit] if longexit else None
        sxrow = row[shortexit] if shortexit else None
        # process the long and short events
        if lerow:
            if p < 0:
                # short active, so exit short
                tradelist.append((dt, [name, Orders.sx, -p, c]))
                inshort = False
                h = 0
                p = 0
            if p == 0 or scale:
                # go long (again)
                tradelist.append((dt, [name, Orders.le, q, c]))
                inlong = True
                p = p + q
        elif serow:
            if p > 0:
                # long active, so exit long
                tradelist.append((dt, [name, Orders.lx, -p, c]))
                inlong = False
                h = 0
                p = 0
            if p == 0 or scale:
                # go short (again)
                tradelist.append((dt, [name, Orders.se, -q, c]))
                inshort = True
                p = p - q
        # check exit conditions
        if inlong and h > 0 and lxrow:
            # long active, so exit long
            tradelist.append((dt, [name, Orders.lx, -p, c]))
            inlong = False
            h = 0
            p = 0
        if inshort and h > 0 and sxrow:
            # short active, so exit short
            tradelist.append((dt, [name, Orders.sx, -p, c]))
            inshort = False
            h = 0
            p = 0
        # if a holding period was given, then check for exit
        if holdperiod and h >= holdperiod:
            if inlong:
                tradelist.append((dt, [name, Orders.lh, -p, c]))
                inlong = False
            if inshort:
                tradelist.append((dt, [name, Orders.sh, -p, c]))
                inshort = False
            h = 0
            p = 0
        # increment the hold counter
        if inlong or inshort:
            h += 1
            if intraday and end_of_day:
                if inlong:
                    # long active, so exit long
                    tradelist.append((dt, [name, Orders.lx, -p, c]))
                    inlong = False
                if inshort:
                    # short active, so exit short
                    tradelist.append((dt, [name, Orders.sx, -p, c]))
                    inshort = False
                h = 0
                p = 0
    return tradelist
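A hedged sketch of driving this routine for one symbol; the `Space` and `System` constructor arguments, the import paths, and the symbol are assumptions about how the calling code and price frames were set up.

# Hypothetical usage: run a daily long/short system on one symbol whose
# price frame has already been registered in Frame.frames.
from alphapy.space import Space                   # assumed import path
from alphapy.system import System, trade_system   # assumed import path

space = Space('stock', 'prices', '1d')            # subject, schema, fractal (assumed)
system = System('closer', 'hightrue', 'lowtrue')  # name, long entry, short entry (assumed)
trades = trade_system(model, system, space, False, 'AAPL', 100)
print('Generated %d trade records' % len(trades))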