Example #1
def load_model():
    model_save_path = constant.load_model_path
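    # map_location forces all tensors onto the CPU, regardless of the device
    # the checkpoint was saved from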
    state = torch.load(model_save_path,
                       map_location=lambda storage, location: storage)
    constant.arg = state['config']
    load_settings()

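    # Note: vocab is expected to come from the enclosing scope here;
    # Example #5 shows a variant that builds it via prepare_data_loaders.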
    if constant.model == "LSTM":
        model = LstmModel(vocab=vocab,
                          embedding_size=constant.emb_dim,
                          hidden_size=constant.hidden_dim,
                          num_layers=constant.n_layers,
                          is_bidirectional=constant.bidirec,
                          input_dropout=constant.drop,
                          layer_dropout=constant.drop,
                          attentive=constant.attn)
    elif constant.model == "UTRS":
        model = UTransformer(vocab=vocab,
                             embedding_size=constant.emb_dim,
                             hidden_size=constant.hidden_dim,
                             num_layers=constant.hop,
                             num_heads=constant.heads,
                             total_key_depth=constant.depth,
                             total_value_depth=constant.depth,
                             filter_size=constant.filter,
                             act=constant.act)
    elif constant.model == "ELMO":
        model = ELMoEncoder(C=4)
    else:
        print("Model is not defined")
        exit(0)

    # load_state_dict updates the model in place and does not return the
    # model, so its result must not be assigned back to `model`
    model.load_state_dict(state['model'])
    return model
Example #2
def rebalance_porfolio(date, asset, companies_data):
    # date_string = datetime.datetime.strftime(date, "%Y-%m-%d")
    print("=" * 50)
    print("Rebalancing Portfolio For Current Date: ", date)
    print("=" * 50)

    # Load Trained Model
    lstm = LstmModel(name="Test2", load=True)

    # Save Values In Dict
    companies_rtv = dict()
    companies_prices = dict()

    # Predict One-Week Price
    # Calculate Return-To-Volatility
    # Save The Result For Different Companies
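    # Hard-coded window sizes (input lookback, forecast horizon, stride);
    # presumably chosen to match how the model was trained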
    day_in, day_want, step = 50, 10, 2
    for company, data in companies_data.items():
        # Predictions From LSTM
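        # Assumes the date appears exactly once in the data; .item() raises otherwise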
        idx = data[data['Date'] == date].index.item()
        _input = data.iloc[idx - day_in:idx + day_want - step + 1,
                           1:data.shape[1]]
        predictions = pd.DataFrame(lstm.predict(_input),
                                   index=pd.date_range(start=date, periods=10))

        # Calculate Return-To-Volatility
        rtv = calculate_rtv(predictions)
        companies_rtv[company] = rtv

        # Get Price List
        companies_prices[company] = data.iloc[idx, 5]

    # Sort The Results Descending & Choose Top 10 Companies
    filtered_list = list(filter(lambda x: x[1] >= 0, companies_rtv.items()))
    sorted_list = sorted(filtered_list, key=lambda x: x[1], reverse=True)
    companies_rtv_filter = {t[0]: t[1] for t in sorted_list[:10]}

    # Build Portfolio
    rtv_sum = sum(companies_rtv_filter.values())
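    # Weights are proportional to RTV; note rtv_sum can be 0 (all surviving
    # RTVs exactly 0, since the filter keeps >= 0), which would raise ZeroDivisionError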
    portfolio_pct = {
        key: (value / rtv_sum)
        for key, value in companies_rtv_filter.items()
    }
    portfolio = {
        company: ((asset * pct) / companies_prices[company])
        for company, pct in portfolio_pct.items()
    }

    print("Company Prices: ", companies_prices)
    print("Companies Return-To-Volatility: ", companies_rtv)
    print("Filtered 10 Companies: ", companies_rtv_filter)
    print("Portfolio Percentage: ", portfolio_pct)
    print("Portfolio: ", portfolio)

    return portfolio, portfolio_pct
Example #3
def get_baseline_model(args):
    vocab = utils.load_vocab(args.vocab_json)
    if args.baseline_start_from is not None:
        model, kwargs = utils.load_model(args.baseline_start_from)
    elif args.model_type == 'LSTM':
        kwargs = {
            'vocab': vocab,
            'rnn_wordvec_dim': args.rnn_wordvec_dim,
            'rnn_dim': args.rnn_hidden_dim,
            'rnn_num_layers': args.rnn_num_layers,
            'rnn_dropout': args.rnn_dropout,
            'fc_dims': parse_int_list(args.classifier_fc_dims),
            'fc_use_batchnorm': args.classifier_batchnorm == 1,
            'fc_dropout': args.classifier_dropout,
        }
        model = LstmModel(**kwargs)
    elif args.model_type == 'CNN+LSTM':
        kwargs = {
            'vocab': vocab,
            'rnn_wordvec_dim': args.rnn_wordvec_dim,
            'rnn_dim': args.rnn_hidden_dim,
            'rnn_num_layers': args.rnn_num_layers,
            'rnn_dropout': args.rnn_dropout,
            'cnn_feat_dim': parse_int_list(args.feature_dim),
            'cnn_num_res_blocks': args.cnn_num_res_blocks,
            'cnn_res_block_dim': args.cnn_res_block_dim,
            'cnn_proj_dim': args.cnn_proj_dim,
            'cnn_pooling': args.cnn_pooling,
            'fc_dims': parse_int_list(args.classifier_fc_dims),
            'fc_use_batchnorm': args.classifier_batchnorm == 1,
            'fc_dropout': args.classifier_dropout,
        }
        model = CnnLstmModel(**kwargs)
    elif args.model_type == 'CNN+LSTM+SA':
        kwargs = {
            'vocab': vocab,
            'rnn_wordvec_dim': args.rnn_wordvec_dim,
            'rnn_dim': args.rnn_hidden_dim,
            'rnn_num_layers': args.rnn_num_layers,
            'rnn_dropout': args.rnn_dropout,
            'cnn_feat_dim': parse_int_list(args.feature_dim),
            'stacked_attn_dim': args.stacked_attn_dim,
            'num_stacked_attn': args.num_stacked_attn,
            'fc_dims': parse_int_list(args.classifier_fc_dims),
            'fc_use_batchnorm': args.classifier_batchnorm == 1,
            'fc_dropout': args.classifier_dropout,
        }
        model = CnnLstmSaModel(**kwargs)
    else:
        raise ValueError('Unsupported model_type: "%s"' % args.model_type)
    if model.rnn.token_to_idx != vocab['question_token_to_idx']:
        # Make sure new vocab is superset of old
        for k, v in model.rnn.token_to_idx.items():
            assert k in vocab['question_token_to_idx']
            assert vocab['question_token_to_idx'][k] == v
        for token, idx in vocab['question_token_to_idx'].items():
            model.rnn.token_to_idx[token] = idx
        kwargs['vocab'] = vocab
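        # Grow the RNN's embedding table so it covers the merged vocabulary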
        model.rnn.expand_vocab(vocab['question_token_to_idx'])
    model.cuda()
    model.train()
    return model, kwargs
Example #4
def dummy_porfolio(date, asset, companies_data):
    lstm = LstmModel(name="Test2", load=True)
    day_in, day_want, step = 50, 10, 2
    for company, data in companies_data.items():
        # Predictions From LSTM
        idx = data[data['Date'] == date].index.item()
        print(data.iloc[idx - day_in:idx + day_want - step + 1,
                        0:data.shape[1]])
        _input = data.iloc[idx - day_in:idx + day_want - step + 1,
                           1:data.shape[1]]
        predictions = pd.DataFrame(lstm.predict(_input),
                                   index=pd.date_range(start=date, periods=10))
        break
    return {}, {}
Example #5
def load_model():
    model_load_path = constant.load_model_path
    model_save_path = constant.save_path
    state = torch.load(model_load_path,
                       map_location=lambda storage, location: storage)
    arg = state['config']
    load_settings(arg)

    data_loaders_train, data_loaders_val, data_loaders_test, vocab = prepare_data_loaders(
        num_split=1,
        batch_size=constant.batch_size,
        hier=False,
        elmo=constant.elmo,
        dev_with_label=False,
        include_test=True)

    if constant.model == "LSTM":
        model = LstmModel(vocab=vocab,
                          embedding_size=constant.emb_dim,
                          hidden_size=constant.hidden_dim,
                          num_layers=constant.n_layers,
                          is_bidirectional=constant.bidirec,
                          input_dropout=constant.drop,
                          layer_dropout=constant.drop,
                          attentive=constant.attn)
    elif constant.model == "UTRS":
        model = UTransformer(vocab=vocab,
                             embedding_size=constant.emb_dim,
                             hidden_size=constant.hidden_dim,
                             num_layers=constant.hop,
                             num_heads=constant.heads,
                             total_key_depth=constant.depth,
                             total_value_depth=constant.depth,
                             filter_size=constant.filter,
                             act=constant.act)
    elif constant.model == "ELMO":
        model = ELMoEncoder(C=4)
    else:
        print("Model is not defined")
        exit(0)

    model.load_state_dict(state['model'])
    return model, data_loaders_test, vocab, model_save_path
Example #6
import os

import keras
import pandas as pd
import numpy as np
from models import LstmModel
from config import DIR_CONFIG, FILE_CONFIG


if __name__ == "__main__":
    # Initialize Dates
    # START_DATE = '2017-01-02'
    # END_DATE = '2019-06-01'
    START_DATE = '2015-07-06'
    END_DATE = '2018-05-28'

    # Initialize Model
    lstm = LstmModel(name='Old2')

    # Load List Of Companies
    companies = []

    with open(FILE_CONFIG["COMPANY_LIST"], 'r') as f:
        for line in f:
            companies.append(line.rstrip("\n"))
    print(companies)

    # Train Model With Different Companies Adj Close Values
    for company in companies:
        path = os.path.join(DIR_CONFIG["DATA_DIR"], '{}_data.csv'.format(company))
        raw_data = pd.read_csv(path, index_col=0)
        raw_data = raw_data.loc[:END_DATE]["Adj Close"].values
        raw_data = np.reshape(raw_data, (raw_data.shape[0], 1))
Example #7
import csv
import json
import os
import sys

import torch
from flask import Flask

# Add the models directory to the path
dir_path = os.path.dirname(os.path.realpath(__file__))
parent_dir = os.path.dirname(dir_path)
sys.path.append(os.path.join(parent_dir, 'birdsong'))

#from models import Zilpzalp
#from models import Hawk
from models import LstmModel
from utils import avg_score, maxwindow_score, get_top5_prediction

app = Flask(__name__)

# Initiate the model
model = LstmModel(time_axis=216, freq_axis=256, no_classes=100)

# Load the state of  model from checkpoint
checkpoint_path = 'model/checkpoint_Lstm_29-03'
checkpoint = torch.load(checkpoint_path, map_location='cpu')
state = checkpoint['state_dict']
model.load_state_dict(state)
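# Inference mode: disables dropout and puts batch-norm layers in eval behavior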
model.eval()

# Add the dictionary with the species info
label_dict = {}
reader = csv.DictReader(open('model/top100_codes_translated.csv'))
for row in reader:
    label_dict[int(row['id1'])] = {
        'name': row['english'],
        'img_source': row['img_source'],
    }
Example #8
import os

import keras
import pandas as pd
import numpy
from models import LstmModel

CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
DATA_DIR = os.path.join(CURRENT_DIR, 'old_data')
CHECKPOINT_DIR = os.path.join(CURRENT_DIR, 'checkpoint')

print("Data Stored In", DATA_DIR)
print("Checkpoint Stored In", CHECKPOINT_DIR)

if __name__ == "__main__":

    # Initialize Model
    lstm = LstmModel(name='Test2')

    # Load List Of Companies
    companies = []

    with open(os.path.join(CURRENT_DIR, "company_list.txt"), 'r') as f:
        for line in f:
            companies.append(line.rstrip("\n"))
    print(companies)

    # Train Model With Different Companies Data
    for company in companies:
        path = os.path.join(DATA_DIR, '{}_data.csv'.format(company))
        raw_data = pd.read_csv(path, index_col=0)
        raw_data = raw_data.loc[:'2017-12-31'].values
        #print(raw_data)
Example #9
def rebalance_porfolio(date, asset, companies_data):
    # date_string = datetime.datetime.strftime(date, "%Y-%m-%d")
    print("=" * 50)
    print("Rebalancing Portfolio For Current Date: ", date)
    print("=" * 50)

    # Load Trained Model
    lstm = LstmModel(name="Old1", load=True)

    # Save Values In Dict
    companies_rtv = dict()
    companies_prices = dict()

    # Predict One-Week Price
    # Calculate Return-To-Volatility
    # Save The Result For Different Companies
    day_in, day_out, num_features = lstm.load_settings()

    for company, data in companies_data.items():
        # Current Date's Index
        try:
            idx = data[data['Date'] == date].index.values[0]
        except IndexError:  # When Company Data Is Empty On That Date
            continue

        # Predictions From LSTM
        _input = data.iloc[idx - day_in:idx]['Adj Close'].values
        _input = np.reshape(_input, (_input.shape[0], 1))
        predictions = pd.DataFrame(lstm.predict(company=company,
                                                _input=_input),
                                   index=pd.date_range(start=date,
                                                       periods=day_out))

        # Calculate Return-To-Volatility
        rtv = calculate_rtv(predictions)
        companies_rtv[company] = rtv

        # Get Adj Closing Price List
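        # Column index 4 is assumed to hold the adjusted closing price in this CSV layout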
        companies_prices[company] = data.iloc[idx, 4]

    # Sort The Results Descending & Choose Top 10 Companies
    filtered_list = list(filter(lambda x: x[1] > 0, companies_rtv.items()))
    sorted_list = sorted(filtered_list, key=lambda x: x[1], reverse=True)
    companies_rtv_filter = {t[0]: t[1] for t in sorted_list[:10]}

    # Build Portfolio
    rtv_sum = sum(companies_rtv_filter.values())
    portfolio_pct = {
        key: (value / rtv_sum)
        for key, value in companies_rtv_filter.items()
    }
    portfolio = {
        company: ((asset * pct) / companies_prices[company])
        for company, pct in portfolio_pct.items()
    }

    print("Company Prices: ", companies_prices)
    print("Companies Return-To-Volatility: ", companies_rtv)
    print("Filtered 10 Companies: ", companies_rtv_filter)
    print("Portfolio Percentage: ", portfolio_pct)
    print("Portfolio: ", portfolio)

    return portfolio, portfolio_pct
Example #10
def main():
    logging.basicConfig(level=logging.INFO)
    task_id = str(int(time.time()))
    tmp_model_path = os.path.join('/tmp', '%s.h5' % task_id)

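    # Experiment toggles: exactly one of the blocks below should be enabled
    # (if True); if several were enabled, the last one would overwrite `task`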
    if True:
        task = {
            'task_id': task_id,
            'score_metric': 'val_rmse',
            'dataset_path': 'showdown_full',
            'final': True,
            'model_config': TransferLstmModel.create_cnn(
                tmp_model_path,
                transform_model_config={
                    'model_uri': '/models/snapshots/regression/1480182349/31.h5',
                    'scale': 16,
                    'type': 'regression'
                },
                timesteps=50,
                W_l2=0.001,
                scale=16.,
                input_shape=(120, 320, 3)),
            'training_args': {
                'batch_size': 32,
                'epochs': 100,
            },
        }

    if False:
        task = {
            'task_id': task_id,
            'score_metric': 'loss',
            'dataset_path': 'shinale_full',
            'final': False,
            'model_config': RegressionModel.create_resnet_inception_v2(
                tmp_model_path,
                learning_rate=0.001,
                input_shape=(120, 320, 3)),
            'training_args': {
                'batch_size': 16,
                'epochs': 100,
                'pctl_sampling': 'uniform',
                'pctl_thresholds': showdown_percentiles(),
            },
        }

    if False:
        task = {
            'task_id': task_id,
            'score_metric': 'loss',
            'dataset_path': 'showdown_full',
            'final': True,
            'model_config': {
                'model_uri': '/models/output/1480004259.h5',
                'scale': 16,
                'type': 'regression'
            },
            'training_args': {
                'batch_size': 32,
                'epochs': 40,
            },
        }

    if False:
        # sharp left vs center vs sharp right
        task = {
            'task_id': task_id,
            'dataset_path': 'finale_full',
            'score_metric': 'val_categorical_accuracy',
            'model_config': CategoricalModel.create(
                tmp_model_path,
                use_adadelta=True,
                W_l2=0.001,
                thresholds=[-0.061, 0.061]),
            'training_args': {
                'batch_size': 32,
                'epochs': 30,
                'pctl_sampling': 'uniform',
            },
        }

    if False:
        # half degree model
        task = {
            'task_id': task_id,
            'dataset_path': 'finale_center',
            'model_config': CategoricalModel.create(
                tmp_model_path,
                use_adadelta=True,
                learning_rate=0.001,
                thresholds=np.linspace(-0.061, 0.061, 14)[1:-1],
                input_shape=(120, 320, 3)),
            'training_args': {
                'pctl_sampling': 'uniform',
                'batch_size': 32,
                'epochs': 20,
            },
        }

    if False:
        input_model_config = {
            'model_uri': 's3://sdc-matt/simple/1477715388/model.h5',
            'type': 'simple',
            'cat_classes': 5
        }

        ensemble_model_config = EnsembleModel.create(tmp_model_path,
                                                     input_model_config,
                                                     timesteps=3,
                                                     timestep_noise=0.1,
                                                     timestep_dropout=0.5)

        task = {
            'task_id': task_id,
            'dataset_path': 'final_training',
            'model_config': ensemble_model_config,
            'training_args': {
                'batch_size': 64,
                'epochs': 3
            },
        }

    if False:
        lstm_model_config = LstmModel.create(tmp_model_path, (10, 120, 320, 3),
                                             timesteps=10,
                                             W_l2=0.0001,
                                             scale=60.0)

        task = {
            'task_id': task_id,
            'dataset_path': 'showdown_full',
            'final': True,
            'model_config': lstm_model_config,
            'training_args': {
                'pctl_sampling': 'uniform',
                'batch_size': 32,
                'epochs': 10,
            },
        }

    handle_task(task)
Example #11
    data_loaders_train, data_loaders_val, data_loaders_test, vocab = prepare_data_loaders(
        num_split=constant.num_split,
        batch_size=constant.batch_size,
        dev_with_label=constant.dev_with_label,
        include_test=constant.include_test)
    results = []

    for i in range(constant.num_split):
        data_loader_train = data_loaders_train[i]
        data_loader_val = data_loaders_val[i]
        data_loader_test = data_loaders_test[i]

        print("###### EXPERIMENT {} ######".format(i + 1))
        print("(EXPERIMENT %d) Create the model" % (i + 1))
        if constant.model == "LSTM":
            model = LstmModel(vocab=vocab,
                              embedding_size=constant.emb_dim,
                              hidden_size=constant.hidden_dim,
                              num_layers=constant.n_layers,
                              is_bidirectional=constant.bidirec,
                              input_dropout=constant.drop,
                              layer_dropout=constant.drop,
                              attentive=constant.attn)
        elif constant.model == "UTRS":
            model = UTransformer(vocab=vocab,
                                 embedding_size=constant.emb_dim,
                                 hidden_size=constant.hidden_dim,
                                 num_layers=constant.hop,
                                 num_heads=constant.heads,
                                 total_key_depth=constant.depth,
                                 total_value_depth=constant.depth,
                                 filter_size=constant.filter,
                                 act=constant.act)
        elif constant.model == "ELMO":
            model = ELMoEncoder(C=4)