예제 #1
0
파일: main.py 프로젝트: wesley1001/MACNN
def main(_):
    """Train and evaluate the model in rounds, reporting the best accuracy.

    Builds the train/test file paths and the model directory from the
    module-level ``path`` and ``data_name``, optionally wipes the model
    directory when ``retrain`` is set, then runs 125 rounds of
    ``estimator.train`` followed by a single-step ``estimator.evaluate``.
    After every round the best accuracy seen so far and the matching error
    (``1 - best``) are printed.

    Args:
        _: Unused positional argument (presumably the argv list passed by
            the TensorFlow app runner -- confirm against the caller).
    """
    train_set = [path + '\\' + data_name + '\\' + data_name + '_TRAIN']
    test_set = [path + '\\' + data_name + '\\' + data_name + '_TEST']
    model_url = path + '\\' + 'cnn' + '\\' + data_name + '\\'
    best = 0.0
    # NOTE(review): the returned values are not used below; the call is kept
    # because it may have side effects that are not visible from here.
    data, label = get_data(train_set, data_set.length, data_set.classes_num,
                           batch_size, False)

    if retrain:
        # On a first run there is no previous model directory to delete;
        # a missing directory must not abort the retrain.
        try:
            shutil.rmtree(model_url)
        except FileNotFoundError:
            pass

    model = Net()

    hps = {
        'learning_rate': learning_rate,
    }

    estimator = tf.estimator.Estimator(model.model_fn, model_url, params=hps)
    logging_hook = tf.train.LoggingTensorHook({},
                                              every_n_iter=100,
                                              at_end=True)

    for i in range(125):
        # One training round of `steps` steps on the training set.
        estimator.train(lambda: get_data(train_set, data_set.length, data_set.
                                         classes_num, batch_size, True),
                        [logging_hook],
                        steps=steps)

        # Evaluate in a single step, using data_set.test_size as batch size.
        result = estimator.evaluate(
            lambda: get_data(test_set, data_set.length, data_set.classes_num,
                             data_set.test_size, False),
            steps=1)

        if best < result['accuracy']:
            best = result['accuracy']
        print('The best accuracy is', best)
        print('The best error is', 1 - best)
예제 #2
0
import pandas as pd
import pickle
import numpy as np
from sklearn.cross_validation import KFold, StratifiedKFold
from sklearn.metrics import log_loss
from sklearn.metrics import confusion_matrix
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb
from submissions.submit import make_submission
from data_prepare import get_data, get_sub
from utils import Timer
from itertools import product

# Load the data inside the project's Timer context manager.
with Timer():
    X_train, X_test, features = get_data()
    print('Data loaded')
# Run-mode flags; the code that reads them is not visible in this section.
validate = False
RF = False
# Hyper-parameter value lists (one candidate each here); the loop that
# follows iterates over their Cartesian product.
depths = [10]
etas = [0.08]  # Learning rate
alphas = [0.2]  # L1 weight penalty
num_est = [100]

# Results log. Mode 'w+' truncates any existing file; the handle is not
# closed within this section.
log_file = open('./submissions/model_params.log', 'w+')
# Tab-separated column header, written as the first line of the log.
headers = ("eta  \t alpha  \t num_est \t depth   \t loss_train \t "
           "loss_val \t loss_test\n")
log_file.write(headers)

for depth, eta, alpha, num_est in product(depths, etas, alphas, num_est):
예제 #3
0
파일: train.py 프로젝트: MiniBee/HongNLP
        batch_inputs = torch.zeros((batch_size, max_doc_len, max_sent_len),
                                   dtype=torch.int64)
        batch_masks = torch.zeros((batch_size, max_doc_len, max_sent_len),
                                  dtype=torch.int64)
        batch_labels = torch.LongTensor(doc_labels)

        for b in range(batch_size):
            for sent_idx in range(doc_lens[b]):
                sent_data = batch_data[b][2][sent_idx]  # 表示一个句子
                for word_idx in range(sent_data[0]):
                    batch_inputs[b, sent_idx,
                                 word_idx] = sent_data[1][word_idx]
                    batch_masks[b, sent_idx, word_idx] = 1

        if use_cuda:
            batch_inputs = batch_inputs.to(device)
            batch_masks = batch_masks.to(device)
            batch_labels = batch_labels.to(device)

        return (batch_inputs, batch_masks), batch_labels


# Script entry point: load the data splits, build the vocabulary and the
# model, then run training.
if __name__ == '__main__':
    # fold_num and dev_fold are defined outside this section.
    train_data, dev_data, test_data = get_data(fold_num, dev_fold)
    # The vocabulary is built from the training split only.
    vocab = Vocab(train_data)
    model = Model(vocab)
    trainer = Trainer(model, vocab, train_data, dev_data, test_data)
    trainer.train()
    # Print the vocabulary's (private) label-to-id attribute after training.
    print(vocab._label2id)
예제 #4
0
파일: new_test.py 프로젝트: chenhq/test231
    kline2_params = {
        'window': 256,
    }
    params_list.append(kline2_params)
    func_list.append(feature_kline2)

    label_by_multi_ma_params = {
        'window': [3, 5, 10]
    }
    params_list.append(label_by_multi_ma_params)
    func_list.append(label_by_multi_ma)

    construct_feature_func = partial(construct_features, params_list=params_list, func_list=func_list, test=True)

    ohlcv_list = get_data(file_name="~/cs_market.csv", stks=['002277.XSHE'])

    stk_features_list = construct_features_for_stocks(ohlcv_list, construct_feature_func)

    print(len(stk_features_list))
    print(stk_features_list[0].columns)
    # i_columns = ['ma_1', 'ma_2', 'ma_3', 'ma_5', 'ma_8', 'ma_13', 'ma_21', 'ma_34', 'ma_55']
    f = stk_features_list[0]
    f = f.reset_index().reset_index()
    print(f.columns)
    fig, ax = plt.subplots(1, figsize=(21, 7))
    f.loc[:, 'close'].plot(figsize=(21, 7))
    f[f["label"] == -1].plot.scatter(x='index', y='close', s=15, c='green', figsize=(21, 7), ax=ax,
                                     label="down")
    f[f["label"] == 1].plot.scatter(x='index', y='close', s=15, c='red', figsize=(21, 7), ax=ax,
                                    label="up")
def create_strategy(filename: str,
                    columns_list: List[str],
                    som_width: int,
                    som_height: int,
                    n_iter: int,
                    sigma=0.3,
                    learning_rate=0.01) -> tuple:
    """
    Creates strategy which can be used in testing part of the script.
    - reads preprocessed data split into training and testing sets
    - trains the som model
    - calculates profit per cluster in training and testing datasets
    - gets mean profits for the buy and sell classes
    Arguments:
        filename: name of file with data
        columns_list: list of columns which should be left in the training data
        som_width: width of som map
        som_height: height of som map
        n_iter: number of iterations in som map
        sigma: sigma parameter for som map
        learning_rate: learning rate for som map
    Returns:
        A 6-tuple, in this order:
        len(df_profit_per_cluster_train): amount of used clusters in training data
        len(df_profit_per_cluster_test): amount of used clusters in testing data
        buy_clusters_mean_profit_train: mean profit in buy clusters for training data
        sell_clusters_mean_profit_train: mean profit in sell clusters for training data
        buy_clusters_mean_profit_test: mean profit in buy clusters for testing data
        sell_clusters_mean_profit_test: mean profit in sell clusters for testing data
        The four mean-profit entries are all None when computing them fails.
    """
    # get prepared data (df_prepared is returned by get_data but not used here)
    df, df_prepared, df_train, df_test, df_train_columns = get_data(
        filename, columns_list)

    # train som
    final_df_train, final_df_test = train_som(som_width,
                                              som_height,
                                              df,
                                              df_train,
                                              df_test,
                                              df_train_columns,
                                              n_iter,
                                              sigma=sigma,
                                              learning_rate=learning_rate)

    # get profit per cluster in train and test datasets
    df_profit_per_cluster_train = get_profit_per_cluster(final_df_train)
    df_profit_per_cluster_test = get_profit_per_cluster(final_df_test)

    # get mean profit for sell and buy class in training and testing datasets
    try:
        buy_clusters_mean_profit_train, buy_clusters_list, sell_clusters_mean_profit_train, sell_clusters_list = \
            get_mean_profit_per_class_from_train_df(df_profit_per_cluster_train)

        buy_clusters_mean_profit_test, sell_clusters_mean_profit_test = \
            get_mean_profit_per_class_from_test_df(df_profit_per_cluster_test, buy_clusters_list, sell_clusters_list)
    # if the data was assigned to fewer than 3 clusters
    # Catch Exception rather than using a bare except, so KeyboardInterrupt
    # and SystemExit still propagate.
    # NOTE(review): the exact exception type raised by the helpers is not
    # visible here -- narrow this further once it is known.
    except Exception:
        buy_clusters_mean_profit_train, sell_clusters_mean_profit_train, \
        buy_clusters_mean_profit_test, sell_clusters_mean_profit_test = None, None, None, None

    return len(df_profit_per_cluster_train), len(df_profit_per_cluster_test), \
           buy_clusters_mean_profit_train, sell_clusters_mean_profit_train, \
           buy_clusters_mean_profit_test, sell_clusters_mean_profit_test
def create_final_strategy(filename: str,
                          columns_list: List[str],
                          som_width=Config.som_width,
                          som_height=Config.som_height,
                          n_iter=Config.n_iter,
                          sigma=Config.sigma,
                          learning_rate=Config.learning_rate) -> tuple:
    """
    Used for creating a final strategy (not for testing)
    - reads preprocessed data split into training and testing sets
    - trains the som model
    - calculates profit per cluster in training dataset
    - gets list of sell and buy clusters
    Arguments:
        filename: name of file with data
        columns_list: list of columns which should be left in the training data
        som_width: width of som map
        som_height: height of som map
        n_iter: number of iterations in som map
        sigma: sigma parameter for som map
        learning_rate: learning rate for som map
    Returns:
        final_df_train: training dataset
        final_df_test: testing dataset
        buy_clusters_list: list of buy clusters
        sell_clusters_list: list of sell clusters
    Raises:
        AssertionError: if fewer than 3 clusters were produced for the
            training data.
    """

    print(
        f'Creating final strategy for parameters: \nmap_size: {som_height}\nn_iter: {n_iter}\nsigma:{sigma}\nlr: {learning_rate}'
    )
    # get prepared data (df_prepared is returned by get_data but not used here)
    df, df_prepared, df_train, df_test, df_train_columns = get_data(
        filename, columns_list)

    # train som
    final_df_train, final_df_test = train_som(som_width,
                                              som_height,
                                              df,
                                              df_train,
                                              df_test,
                                              df_train_columns,
                                              n_iter,
                                              sigma=sigma,
                                              learning_rate=learning_rate)

    # get profit per cluster in train datasets
    df_profit_per_cluster_train = get_profit_per_cluster(final_df_train)

    # Raise explicitly instead of using `assert`, so the check is not
    # stripped under `python -O`. Without it, group_size below would be 0
    # and `iloc[-0:]` would select every cluster as a sell cluster.
    if len(df_profit_per_cluster_train) < 3:
        raise AssertionError("Algorithm, returned less than 3 clusters.")

    # rank clusters by profit, best first
    df_profit_per_cluster = df_profit_per_cluster_train.sort_values(
        by='profit', ascending=False)
    # top third become buy clusters, bottom third sell clusters
    group_size = len(df_profit_per_cluster) // 3

    buy_clusters_list = list(
        df_profit_per_cluster.iloc[:group_size]['cluster'])
    sell_clusters_list = list(
        df_profit_per_cluster.iloc[-group_size:]['cluster'])

    return final_df_train, final_df_test, buy_clusters_list, sell_clusters_list
예제 #7
0
    label_by_ma_price_params = {
        'window': 250,
        'next_ma_window': 3,
        'quantile_list': [0, 0.1, 0.3, 0.7, 0.9, 1]
    }
    params_list.append(label_by_ma_price_params)
    func_list.append(label_by_ma_price)

    construct_feature_func = partial(construct_features,
                                     params_list=params_list,
                                     func_list=func_list,
                                     test=True)

    data_set, reverse_func = get_data(
        file_name="E:\market_data/cs_market.csv",
        stks=zz500[200:205],
        construct_feature_func=construct_feature_func,
        split_dates=["2016-01-01", "2017-01-01"])

    for tag in ['train', 'validate', 'test']:
        data_set[tag]['label2'] = data_set[tag]['label'].map(reverse_func)
        labels = data_set[tag]['label2'].unique().tolist()
        labels.sort()
        print(tag)
        for label in labels:
            selected = data_set[tag][data_set[tag]['label2'] == label]
            print("{}: {}".format(label, len(selected) / len(data_set[tag])))

    idx_slice = pd.IndexSlice
    stks = data_set['train'].index.get_level_values('code').unique().tolist()
    stks.sort()