Example 1
def main(argv):
    args = parser.parse_args(argv[1:])

    beg_date = '2015-01-01'
    funds = ['002001_Nav']
    learning_rate = 0.0001
    drop_out = 0.5
    train_steps = 5000
    df_filtered = fund_Analysis(beg_date, funds)

    train_sets, cv_sets, test_sets = fund_data_proprocessing(
        beg_date, funds, df_filtered, 'Week')
    test_features_data, features_name, test_labels = getTFDataSets(test_sets)
    # train_features_data, _, train_labels = getTFDataSets(train_sets)
    # cv_features_data, _, cv_labels = getTFDataSets(cv_sets)

    # Define the estimator: one numeric feature column per feature name
    feature_cols = [tf.feature_column.numeric_column(k) for k in features_name]

    classifier = tf.estimator.DNNClassifier(
        n_classes=3,
        feature_columns=feature_cols,
        hidden_units=[1024, 512, 128],
        optimizer=tf.train.AdamOptimizer(learning_rate),
        dropout=drop_out)
Example 2
def main():
    beg_date = '2004-01-01'
    funds = ['002001_Nav']
    period = 25
    df_filtered = fund_Analysis(beg_date, funds)
    train_sets, cv_sets, test_sets = fund_data_proprocessing(
        beg_date,
        funds,
        df_filtered,
        degroup='Roll',
        split_portion=0.15,
        period=period)
    test_features_data, features_name, test_labels = getTFDataSets(
        test_sets, period)
    train_features_data, _, train_labels = getTFDataSets(train_sets, period)
    cv_features_data, _, cv_labels = getTFDataSets(cv_sets, period)

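    # X/y pool the train, cv, and test splits; X_2/y_2 hold only train+cv
    # so the test split stays held out.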
    X = np.append(np.append(train_features_data, cv_features_data, axis=0),
                  test_features_data,
                  axis=0)
    X_2 = np.append(train_features_data, cv_features_data, axis=0)
    y = np.append(np.append(train_labels, cv_labels, axis=0),
                  test_labels,
                  axis=0)
    y_2 = np.append(train_labels, cv_labels, axis=0)

    print "Sample Size: {}".format(X_2.shape)
    print "Labels size: {}".format(y_2.shape)

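    # This matches statsmodels' PCA interface: the data matrix is passed
    # directly, and results are read off factors, ic, and eigenvals.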
    pca = PCA(X, ncomp=200)
    print(pca.factors.shape)
    print(pca.ic)
    print(pca.eigenvals)
Example 3
def main(argv):
    args = parser.parse_args(argv[1:])

    beg_date = '2015-01-01'
    # funds = ['002001_Nav']
    funds = ['240020_Nav']
    train_steps = 2000
    df_filtered = fund_Analysis(beg_date, funds)

    train_sets, cv_sets, test_sets = fund_data_proprocessing(
        beg_date, funds, df_filtered, 'Week')
    # print train_sets.keys()
    # print train_sets['sample_sets'][0]
    test_features_data, features_name, test_labels = getTFDataSets(test_sets)
    train_features_data, _, train_labels = getTFDataSets(train_sets)
    cv_features_data, _, cv_labels = getTFDataSets(cv_sets)

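    # numpy_input_fn feeds in-memory arrays to the Estimator;
    # num_epochs=None repeats the training data until max_steps is reached.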
    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": train_features_data},
        y=train_labels,
        batch_size=50,
        num_epochs=None,
        shuffle=False)
    eval_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": cv_features_data}, y=cv_labels, shuffle=False)
    pred_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": test_features_data}, shuffle=False)

    # One numeric feature column per feature name
    # (not consumed by the custom model_fn below)
    feature_cols = [tf.feature_column.numeric_column(k) for k in features_name]

    # tensors_to_log = {'probabilities': 'Softmax_probabilities'}
    # tensors_to_log = {'accuracy': 'system_accuracy'}
    # logging_hook = tf.train.LoggingTensorHook(tensors=tensors_to_log, every_n_iter=50)

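    # Custom Estimator built from lstm_model_fn; checkpoints are written to
    # (and restored from) model_dir.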
    classifier = tf.estimator.Estimator(
        model_fn=lstm_model_fn,
        model_dir="/home/ghao/PycharmProjects/Project_FundsAnalysis/LSTM_MultiCells")

    # train_op = classifier.train(input_fn=train_input_fn, max_steps=train_steps, hooks=[logging_hook])
    train_op = classifier.train(input_fn=train_input_fn, max_steps=train_steps)
    # print train_op
    eval_results = classifier.evaluate(input_fn=eval_input_fn,
                                       checkpoint_path=None)
    print(eval_results)
    # prediction_results = classifier.predict(input_fn=pred_input_fn, checkpoint_path=None)
    prediction_results = list(
        classifier.predict(input_fn=pred_input_fn, checkpoint_path=None))

    for each_result in prediction_results:
        print(each_result['probabilities'], each_result['classes'])
Example 4
def main():
    beg_date = '2004-01-01'
    funds = ['002001_Nav']
    period = 25
    df_filtered = fund_Analysis(beg_date, funds)
    train_sets, cv_sets, test_sets = fund_data_proprocessing(
        beg_date,
        funds,
        df_filtered,
        degroup='Roll',
        split_portion=0.15,
        period=period)
    test_features_data, features_name, test_labels = getTFDataSets(
        test_sets, period)
    train_features_data, _, train_labels = getTFDataSets(train_sets, period)
    cv_features_data, _, cv_labels = getTFDataSets(cv_sets, period)

    X = np.append(np.append(train_features_data, cv_features_data, axis=0),
                  test_features_data,
                  axis=0)
    X_2 = np.append(train_features_data, cv_features_data, axis=0)
    y = np.append(np.append(train_labels, cv_labels, axis=0),
                  test_labels,
                  axis=0)
    y_2 = np.append(train_labels, cv_labels, axis=0)

    print "Sample Size: {}".format(X_2.shape)
    print "Labels size: {}".format(y_2.shape)

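    # 5-fold cross-validation scores; note that X/y include the test split.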
    knn = KNeighborsClassifier(n_neighbors=18)
    knn_scores = cross_val_score(knn, X, y, cv=5)
    print "\n Knn_Score:"
    print knn_scores
    print knn_scores.mean()

    knn.fit(X_2, y_2)
    pre = knn.predict(test_features_data)
    metrixReport(test_labels, pre)

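    # Bagging ensemble: 5 KNN base estimators, each fit on random 70%
    # subsets of the samples and of the features.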
    knn_bag = BaggingClassifier(
        base_estimator=KNeighborsClassifier(n_neighbors=10),
        max_samples=0.7,
        max_features=0.7,
        n_estimators=5)
    knn_bag_scores = cross_val_score(knn_bag, X, y, cv=5)
    print "\n Knn_bag_score"
    print knn_bag_scores
    print knn_bag_scores.mean()

    knn_bag.fit(X_2, y_2)
    pre = knn_bag.predict(test_features_data)
    metrixReport(test_labels, pre)
Example 5
def main(argv):
    args = parser.parse_args(argv[1:])

    beg_date = '2015-01-01'
    funds = ['002001_Nav']
    learning_rate = 0.0001
    drop_out = 0.5
    train_steps = 5000
    df_filtered = fund_Analysis(beg_date, funds)
    train_sets, cv_sets, test_sets = fund_data_proprocessing(
        beg_date, funds, df_filtered)

    test_features_data, features_name, test_labels = getTFDataSets(test_sets)
    train_features_data, _, train_labels = getTFDataSets(train_sets)
    # cv_features_data, _, cv_labels = getTFDataSets(cv_sets)

    # Define the estimator: one numeric feature column per feature name
    feature_cols = [tf.feature_column.numeric_column(k) for k in features_name]

    # feature_cols = [tf.feature_column.numeric_column('feature', shape=[1, 395])]

    classifier = tf.estimator.DNNClassifier(
        n_classes=3,
        feature_columns=feature_cols,
        hidden_units=[1024, 512, 128],
        optimizer=tf.train.AdamOptimizer(learning_rate),
        dropout=drop_out)
    # sess = tf.Session()
    # sess.run(iterator.initializer, feed_dict={fea_holder: train_features_data, la_holder: train_labels})
    # data = train_input_fn(train_features_data, train_labels)

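    # The lambdas defer building the input pipelines until the Estimator
    # invokes them inside its own graph.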
    classifier.train(
        input_fn=lambda: train_input_fn(train_features_data, train_labels),
        steps=train_steps)

    eval_results = classifier.evaluate(
        input_fn=lambda: test_input_fn(test_features_data, test_labels))
    accuracy = eval_results['accuracy']
    print("\nTest Accuracy: {0:f}%\n".format(accuracy * 100))
Example 6
def main(argv):
    args = parser.parse_args(argv[1:])

    beg_date = '2015-01-01'
    funds = ['002001_Nav']
    train_steps = 200
    df_filtered = fund_Analysis(beg_date, funds)

    train_sets, cv_sets, test_sets = fund_data_proprocessing(
        beg_date, funds, df_filtered, 'Week')
    test_features_data, features_name, test_labels = getTFDataSets(test_sets)
    train_features_data, _, train_labels = getTFDataSets(train_sets)
    cv_features_data, _, cv_labels = getTFDataSets(cv_sets)

    # One numeric feature column per feature name
    # (not consumed by the custom model_fn below)
    feature_cols = [tf.feature_column.numeric_column(k) for k in features_name]

    # classifier = tf.estimator.Estimator(model_fn=lambda dataset, mode: lstm_model_fn(dataset, mode), model_dir="/lstm_model")

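    # LoggingTensorHook prints the tensors named here every 50 steps during
    # training (unused below, since the train call is commented out).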
    tensors_to_log = {'probabilities': 'Softmax_probabilities'}
    logging_hook = tf.train.LoggingTensorHook(tensors=tensors_to_log,
                                              every_n_iter=50)

    classifier = tf.estimator.Estimator(
        model_fn=lstm_model_fn,
        model_dir="/home/marshao/DataMiningProjects/Project_FundsAnalysis/LSTM"
    )

    #train_op = classifier.train(input_fn=lambda: train_input_fn(train_features_data, train_labels),
    #                            max_steps=train_steps, hooks=[logging_hook])
    # print train_op
    # eval_results = classifier.evaluate(input_fn=lambda: train_input_fn(cv_features_data, cv_labels), checkpoint_path=None)
    # print eval_results
    prediction_results = list(
        classifier.predict(input_fn=lambda: test_input_fn(test_features_data),
                           checkpoint_path=None))
    print(prediction_results[0]['probabilities'])
    print(prediction_results[0]['classes'])
Example 7
def main():
    beg_date = '2004-01-01'
    funds = ['002001_Nav']
    period = 25
    df_filtered = fund_Analysis(beg_date, funds)
    train_sets, cv_sets, test_sets = fund_data_proprocessing(
        beg_date,
        funds,
        df_filtered,
        degroup='Roll',
        split_portion=0.15,
        period=period)
    test_features_data, features_name, test_labels = getTFDataSets(
        test_sets, period)
    train_features_data, _, train_labels = getTFDataSets(train_sets, period)
    cv_features_data, _, cv_labels = getTFDataSets(cv_sets, period)

    X = np.append(np.append(train_features_data, cv_features_data, axis=0),
                  test_features_data,
                  axis=0)
    X_2 = np.append(train_features_data, cv_features_data, axis=0)
    y = np.append(np.append(train_labels, cv_labels, axis=0),
                  test_labels,
                  axis=0)
    y_2 = np.append(train_labels, cv_labels, axis=0)

    print "Sample Size: {}".format(X_2.shape)
    print "Labels size: {}".format(y_2.shape)

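    # sklearn PCA: fit a 200-component basis on the train+cv features.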
    pca = PCA(n_components=200)

    pca_X_2 = pca.fit_transform(X_2)
    # Project the test set with transform(), not fit_transform(), so it is
    # mapped onto the basis fitted on the training data.
    pca_test = pca.transform(test_features_data)
    print("PCA-reduced sample size: {}".format(pca_X_2.shape))

    knn = KNeighborsClassifier(n_neighbors=18)
    knn_scores = cross_val_score(knn, X, y, cv=5)
    print "\n Knn_Score:"
    print knn_scores
    print knn_scores.mean()

    print "\n KNN no PCA"
    knn.fit(X_2, y_2)
    pre = knn.predict(test_features_data)
    metrixReport(test_labels, pre)
    print "\n KNN after PCA"
    knn.fit(pca_X_2, y_2)
    pre = knn.predict(pca_test)
    metrixReport(test_labels, pre)

    knn_bag = BaggingClassifier(
        base_estimator=KNeighborsClassifier(n_neighbors=10),
        max_samples=0.7,
        max_features=0.7,
        n_estimators=5)
    knn_bag_scores = cross_val_score(knn_bag, X, y, cv=5)
    print "\n Knn_bag_score"
    print knn_bag_scores
    print knn_bag_scores.mean()

    print "\n KNN bag no PCA"
    knn_bag.fit(X_2, y_2)
    pre = knn_bag.predict(test_features_data)
    metrixReport(test_labels, pre)
    print "\n KNN bag after PCA"
    knn_bag.fit(pca_X_2, y_2)
    pre = knn_bag.predict(pca_test)
    metrixReport(test_labels, pre)

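    # Random forest: 10 trees, depth capped at 5, each split drawing from
    # half of the features.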
    random_forest = RandomForestClassifier(max_depth=5,
                                           max_features=0.5,
                                           n_estimators=10)
    random_forest_score = cross_val_score(random_forest, X, y, cv=5)
    print "\n Random Forest Score:"
    print random_forest_score
    print random_forest_score.mean()

    print "\n RF no PCA"
    random_forest.fit(X_2, y_2)
    pre = random_forest.predict(test_features_data)
    metrixReport(test_labels, pre)
    print "\n RF after PCA"
    random_forest.fit(pca_X_2, y_2)
    pre = random_forest.predict(pca_test)
    metrixReport(test_labels, pre)
Example 8
from __future__ import print_function

from C_Fund_Analysis import fund_Analysis, fund_data_proprocessing
import numpy as np
import pandas as pd
import tensorflow as tf

beg_date = '2015-01-01'
funds = ['002001_Nav']
df_filtered = fund_Analysis(beg_date, funds)
train_sets, cv_sets, test_sets = fund_data_proprocessing(
    beg_date, funds, df_filtered)


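# getFeatures flattens each sample DataFrame into one row vector: samples
# are zero-padded (or trimmed) to 5 rows before flattening, and the (1, 395)
# buffer implies 5 rows x 79 columns per sample.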
def getFeatures(samples):
    array_z = np.zeros((1, 395), dtype=np.float32)
    for sample in samples:
        row, col = sample.shape
        columns = sample.columns
        em_rows = 5 - row
        if em_rows > 0:
            df = pd.DataFrame(np.zeros((em_rows, col)), columns=columns)
            sample = pd.concat([sample, df])
        if em_rows < 0:
            sample = sample.iloc[1:, :]

        if array_z[0, 0] == 0:
            array = np.array(sample.values)
            array_z = np.reshape(array, (1, -1))
        else:
            array = np.array(sample.values)
            # Assumed completion: stack each flattened sample onto the buffer
            array_z = np.append(array_z, np.reshape(array, (1, -1)), axis=0)

    return array_z
Example 9
def main(argv):
    args = parser.parse_args(argv[1:])

    beg_date = '2015-01-01'
    funds = ['002001_Nav']
    train_steps = 4000
    df_filtered = fund_Analysis(beg_date, funds)

    train_sets, cv_sets, test_sets = fund_data_proprocessing(
        beg_date, funds, df_filtered, 'Week')
    test_features_data, features_name, test_labels = getTFDataSets(test_sets)
    train_features_data, _, train_labels = getTFDataSets(train_sets)
    cv_features_data, _, cv_labels = getTFDataSets(cv_sets)

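    # Three pipelines: training repeats its data indefinitely
    # (num_epochs=None); evaluation and prediction default to a single pass.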
    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": train_features_data},
        y=train_labels,
        batch_size=50,
        num_epochs=None,
        shuffle=False)
    eval_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": cv_features_data}, y=cv_labels, shuffle=False)
    pred_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": test_features_data},
        # Leave batch_size and num_epochs unset for prediction; setting
        # num_epochs=None here would make predict() loop forever.
        shuffle=False)

    # One numeric feature column per feature name
    # (not consumed by the custom model_fn below)
    feature_cols = [tf.feature_column.numeric_column(k) for k in features_name]

    # classifier = tf.estimator.Estimator(model_fn=lambda dataset, mode: lstm_model_fn(dataset, mode), model_dir="/lstm_model")

    tensors_to_log = {'probabilities': 'Softmax_probabilities'}
    logging_hook = tf.train.LoggingTensorHook(tensors=tensors_to_log,
                                              every_n_iter=50)

    classifier = tf.estimator.Estimator(
        model_fn=lstm_model_fn,
        model_dir="/home/marshao/DataMiningProjects/Project_FundsAnalysis/LSTM"
    )

    train_op = classifier.train(input_fn=train_input_fn,
                                max_steps=train_steps,
                                hooks=[logging_hook])
    # print train_op
    eval_results = classifier.evaluate(input_fn=eval_input_fn,
                                       checkpoint_path=None)
    print(eval_results)
    #prediction_results = classifier.predict(input_fn=pred_input_fn, checkpoint_path=None)
    prediction_results = list(
        classifier.predict(input_fn=pred_input_fn, checkpoint_path=None))

    for each_result in prediction_results:
        print(each_result['probabilities'], each_result['classes'])