self.dict_test_and_train['X_train'] = np.concatenate(
            [self.dict_test_and_train['X_train'], in_data['X_train']], axis=1)

    def get_start_end_date(self):
        """Return the ``(start, end)`` pair derived from ``self.data['start_end_date']``.

        The raw entry is printed first, then its ``'start'`` and ``'end'``
        values are each converted with ``md.get_date_from_UTC_ms``.
        NOTE(review): presumably both values are UTC timestamps in
        milliseconds (per the helper's name) -- confirm against the producer.
        """
        date_range = self.data['start_end_date']
        print(date_range)
        return (md.get_date_from_UTC_ms(date_range['start']),
                md.get_date_from_UTC_ms(date_range['end']))


from copy import copy

# Start and end of the date range, parsed from strings by the md helper.
start_date = md.get_datetime_from_string('2017-01-19')
end_date = md.get_datetime_from_string('2018-05-1')

# The first feature set is the reference group; the remaining ones are the
# candidate sets that get combined with it.
feature_set = feature_list.get_feature_set()
reference_group = feature_set[0]
feature_sets = feature_set[1:]
break_flag = False
dict_perf_feats = {}
# Only candidate sets from index 20 onwards are analysed here.
for analysis_set in feature_sets[20:]:
    if break_flag:
        break
    # One empty result list per feature of the set under analysis.
    for feature in analysis_set:
        dict_perf_feats[feature] = []
    for secondary_set in feature_sets:
        # A set is never paired with itself.
        if analysis_set == secondary_set:
            continue
        # Work on a copy so the reference group itself is never extended.
        features = copy(reference_group)
        features.extend(analysis_set)
        features.extend(secondary_set)
        ##fit, return feature importances
Esempio n. 2
0
import mylib_dataset as md
import tensorflow as tf
from tensorflow.contrib.tensor_forest.client import random_forest
import matplotlib.pyplot as plt

##################### get log for estimator
# Timestamp taken before the work below starts.
proc_time_start = datetime.datetime.now()

##### COMMENT
## train data has low variance, better choose from test data
####

# File handed to md.get_feature_importances_mean, which returns the
# (ordered_values_mean, ordered_values_var) pair used below.
# NOTE(review): presumably a pickle of per-feature importance results -- confirm.
path_features_imp = '/home/catalin/git_workspace/disertatie/dict_perf_feats.pkl'
ordered_values_mean, ordered_values_var = md.get_feature_importances_mean(
    path_features_imp)
# First feature set extended with the first 30 entries of
# ordered_values_mean['keys'].
# NOTE(review): extend() mutates the object returned by get_feature_set()[0]
# in place -- confirm that the helper hands out a fresh list.
features = feature_list.get_feature_set()[0]
features.extend(ordered_values_mean['keys'][:30])
# `features` is rebound here, so the selection built on the two lines above
# is discarded (only its side effects, if any, remain).
features = feature_list.get_features_list()
blockchain_indicators = feature_list.get_blockchain_indicators()
#features = feature_list.get_features_list()
import pickle
# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only load this file if it comes from a trusted source.
with open('/home/catalin/python/force_data.pickle', 'rb') as handle:
    force_data = pickle.load(handle)

# Candidate max_depth values to sweep: every integer from 1 to 49 inclusive.
max_depths = list(range(1, 50))

# Result containers, created empty here.
# NOTE(review): presumably filled per max_depth by the loop that follows -- confirm.
roc_curves = {}
auc_scores = {}
for max_depth in max_depths: