def main(argv):
    if argv[1] == 'train_process':
        get_data()
        data_preparation = DataPreparation()
        data_preparation.generate_data_for_model()
        train_model = Train()
        train_model.compute_locations_models()
        prediction = Prediction()
        prediction.get_models()
        create_dashboard(prediction)
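# A minimal entry-point sketch; the __main__ guard is an assumption, since the
# original module does not show how main() is invoked:
if __name__ == '__main__':
    import sys
    main(sys.argv)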
def __init__(self, model_deciding=None):
    get_time()
    self.data_path = data_path if model_deciding == 'all' else features_data_path
    self.data = get_data(path=self.data_path, is_for_model=False)
    self.columns = list(self.data.columns)
    self.features = decide_feature_name(feature_path)
    self.model_deciding = model_deciding
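# Hypothetical usage sketch. This __init__ matches the CreateFeatures class that
# the CLI dispatcher below instantiates; the class name is inferred from that
# dispatcher and is an assumption here:
create_feature = CreateFeatures(model_deciding='all')  # recompute every feature in features.json
create_feature.compute_features()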
def plot_subject(subject, session, options):
    """Load, process and plot data for one subject/session pair.

    Possible options:
    ('ecg', 'gsr', 'blocks', 'trials', 'plot_beats', 'show', 'save')
    """
    # transform all keys to lower case
    options = {key.lower(): value for key, value in options.items()}
    # add missing keys to options and transform to namedtuple
    option_keys = ('do_ecg', 'do_gsr', 'do_blocks', 'do_trials')
    default_opts = dict.fromkeys(option_keys, False)
    default_opts.update(options)
    if 'figsize' not in default_opts:
        default_opts['figsize'] = (15, 10)
    options = namedtuple('Options', default_opts.keys())(**default_opts)

    physio_data, trials, time_range = da.get_data(subject, session)
    results = dpp.process_data(physio_data, trials, subject, session, options)
    names, figs = plot_results(results, options)

    start_time = datetime.fromtimestamp(time_range[0]).strftime('%Y-%m-%d %H:%M:%S')
    end_time = datetime.fromtimestamp(time_range[1]).strftime('%Y-%m-%d %H:%M:%S')
    print('start_time:', start_time, ' end_time:', end_time)

    if options.save:
        for name, fig in zip(names, figs):
            filename = subject + '_' + session + '_' + name
            save_plot(filename, fig)
    if options.show:
        plt.show()
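# Hypothetical call, with subject and session passed as strings as in the
# table-export helpers below; any option key left out defaults to False:
plot_subject('403', '1', {'do_gsr': True, 'do_trials': True, 'show': True})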
def __init__(self, hyper_parameters=None, model_deciding=None, last_day_predictor=None, params=None):
    get_time()
    self.data = get_data(main_data_path + params['args']['data'], True)  # data created at feature engineering
    self.features = list(decide_feature_name(main_data_path + params['args']['feature_set']).keys())
    self.params = hyper_parameters  # hyper parameters for the model: hyper_parameters.json
    self.model_params = params
    self.train, self.test = None, None
    self.X = None
    self.optimum_cluster_centroids = None
    self.centroids = None
    self.po_data = None  # possible-outlier transactions data
    self.model_dbscan = None
    self.m_s, self.eps = [], []
    self.o_min_sample = None
    self.o_epsilon = None
    self.o_devision = None
    self.last_day_predictor = last_day_predictor  # data-splitting indicator
    self.uids = None
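# The eps/min_samples lists (self.eps, self.m_s) suggest a grid search for
# DBSCAN parameters. A minimal sketch of that idea with scikit-learn; the
# scoring rule (fewest points labelled as noise) is an assumption, not the
# project's actual criterion:
import numpy as np
from sklearn.cluster import DBSCAN

def search_dbscan_params(X, eps_values, min_sample_values):
    best_params, best_noise = None, np.inf
    for eps in eps_values:
        for min_samples in min_sample_values:
            labels = DBSCAN(eps=eps, min_samples=min_samples).fit(X).labels_
            noise = np.sum(labels == -1)  # DBSCAN labels outliers as -1
            if noise < best_noise:
                best_params, best_noise = (eps, min_samples), noise
    return best_params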
def main(args):
    logger.get_time()
    if is_local_run:
        args = sample_args
    sys.stdout = logger.Logger()
    print("*" * 3, " args :", args)
    if len(args) != 0:
        if args[1] == 'feature_engineering':
            """
            run from terminal: python main.py feature_engineering all
            all: create all features which are in features.json
            Ex: 'python main.py feature_engineering c_m_ratios' creates only
            'c_m_ratios' and adds it to the feature set.
            """
            create_feature = CreateFeatures(model_deciding=args[2])
            create_feature.compute_features()
        if args[1] == 'train_process':  # TODO: description must be updated
            """
            run from terminal: python main.py train_process 0
            0/1: 0; test data splits from date
                 1; test data is the last day of each customer
            Models: Isolation Forest and AutoEncoder for multivariate and
            univariate models
            """
            train = trainModel(args=args)
            train.process()
        if args[1] == 'prediction':  # TODO: description must be updated
            """
            run from terminal: python main.py prediction 0
            0/1: 0; test data splits from date
                 1; test data is the last day of each customer
            Creates a prediction value for each transaction and adds it to the
            raw data set.
            """
            prediction = trainModel(args=args, is_prediction=True)
            prediction.process()
        if args[1] == 'dashboard':  # TODO: description must be updated
            """
            run from terminal: python main.py dashboard 0      # 10.20.10.196:3030
            run from terminal: python main.py dashboard 0 uni  # 10.20.10.196:3031
            0/1: 0; test data splits from date
                 1; test data is the last day of each customer
            uni: creates dashboards only for univariate models. In order to run
            the multivariate dashboard, assign null.
            Dashboards for multi- and univariate models are created.
            """
            # TODO: get prediction data from predicted .csv file
            model = ModelTrainIsolationForest(last_day_predictor=int(args[2]))
            model.train_test_split()
            create_dahboard(model.train, get_data(pred_data_path, True))
    logger.get_time()
def __init__(self, hyper_parameters=None, last_day_predictor=None, params=None):
    get_time()
    self.data = get_data(features_data_path, True)  # data created at feature engineering
    # TODO: get the specific feature set for each specific model.
    self.features = list(decide_feature_name(feature_path).keys())
    self.params = hyper_parameters  # hyper parameters for the model: hyper_parameters.json
    self.model_params = params
    self.train, self.test = None, None
    self.X = None
    self.model_e_iso_f = None
    self.last_day_predictor = last_day_predictor  # data-splitting indicator
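# A minimal sketch of the Isolation Forest step this class wires up, assuming
# scikit-learn and illustrative hyper-parameter names that would normally come
# from hyper_parameters.json:
from sklearn.ensemble import IsolationForest

def fit_isolation_forest(X_train, X_test, n_estimators=100, contamination=0.01):
    model = IsolationForest(n_estimators=n_estimators, contamination=contamination)
    model.fit(X_train)
    # predict() returns -1 for anomalies and 1 for normal observations
    return model, model.predict(X_test)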
def __init__(self, hyper_parameters=None, last_day_predictor=None, params=None):
    get_time()
    self.data = get_data(features_data_path, True)
    self.features = list(decide_feature_name(feature_path).keys())
    self.params = hyper_parameters
    self.last_day_predictor = last_day_predictor
    self.model_params = params
    self.train, self.test = None, None
    self.X, self.y_pred, self.y = None, None, None
    self.input, self.fr_output = None, None
    self.model_ae, self.model_ae_l, self.model_u = None, None, None
    self.gpu_devices = [
        d for d in device_lib.list_local_devices() if d.device_type == "GPU"
    ] if run_gpu else []
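# A minimal sketch of the kind of autoencoder this class trains; the layer
# sizes, activations, and loss are assumptions, not the project's actual
# architecture. The reconstruction error then serves as the anomaly score:
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers

def build_autoencoder(n_features, encoding_dim=8):
    inputs = keras.Input(shape=(n_features,))
    encoded = layers.Dense(encoding_dim, activation='relu')(inputs)
    decoded = layers.Dense(n_features, activation='linear')(encoded)
    model = keras.Model(inputs, decoded)
    model.compile(optimizer='adam', loss='mse')
    return model

def reconstruction_scores(model, X):
    return np.mean((model.predict(X) - X) ** 2, axis=1)  # higher = more anomalous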
""" run from terminal: python main.py train_process 0 0/1: 0; test data splits from date 1: test data is last day of each customer Models: isolation forest & AutoEncoder & DBScan """ train = trainModel(args=sys.argv) train.process() if sys.argv[1] == 'prediction': """ run from terminal: python main.py prediction 0 0/1: 0; test data splits from date 1: test data is last day of each customer It creates prediction values for each transaction is added to raw data set """ prediction = trainModel(args=sys.argv, is_prediction=True) prediction.process() if sys.argv[1] == 'dashboard': """ run from terminal: python main.py dashboard 0 0/1: 0; test data splits from date 1: test data is last day of each customer """ model = ModelTrainIsolationForest( last_day_predictor=int(sys.argv[2])) model.train_test_split() create_dahboard(model.train, get_data(pred_data_path, True)) logger.get_time()
def save_raw_table():
    #subjects = [403, 416, 421, 424, 430, 433, 434, 437, 419, 420, 425, 426, 428, 429, 432, 436]
    #subjects = [314, 319, 321, 323, 325, 326, 327, 328, 332, 333]
    subjects = [312, 314, 315, 317, 320, 322,
                329, 330, 332, 403, 416, 419,
                420, 421, 424, 425, 426, 428,
                430, 432, 433, 436, 437]
    path = da.config['PATH']['physio_path']
    version_major = 5
    version_minor = 0
    path = os.path.join(path, 'gsr_to_gamedata_table_v' + str(version_major)
                        + '.' + str(version_minor) + '.csv')
    options = {
        'do_gsr': True,
        'do_trials': True,
        'only_success': False,
        'silent': True,
        'overwrite': True,
    }
    # transform all keys to lower case
    options = {key.lower(): value for key, value in options.items()}
    # add missing keys to options and transform to namedtuple
    option_keys = ('do_ecg', 'do_gsr', 'do_blocks', 'do_trials',
                   'only_success', 'silent', 'overwrite')
    default_opts = dict.fromkeys(option_keys, False)
    default_opts.update(options)
    # convert to named tuple for easier access
    options = namedtuple('Options', default_opts.keys())(**default_opts)

    # if the file already exists, cancel everything
    if not options.overwrite and os.path.exists(path):
        print(path, 'file already exists')
        return

    # write column names to the csv file
    head = 'subject,session,physio_time,raw_gsr,condition,trial_id,success\n'
    with open(path, 'w') as f:
        f.write(head)

    # bring subject and session into a form that is easy to iterate over
    sessions = [1, 2] * len(subjects)
    subjects = itertools.chain(*zip(subjects, subjects))
    for subject, session in zip(subjects, sessions):
        print('Processing subject %s session %s' % (subject, session))
        subject = str(subject)
        session = str(session)
        # skip sessions whose data cannot be loaded
        try:
            physio_data, trials, time_range = da.get_data(
                subject, session, options.only_success, options.silent)
        except da.DataAccessError as e:
            print('Skip subject %s session %s: %s' % (subject, session, e))
            continue
        time_scale = np.array(physio_data['time'])
        if len(time_scale) == 0:
            raise Exception('no physio data')
        results = dpp.process_data(physio_data, trials, subject, session, options)

        raw_gsr = physio_data['gsr']
        cond_for_physio = results.conditions_for_physio
        trial_for_physio = results.trial_ids_for_physio
        success_for_physio = results.success_for_physio
        lines = [
            ','.join([subject, session, str(t), str(gsr), str(cond),
                      str(tid), str(sfp) + '\n'])
            for t, gsr, cond, tid, sfp in zip(time_scale, raw_gsr, cond_for_physio,
                                              trial_for_physio, success_for_physio)
        ]
        with open(path, 'a') as f:
            f.writelines(lines)
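# The subject/session interleave above pairs every subject with both sessions.
# A tiny self-contained demonstration of the same trick:
import itertools
subs = [403, 416]
sess = [1, 2] * len(subs)                 # [1, 2, 1, 2]
subs = itertools.chain(*zip(subs, subs))  # 403, 403, 416, 416
assert list(zip(subs, sess)) == [(403, 1), (403, 2), (416, 1), (416, 2)]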
def save_mean_table():
    subjects = [403, 416, 421, 424, 430, 433, 434, 437,
                419, 420, 425, 426, 428, 429, 432, 436]
    path = da.config['PATH']['physio_path']
    path = os.path.join(path, 'gsr_results_table.csv')
    options = {
        'do_gsr': True,
        'do_trials': True,
        'only_success': False,
        'silent': True,
    }
    # transform all keys to lower case
    options = {key.lower(): value for key, value in options.items()}
    # add missing keys to options and transform to namedtuple
    option_keys = ('do_ecg', 'do_gsr', 'do_blocks', 'do_trials', 'silent')
    default_opts = dict.fromkeys(option_keys, False)
    default_opts.update(options)
    # convert to named tuple for easier access
    options = namedtuple('Options', default_opts.keys())(**default_opts)

    # if the file already exists, cancel everything
    if os.path.exists(path):
        print(path, 'file already exists')
        return

    # write column names to the csv file
    head = 'subject,session,trial_id,condition,mean_gsr\n'
    with open(path, 'w') as f:
        f.write(head)

    # bring subject and session into a form that is easy to iterate over
    sessions = [1, 2] * len(subjects)
    subjects = itertools.chain(*zip(subjects, subjects))
    for subject, session in zip(subjects, sessions):
        print('Processing subject %s session %s' % (subject, session))
        subject = str(subject)
        session = str(session)
        # try to load data
        try:
            physio_data, trials, time_range = da.get_data(
                subject, session, options.only_success)
        except da.DataAccessError as e:
            print('Skip subject %s session %s: %s' % (subject, session, e))
            continue
        time_scale = np.array(physio_data['time'])
        if len(time_scale) == 0:
            raise Exception('no physio data')
        results = dpp.process_data(physio_data, trials, subject, session, options)

        condition = trials[2]
        trial_id = trials[3]
        gsr_mean = results.mean_gsr_for_trials
        lines = [
            ','.join([subject, session, str(tid), cond, str(gsr) + '\n'])
            for tid, cond, gsr in zip(trial_id, condition, gsr_mean)
        ]
        with open(path, 'a') as f:
            f.writelines(lines)
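# Quick sanity check of the exported table; assumes pandas is available, which
# the original module does not import:
import pandas as pd
df = pd.read_csv(os.path.join(da.config['PATH']['physio_path'], 'gsr_results_table.csv'))
print(df.groupby('condition')['mean_gsr'].mean())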