def generate_combined_scores(run_mode, algorithms_list=[], evaluated_domain_list=None):
    startTime = datetime.now()
    # load the pickled authority-score dict of each algorithm:
    dicts_list = []
    for alg in algorithms_list:
        dicts_list.append(gm.read_object_from_file(
            gm.get_general_file_path(run_mode, '_'.join([alg, 'a_dict_pickle']), evaluated_domain_list)))
    # map each combined-score type to the callable that produces it:
    combine_types = {'max': lambda: gm.create_max_dict_from_dicts(dicts_list),
                     'avg': lambda: gm.create_avg_dict_from_dicts(dicts_list),
                     'top_3_avg': lambda: gm.create_avg_dict_from_dicts(dicts_list, n=3),
                     'top_2_avg': lambda: gm.create_avg_dict_from_dicts(dicts_list, n=2)}
    for k, combine in combine_types.items():
        out_file = gm.get_general_file_path(run_mode, k, evaluated_domain_list, dir='outputs')
        comb_score_dict = combine()
        u_pct_dict, l_pct_dict = gm.get_percentiles(comb_score_dict)
        gm.write_union_of_dicts_ordered_by_value_to_file(comb_score_dict, [u_pct_dict, l_pct_dict], out_file)
    print '\n--- main: combined scores generation and evaluation took: ' + str(datetime.now() - startTime)
    sys.stdout.flush()
    return
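# --- Illustrative sketch (hedged): a minimal, self-contained version of what the
# gm.create_max_dict_from_dicts / gm.create_avg_dict_from_dicts helpers are assumed
# to do at the call sites above -- per domain, take the max (or the average of the
# top-n) scores across the per-algorithm dicts. The real gm implementation is not
# shown in this module, so the name and semantics below are assumptions.
def _sketch_combine_dicts(dicts_list, comb='max', n=None):
    combined = {}
    keys = set()
    for d in dicts_list:  # union of keys: not every algorithm scores every domain
        keys.update(d.keys())
    for key in keys:
        scores = sorted((d[key] for d in dicts_list if key in d), reverse=True)
        if comb == 'max':
            combined[key] = scores[0]
        else:  # 'avg' over all scores, or over the top-n when n is given
            top = scores[:n] if n else scores
            combined[key] = sum(top) / float(len(top))
    return combined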
def compare_scores_histogram(run_mode, algorithms_list=[], evaluated_domain_list=None):
    removed_domains_f = '/home/michal/SALSA_files/tmp/remove_domains_from_results'
    for alg in algorithms_list:
        print '\n--- main: ' + alg
        sys.stdout.flush()
        if 'pagerank' in alg:  # pagerank/inverse_pagerank: a single score per domain
            scores_dict = gm.read_object_from_file(
                gm.get_general_file_path(run_mode, '_'.join([alg, 'a_dict_pickle']), evaluated_domain_list))
            gm.histogram_of_dict(scores_dict, fn=removed_domains_f, bins=150)
        else:  # hits/salsa: histogram the authority scores
            a_scores_dict = gm.read_object_from_file(
                gm.get_general_file_path(run_mode, '_'.join([alg, 'a_dict_pickle']), evaluated_domain_list))
            print '--- main: authorities'
            sys.stdout.flush()
            gm.histogram_of_dict(a_scores_dict, fn=removed_domains_f, bins=150)
    print '\n--- main: combined'
    sys.stdout.flush()
    scores_dict = combine_scores(algorithms_list)
    gm.histogram_of_dict(scores_dict, fn=removed_domains_f, bins=150)
    return
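# --- Illustrative sketch (hedged): gm.histogram_of_dict is defined elsewhere; a
# minimal stand-alone equivalent of "histogram the values of a score dict" could
# look like the function below. The fn/bins semantics of the real helper (e.g. how
# the removed-domains file is applied) are not reproduced here.
def _sketch_histogram_of_dict(scores_dict, bins=150, out_png='scores_histogram.png'):
    import matplotlib
    matplotlib.use('Agg')  # headless backend, suitable for server runs
    import matplotlib.pyplot as plt
    plt.figure()
    plt.hist(list(scores_dict.values()), bins=bins)
    plt.xlabel('score')
    plt.ylabel('num of domains')
    plt.savefig(out_png)
    plt.close()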
def create_combined_scores(self, run_mode, alg_list=[], evaluated_domain_list=None, attr='Lpct'):
    # alg_list = list of algorithms whose scores are to be combined
    # attr = (string) the node attribute the combined score is based on
    dicts = []
    if attr == 'Lpct':
        for alg in alg_list:
            # create a dict of the domains' lower-percentile scores and push it to the dicts list:
            dicts.append(self.get_nodes_attr_val_dict(self.alg_auth_Lpct[alg]))
        dicts.append(self.get_nodes_attr_val_dict(self.n_attr.risk))
    for comb_type in ['max', 'avg', 'top3_avg', 'top2_avg']:
        out_file = gm.get_general_file_path(run_mode, '_'.join([comb_type, attr]), evaluated_domain_list, dir='outputs')
        gm.create_combined_score(comb_type, dicts, is_last_dict_risk=True, fn=out_file)
    # create a new high-level score for hits and salsa (max of auth/hub score):
    tmp_dicts = []
    for k, v in self.alg_hub_Lpct.items():  # for each alg with hub scores [salsa, hits]
        tmp_dicts.append(self.get_nodes_attr_val_dict(self.alg_auth_Lpct[k]))  # auth scores dict
        tmp_dicts.append(self.get_nodes_attr_val_dict(v))                      # hub scores dict
        out_file = gm.get_general_file_path(run_mode, '_'.join([k, 'max', attr]), evaluated_domain_list, dir='outputs')
        gm.create_combined_score('max', tmp_dicts, is_last_dict_risk=False, fn=out_file)
        del tmp_dicts[:]
    return
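# --- Illustrative sketch (hedged): the "max of auth/hub" high-level score built in
# the loop above, on toy numbers. For algorithms that produce two scores per node
# (hits, salsa), a domain is flagged by whichever role looks riskier; the code below
# is a stand-in for gm.create_combined_score('max', ...), whose real implementation
# is not shown in this module.
def _sketch_auth_hub_max():
    auth_lpct = {'a.com': 0.90, 'b.com': 0.20}
    hub_lpct = {'a.com': 0.10, 'b.com': 0.95}
    combined = {}
    for d in set(auth_lpct) | set(hub_lpct):
        combined[d] = max(auth_lpct.get(d, 0.0), hub_lpct.get(d, 0.0))
    # -> {'a.com': 0.90, 'b.com': 0.95}: b.com surfaces via its hub score
    return combined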
def get_output_files(run_mode, fold=None):
    if fold:
        f_postfix = ['fold', fold]
    else:
        f_postfix = None
    processed_file = gm.get_general_file_path(run_mode, 'input_list', post_list=f_postfix)
    output_users_risk_dict_path = gm.get_general_file_path(run_mode, 'users_risk_dict', post_list=f_postfix)
    output_transitions_dict_path = gm.get_general_file_path(run_mode, 'transitions_dict', post_list=f_postfix)
    output_domain_risk_dict_path = gm.get_general_file_path(run_mode, 'domains_risk_dict', post_list=f_postfix)
    return processed_file, output_users_risk_dict_path, output_transitions_dict_path, output_domain_risk_dict_path
def get_output_files(run_mode, alg, evaluated_domain_list=None):
    if 'pagerank' not in alg:  # hits or salsa
        output_hubs_file = gm.get_general_file_path(run_mode, '_'.join([alg, 'hub']),
                                                    post_list=evaluated_domain_list, dir='outputs')
        output_authorities_file = gm.get_general_file_path(run_mode, '_'.join([alg, 'auth']),
                                                           post_list=evaluated_domain_list, dir='outputs')
    else:  # pagerank or inverse_pagerank
        output_hubs_file = None
        output_authorities_file = gm.get_general_file_path(run_mode, alg,
                                                           post_list=evaluated_domain_list, dir='outputs')
    return output_hubs_file, output_authorities_file
def main():
    from sklearn import cross_validation
    import stats
    import os
    startTime = datetime.now()
    domains_risk_dict_f = gm.get_general_file_path(run_mode, 'mal_d/domains_risk', dir='tmp')
    # If the domains-label file does not exist, run a 'full run' to create it (needed for
    # stratified K-folds). Running the entire flow with an empty evaluated-domains list
    # creates the labeled-domains risk dict file (1=malicious, 0=else):
    if not os.path.exists(domains_risk_dict_f):
        run_entire_flow(run_mode, algorithms_list, [],
                        redirect_ref=redirect_ref, redirect_weight=redirect_weight,
                        link_ref=link_ref, link_weight=link_weight,
                        nstart_flag=nstart_flag, wo_users=wo_users)
    src_mal_domains = gm.get_general_file_path(run_mode, 'mal_d/src_mal_domains', dir='tmp')
    mal_list = np.array(gm.read_list_from_file(src_mal_domains))
    tests_list = []
    folds_stats_list = []
    if len(mal_list):  # if the src_mal_domains file is not empty
        # unzip the {domain: risk} dict into parallel domain/label sequences:
        uzip_d_risk = zip(*gm.read_object_from_file(domains_risk_dict_f).items())
        kf = cross_validation.StratifiedKFold(list(uzip_d_risk[1]),
                                              n_folds=min(k_folds, sum(uzip_d_risk[1])))
        for train_index, test_index in kf:
            # each test fold holds [domains, labels] as numpy arrays:
            test = [np.asarray(uzip_d_risk[0])[test_index], np.asarray(uzip_d_risk[1])[test_index]]
            tests_list.append(test)
        folds_stats_list = run_entire_flow(run_mode, algorithms_list, tests_list,
                                           redirect_ref=redirect_ref, redirect_weight=redirect_weight,
                                           link_ref=link_ref, link_weight=link_weight,
                                           nstart_flag=nstart_flag, wo_users=wo_users,
                                           multiproc_flag=multiproc_flag)
    out_fn = gm.get_general_file_path(run_mode, 'eval_union_stats', dir='outputs')
    if len(folds_stats_list):  # non-empty means a K-fold cross-validation run took place (not just BL)
        stats.stats_union(folds_stats_list, out_fn, raw_flag=True)
    print 'EVALUATION MAIN: Total time: ', datetime.now() - startTime
    sys.stdout.flush()
    return
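# --- Illustrative sketch (hedged): the fold construction used in main() above, on
# toy data. With the legacy sklearn API used by this project (sklearn.cross_validation,
# since removed in favor of model_selection), StratifiedKFold takes the label vector
# directly and yields (train_index, test_index) pairs; each test fold preserves the
# malicious/benign ratio of the full label set.
def _sketch_stratified_folds():
    import numpy as np
    from sklearn import cross_validation
    domains = np.array(['a.com', 'b.com', 'c.com', 'd.com', 'e.com', 'f.com'])
    labels = np.array([1, 0, 0, 1, 0, 0])  # 1=malicious, 0=else
    kf = cross_validation.StratifiedKFold(list(labels), n_folds=2)
    tests_list = []
    for train_index, test_index in kf:
        tests_list.append([domains[test_index], labels[test_index]])
    return tests_list  # two folds, each with one malicious and two benign domains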
def run_entire_flow_iteration(run_mode, algorithms_list=[], test=[], wo_users=False,
                              link_ref=False, link_weight=0., redirect_ref=False,
                              redirect_weight=0., nstart_flag=False, fold=None):
    '''
    Performs one iteration of the flow: preprocessing followed by the ranking algorithms.

    Parameters:
    -----------
    run_mode - str (small_test/real_run)
    algorithms_list - list of strs (default-[])
    test - list of numpy arrays [[d1,d2],[0,1]] (default-[])
    wo_users - bool (default-False)
    link_ref - bool (default-False)
    link_weight - float (default-0.)
    redirect_ref - bool (default-False)
    redirect_weight - float (default-0.)
    nstart_flag - bool (default-False)
    fold - str ('1'/'2'/...) (default-None)

    Return:
    -------
    fold_stats - stats object
    '''
    if fold:
        f_postfix = ['fold', fold]
    else:
        f_postfix = []
    # redirect stdout of this iteration to a per-fold log file:
    outFile = gm.get_general_file_path(run_mode, 'stdout', f_postfix, 'outputs')
    sys.stdout = open(outFile, 'w')
    eval_domains = []
    if len(test):
        eval_domains = test[0][np.where(test[1] == 1)]  # the fold's malicious domains
    preproc.main(run_mode, evaluated_domain_list=eval_domains, wo_users=wo_users,
                 link_ref=link_ref, link_weight=link_weight,
                 redirect_ref=redirect_ref, redirect_weight=redirect_weight, fold=fold)
    fold_stats = salsa.main(run_mode, algorithms_list, test=test, fold=fold, nstart_flag=nstart_flag)
    return fold_stats
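# --- Illustrative usage (hedged): how a caller might drive one iteration per fold.
# run_entire_flow (referenced by the evaluation main() above but defined elsewhere)
# presumably loops in this spirit, possibly in parallel when multiproc_flag is set.
def _sketch_drive_folds(run_mode, algorithms_list, tests_list):
    folds_stats = []
    for i, test in enumerate(tests_list):
        folds_stats.append(run_entire_flow_iteration(run_mode, algorithms_list,
                                                     test=test, fold=str(i + 1)))
    return folds_stats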
def main(run_mode='real_run', algorithms_list=[], test=[], fold=None, nstart_flag=False):
    '''
    Performs the models flow.
    * IMPORTANT: inverse pagerank changes the graph itself, hence it should be last in algorithms_list

    Parameters:
    -----------
    run_mode - str (small_test/real_run) (default-'real_run')
    algorithms_list - list of strs (default-[])
    test - list of numpy arrays [[d1,d2],[0,1]] (default-[])
    fold - str ('1'/'2'/...) (default-None)
    nstart_flag - bool (default-False)

    Return:
    -------
    eval_obj - stats object
    '''
    import numpy as np
    test_mal = []
    if len(test):
        test_mal = test[0][np.where(test[1] == 1)]
    print '\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~' \
          '\nALGORITHMS MAIN:\nFOLD- ', fold, '\nevaluated domains- ', test_mal, \
          '\nalg list- ', algorithms_list, '\nrun mode- ', run_mode, \
          '\nnstart_flag- ', nstart_flag, '\nSTART -----> ', datetime.now()
    sys.stdout.flush()
    startTime = datetime.now()
    if fold:
        f_postfix = ['fold', fold]
    else:
        f_postfix = None
    transitions_dict_path, domain_risk_dict_path = get_input_files(run_mode, fold)
    whiteList_path = gm.get_general_file_path(run_mode, 'whiteList')
    G = graph.domains_graph(transitions_dict_path, domain_risk_dict_path, whiteList_path)
    print '--- main: num of nodes: ' + str(G.G.number_of_nodes()) + ', num of edges: ' + str(G.G.number_of_edges())
    sys.stdout.flush()
    tmpTime = datetime.now()
    risk_dict = None
    if nstart_flag:
        risk_dict = G.get_nodes_attr_val_dict(G.n_attr.risk)
        if len(test):
            # zero the risk of the held-out (test) malicious domains so their labels
            # do not leak into the nstart/personalization vector:
            for d in test_mal:
                risk_dict[d] = 0.0
    # map each algorithm name to the callable that runs it:
    run = {'salsa': lambda: G.run_salsa(salsa_type='salsa_per_class', nstart_flag=nstart_flag),
           'hits': lambda: G.run_hits(hits_type='hits', nstart=risk_dict),
           'pagerank': lambda: G.run_pagerank(pagerank_type='pagerank', personalization=risk_dict),
           'inverse_pagerank': lambda: G.run_pagerank(pagerank_type='pagerank', personalization=risk_dict, inverse=True)}
    for alg in algorithms_list:
        h, a = run[alg]()
        G.post_filtering_results(alg)
        hubs_file, authorities_file = get_output_files(run_mode, alg, f_postfix)
        G.evaluate_algorithem(auth_fn=authorities_file, hub_fn=hubs_file, alg_type=alg)
        print '\n--- main: ', alg, ' run + evaluation took: ', str(datetime.now() - tmpTime)
        sys.stdout.flush()
        tmpTime = datetime.now()
    # In case of a 'full run' we create an output file of the malicious domains and all
    # domains with their rank as 0/1:
    if not fold:
        print '\n--- main: this is a FULL run!'
        out_fn = gm.get_general_file_path(run_mode, file_name='eval_BL_out', dir='outputs')
        eval_obj = G.evaluation(algorithms_list, test, out_fn)
        # FOR DEBUG:
        gm.write_object_to_file(eval_obj, fn='/home/michal/SALSA_files/tmp/s_obj')
        G.export_domains_for_strat_Kfolds('/'.join(['/home/michal/SALSA_files/tmp', run_mode, 'mal_d']))
    else:  # 'fold' run
        print '\n--- main: this is a FOLD run!'
        eval_obj = G.evaluation(algorithms_list, test)
        # export the iteration results to weka (.arff) files:
        fn_train = gm.get_general_file_path(run_mode, file_name='train', post_list=f_postfix, dir='outputs', file_type='.arff')
        fn_test = gm.get_general_file_path(run_mode, file_name='test', post_list=f_postfix, dir='outputs', file_type='.arff')
        fn_matrix = gm.get_general_file_path(run_mode, file_name='matrix', post_list=f_postfix, dir='outputs', file_type='.arff')
        G.export_to_weka_file(algorithms_list, test, fn_train, fn_test, fn_matrix)
    print '\n--- main: evaluation took: ', datetime.now() - tmpTime
    sys.stdout.flush()
    G.clear()  # clear the graph and all its attributes for an (optional) next run
    print '\nALGORITHMS END.\tTotal run time: ', datetime.now() - startTime
    sys.stdout.flush()
    return eval_obj
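# --- Illustrative sketch (hedged): the personalization idea used in main() above,
# shown with plain networkx (the project's G.run_pagerank wrapper is defined
# elsewhere). Seeding the restart distribution with the known-malicious domains
# while zeroing the held-out test domains keeps the test labels out of the ranking.
def _sketch_personalized_pagerank(nx_graph, risk_dict, held_out_mal):
    import networkx as nx
    pers = dict(risk_dict)
    for d in held_out_mal:
        pers[d] = 0.0  # hide test labels from the restart vector
    return nx.pagerank(nx_graph, alpha=0.85, personalization=pers)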