def generate_combined_scores(run_mode,
                             algorithms_list=[],
                             evaluated_domain_list=None):
    startTime = datetime.now()
    dicts_list = []
    for alg in algorithms_list:
        dicts_list.append(
            gm.read_object_from_file(
                gm.get_general_file_path(run_mode,
                                         '_'.join([alg, 'a_dict_pickle']),
                                         evaluated_domain_list)))

    # map each combination type to a callable (avoids the fragile eval-on-string pattern)
    combine_types = {'max': lambda: gm.create_max_dict_from_dicts(dicts_list),
                     'avg': lambda: gm.create_avg_dict_from_dicts(dicts_list),
                     'top_3_avg': lambda: gm.create_avg_dict_from_dicts(dicts_list, n=3),
                     'top_2_avg': lambda: gm.create_avg_dict_from_dicts(dicts_list, n=2)}
    for k, combine in combine_types.items():
        out_file = gm.get_general_file_path(run_mode,
                                            k,
                                            evaluated_domain_list,
                                            dir='outputs')
        comb_score_dict = combine()
        u_pct_dict, l_pct_dict = gm.get_percentiles(comb_score_dict)
        gm.write_union_of_dicts_ordered_by_value_to_file(
            comb_score_dict, [u_pct_dict, l_pct_dict], out_file)
    print '\n--- main: combined scores generation and evaluation took: ' + str(
        datetime.now() - startTime)
    sys.stdout.flush()
    return
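
# A minimal illustration of the per-key combination the gm.create_*_dict_from_dicts
# helpers above presumably perform (assumed semantics, not the library's actual code):
# each input dict maps domain -> score, and scores for the same key are reduced across dicts.
def combine_dicts(dicts_list, reduce_fn):
    keys = set().union(*[set(d.keys()) for d in dicts_list])
    return dict((k, reduce_fn([d[k] for d in dicts_list if k in d])) for k in keys)

d1 = {'a.com': 0.9, 'b.com': 0.2}
d2 = {'a.com': 0.4, 'c.com': 0.7}
print combine_dicts([d1, d2], max)                                # per-key max
print combine_dicts([d1, d2], lambda v: sum(v) / float(len(v)))   # per-key average
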
def compare_scores_histogram(run_mode,
                             algorithms_list=[],
                             evaluated_domain_list=None):
    removed_domains_f = '/home/michal/SALSA_files/tmp/remove_domains_from_results'
    for alg in algorithms_list:
        print '\n--- main: ' + alg
        sys.stdout.flush()
        sys.stdout.flush()
        if 'pagerank' in alg:
            scores_dict = gm.read_object_from_file(
                gm.get_general_file_path(run_mode,
                                         '_'.join([alg, 'a_dict_pickle']),
                                         evaluated_domain_list))
            gm.histogram_of_dict(scores_dict, fn=removed_domains_f, bins=150)
        else:
            a_scores_dict = gm.read_object_from_file(
                gm.get_general_file_path(run_mode,
                                         '_'.join([alg, 'a_dict_pickle']),
                                         evaluated_domain_list))
            print '--- main: authorities'
            sys.stdout.flush()
            gm.histogram_of_dict(a_scores_dict, fn=removed_domains_f, bins=150)
            print '\n--- main: combined'
            sys.stdout.flush()
    # combine_scores is assumed to be defined elsewhere in this module
    scores_dict = combine_scores(algorithms_list)
    gm.histogram_of_dict(scores_dict, fn=removed_domains_f, bins=150)

    return
def create_combined_scores(self, run_mode, alg_list=[], evaluated_domain_list=None, attr='Lpct'):
    # method of the graph class:
    # alg_list = list of algorithms whose scores are combined
    # attr = (string) the node attribute the combined score is based on
    # self.alg_auth_Lpct maps alg name -> authority lower-percentile attribute
    # (salsa/hits/pagerank/inverse_pagerank); self.alg_hub_Lpct maps salsa/hits
    # to their hub lower-percentile attribute (these mappings are class members)
    dicts = []
    if attr == 'Lpct':
        for alg in alg_list:
            # dict of each domain's lower-percentile authority score
            dicts.append(self.get_nodes_attr_val_dict(self.alg_auth_Lpct[alg]))
        dicts.append(self.get_nodes_attr_val_dict(self.n_attr.risk))

    for comb_type in ['max', 'avg', 'top3_avg', 'top2_avg']:
        out_file = gm.get_general_file_path(run_mode, '_'.join([comb_type, attr]), evaluated_domain_list, dir='outputs')
        gm.create_combined_score(comb_type, dicts, is_last_dict_risk=True, fn=out_file)

    # create a new high-level score for hits and salsa (max of auth/hub score):
    tmp_dicts = []
    for k, v in self.alg_hub_Lpct.items():    # for each alg [salsa, hits]
        # index (rather than pop) so earlier iterations don't shift later lookups
        tmp_dicts.append(dicts[alg_list.index(k)])           # add auth scores dict
        tmp_dicts.append(self.get_nodes_attr_val_dict(v))    # add hub scores dict
        out_file = gm.get_general_file_path(run_mode, '_'.join([k, 'max', attr]), evaluated_domain_list, dir='outputs')
        gm.create_combined_score('max', tmp_dicts, is_last_dict_risk=False, fn=out_file)
        del tmp_dicts[:]

    return
def get_output_files(run_mode, fold=None):
    if fold:
        f_postfix = ['fold', fold]
    else:
        f_postfix = None
    processed_file = gm.get_general_file_path(run_mode, 'input_list', post_list=f_postfix)
    output_users_risk_dict_path = gm.get_general_file_path(run_mode, 'users_risk_dict', post_list=f_postfix)
    output_transitions_dict_path = gm.get_general_file_path(run_mode, 'transitions_dict', post_list=f_postfix)
    output_domain_risk_dict_path = gm.get_general_file_path(run_mode, 'domains_risk_dict', post_list=f_postfix)

    return processed_file, output_users_risk_dict_path, output_transitions_dict_path, output_domain_risk_dict_path
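
# Hedged usage sketch of the helper above: unpack the four per-fold paths.
# ('small_test' and fold='1' are illustrative values; the actual layout
# depends on gm.get_general_file_path.)
processed, users_risk_p, transitions_p, domains_risk_p = get_output_files('small_test', fold='1')
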
def get_output_files(run_mode, alg, evaluated_domain_list=None):
    if 'pagerank' not in alg:   # hits or salsa
        output_hubs_file = gm.get_general_file_path(run_mode, '_'.join([alg, 'hub']), post_list=evaluated_domain_list, dir='outputs')
        output_authorities_file = gm.get_general_file_path(run_mode, '_'.join([alg, 'auth']), post_list=evaluated_domain_list, dir='outputs')
    else:   # pagerank or inverse_pagerank
        output_hubs_file = None
        output_authorities_file = gm.get_general_file_path(run_mode, alg, post_list=evaluated_domain_list, dir='outputs')

    return output_hubs_file, output_authorities_file
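
# Hedged usage sketch: pagerank variants produce no hub scores, so the hubs path is None.
hubs_fn, auth_fn = get_output_files('small_test', 'pagerank')   # hubs_fn is None
hubs_fn, auth_fn = get_output_files('small_test', 'salsa')      # both paths are set
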
Example #6
def main():
    from sklearn import cross_validation
    import numpy as np
    import stats
    import os
    startTime = datetime.now()

    domains_risk_dict_f = gm.get_general_file_path(run_mode, 'mal_d/domains_risk', dir='tmp')

    # If the domains-label file does not exist, do a 'full run' to create it
    # (needed for stratified K-folds)
    if not os.path.exists(domains_risk_dict_f):
        # run the entire flow with an empty evaluated-domains list; this creates
        # the labeled domains risk dict file (1=malicious, 0=else)
        run_entire_flow(run_mode, algorithms_list, [],
                        redirect_ref=redirect_ref, redirect_weight=redirect_weight,
                        link_ref=link_ref, link_weight=link_weight,
                        nstart_flag=nstart_flag, wo_users=wo_users)
    src_mal_domains = gm.get_general_file_path(run_mode, 'mal_d/src_mal_domains', dir='tmp')
    mal_list = np.array(gm.read_list_from_file(src_mal_domains))
    tests_list = []
    if len(mal_list):    # if the src_mal_domains file is not empty
        # unzip the {domain: risk} dict into a (domains, risks) pair for stratification
        uzip_d_risk = zip(*gm.read_object_from_file(domains_risk_dict_f).items())
        kf = cross_validation.StratifiedKFold(list(uzip_d_risk[1]),
                                              n_folds=min(k_folds, sum(uzip_d_risk[1])))
        for train_index, test_index in kf:
            # each test fold: [array of domains, array of 0/1 risk labels]
            test = [np.asarray(uzip_d_risk[0])[test_index], np.asarray(uzip_d_risk[1])[test_index]]
            tests_list.append(test)

    folds_stats_list = run_entire_flow(run_mode, algorithms_list, tests_list,
                                       redirect_ref=redirect_ref, redirect_weight=redirect_weight,
                                       link_ref=link_ref, link_weight=link_weight,
                                       nstart_flag=nstart_flag, wo_users=wo_users,
                                       multiproc_flag=multiproc_flag)

    out_fn = gm.get_general_file_path(run_mode, 'eval_union_stats', dir='outputs')
    if len(folds_stats_list):   # non-empty means a K-fold cross-validation run took place (not just the BL/baseline run)
        stats.stats_union(folds_stats_list, out_fn, raw_flag=True)
    print 'EVALUATION MAIN: Total time: ', datetime.now() - startTime
    sys.stdout.flush()

    return
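
# A toy sketch of the fold format built above (hypothetical data): each entry in
# tests_list pairs an array of domains with their 0/1 risk labels, matching the
# 'test' parameter format used by the flow functions below.
import numpy as np
toy_test = [np.array(['a.com', 'b.com', 'c.com']), np.array([1, 0, 1])]
mal_only = toy_test[0][np.where(toy_test[1] == 1)]   # -> array(['a.com', 'c.com'])
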
Example #8
def run_entire_flow_iteration(run_mode, algorithms_list=[], test=[], wo_users=False,
                              link_ref=False, link_weight=0., redirect_ref=False,
                              redirect_weight=0., nstart_flag=False, fold=None):
    '''
    Performs the flow  
    Parameters:
    -----------
        run_mode - str (small_test/real_run)
        algorithms_list - list of strs (default-[])
        test - list of numpy arrays [[d1,d2],[0,1]] (default-[])
        wo_users - bool (default-False)
        link_ref - bool (default-False)
        link_weight - float (default-0.)
        redirect_ref - bool (default-False)
        redirect_weight - float (default-0.)
        nstart_flag - bool (default-False)
        fold - str ('1'/'2'/...) (default-None)
    Return:
    -------
        fold_stats - stats object
    '''
    if fold:
        f_postfix = ['fold', fold]
    else:
        f_postfix = []
    outFile = gm.get_general_file_path(run_mode, 'stdout', f_postfix, 'outputs')
    sys.stdout = open(outFile, 'w')

    eval_domains = []
    if len(test):
        # the evaluated domains are the test-fold domains labeled 1 (malicious)
        eval_domains = test[0][np.where(test[1] == 1)]
    preproc.main(run_mode, evaluated_domain_list=eval_domains, wo_users=wo_users,
                 link_ref=link_ref, link_weight=link_weight,
                 redirect_ref=redirect_ref, redirect_weight=redirect_weight, fold=fold)
    fold_stats = salsa.main(run_mode, algorithms_list, test=test, fold=fold, nstart_flag=nstart_flag)
    return fold_stats
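
# Hedged usage sketch of a single iteration, following the documented 'test' format
# [[d1, d2], [0, 1]] (domain array + 0/1 labels); all argument values are illustrative only.
import numpy as np
test = [np.array(['evil.example', 'ok.example']), np.array([1, 0])]
fold_stats = run_entire_flow_iteration('small_test', algorithms_list=['hits', 'pagerank'],
                                       test=test, link_ref=True, link_weight=0.2, fold='1')
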
def main(run_mode='real_run',
         algorithms_list=[],
         test=[],
         fold=None,
         nstart_flag=False):
    '''
    Performs the models' flow.
    * IMPORTANT: inverse pagerank changes the graph itself, hence it should be last in algorithms_list
    Parameters:
    -----------
        run_mode - str (small_test/real_run) (default-'real_run')
        algorithms_list - list of strs (default-[])
        test - list of numpy arrays [[d1,d2],[0,1]] (default-[])
        fold - str ('1'/'2'/...) (default-None)
        nstart_flag - bool (default-False)
    Return:
    -------
        eval_obj - stats object
    '''
    import numpy as np

    test_mal = []
    if len(test):
        test_mal = test[0][np.where(test[1] == 1)]
    print '\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\nALGORITHMS MAIN:', \
        '\nFOLD- ', fold, '\nevaluated domains- ', test_mal, '\nalg list- ', algorithms_list, \
        '\nrun mode- ', run_mode, '\nnstart_flag- ', nstart_flag, '\nSTART -----> ', datetime.now()
    sys.stdout.flush()
    startTime = datetime.now()

    if fold: f_postfix = ['fold', fold]
    else: f_postfix = None

    transitions_dict_path, domain_risk_dict_path = get_input_files(
        run_mode, fold)  #evaluated_domain_list)
    whiteList_path = gm.get_general_file_path(run_mode, 'whiteList')

    G = graph.domains_graph(transitions_dict_path, domain_risk_dict_path,
                            whiteList_path)

    #G.add_nodes_attr(G.n_attr.risk, gm.readDict(domain_risk_dict_path))
    #print '\nbefore preprocessing:'
    print '--- main: num of nodes: ' + str(
        G.G.number_of_nodes()) + ', num of edges: ' + str(
            G.G.number_of_edges())
    sys.stdout.flush()
    tmpTime = datetime.now()
    # optional (disabled): G.graph_Preprocessing(gm.epsilon), followed by DEBUG
    # counts of nodes with in/out degree 0 and a nodes/edges summary
    risk_dict = None
    if nstart_flag:
        risk_dict = G.get_nodes_attr_val_dict(G.n_attr.risk)
        if len(test):
            for d in test_mal:
                risk_dict[d] = 0.0
    # map each algorithm name to a callable returning its (hubs, authorities) dicts
    run = {'salsa': lambda: G.run_salsa(salsa_type='salsa_per_class', nstart_flag=nstart_flag),
           'hits': lambda: G.run_hits(hits_type='hits', nstart=risk_dict),
           'pagerank': lambda: G.run_pagerank(pagerank_type='pagerank', personalization=risk_dict),
           'inverse_pagerank': lambda: G.run_pagerank(pagerank_type='pagerank', personalization=risk_dict, inverse=True)}

    for alg in algorithms_list:
        h, a = run[alg]()
        G.post_filtering_results(alg)
        hubs_file, authorities_file = get_output_files(
            run_mode, alg, f_postfix)  #evaluated_domain_list)
        G.evaluate_algorithem(auth_fn=authorities_file,
                              hub_fn=hubs_file,
                              alg_type=alg)
        # optional (disabled): pickle the authority dict per algorithm, e.g.
        # gm.write_object_to_file(a, gm.get_general_file_path(run_mode, '_'.join([alg, 'a_dict_pickle']), f_postfix)),
        # and plot its histogram via G.alg_histogram(alg)
        print '\n--- main: ', alg, ' run + evaluation took: ', str(
            datetime.now() - tmpTime)
        sys.stdout.flush()
        tmpTime = datetime.now()
    # optional follow-ups (disabled): per-domain CSV summaries via G.write_eval_results_to_csv,
    # G.auc_evaluation(algorithms_list, test), and the combined-score generators
    # generate_combined_scores(...) / G.create_combined_scores(...)

    # In case of a 'full run' we create an output file of the malicious domains and all domains with their 0/1 labels:
    if not fold:
        print '\n--- main: this is a FULL run!'
        out_fn = gm.get_general_file_path(run_mode,
                                          file_name='eval_BL_out',
                                          dir='outputs')
        eval_obj = G.evaluation(algorithms_list, test, out_fn)
        # FOR DEBUG: gm.write_object_to_file(eval_obj, fn='/home/michal/SALSA_files/tmp/s_obj')
        G.export_domains_for_strat_Kfolds('/'.join(
            ['/home/michal/SALSA_files/tmp', run_mode, 'mal_d']))
    else:  # 'fold' run
        print '\n--- main: this is a FOLD run!'
        eval_obj = G.evaluation(algorithms_list, test)
        # export the iteration results to a weka file:
        fn_train = gm.get_general_file_path(run_mode,
                                            file_name='train',
                                            post_list=f_postfix,
                                            dir='outputs',
                                            file_type='.arff')
        fn_test = gm.get_general_file_path(run_mode,
                                           file_name='test',
                                           post_list=f_postfix,
                                           dir='outputs',
                                           file_type='.arff')
        fn_matrix = gm.get_general_file_path(run_mode,
                                             file_name='matrix',
                                             post_list=f_postfix,
                                             dir='outputs',
                                             file_type='.arff')
        G.export_to_weka_file(algorithms_list, test, fn_train, fn_test,
                              fn_matrix)

    print '\n--- main: evaluation took: ', datetime.now() - tmpTime
    sys.stdout.flush()
    G.clear()   # clear the graph and all its attributes for an (optional) next run
    print '\nALGORITHMS END.\tTotal run time: ', datetime.now() - startTime
    sys.stdout.flush()
    return eval_obj
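
# Hedged usage sketch: per the docstring, inverse pagerank mutates the graph itself,
# so it must come last in algorithms_list. All values here are illustrative only.
import numpy as np
test = [np.array(['evil.example', 'ok.example']), np.array([1, 0])]
eval_obj = main('small_test', ['salsa', 'hits', 'pagerank', 'inverse_pagerank'],
                test=test, fold='1', nstart_flag=True)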