Beispiel #1
0
def replica_vs_original(seed=None, figpath=None, generator_func=None, G=None, params=None, num_replicas=150, title_infix='', metrics=None, intermediates=False, n_jobs=-1):
    """Generate one or more replicas of G and compare them to the original graph.

    seed:           RNG seed applied to both numpy and random (random if None)
    figpath:        base path for the deviation plot (chosen by plot_deviation if None)
    generator_func: replica generator (defaults to algorithms.generate_graph)
    G:              original graph (a sample network is loaded if None)
    params:         editing/generation parameters (sensible defaults if None)
    num_replicas:   number of replicas to generate
    title_infix:    extra text inserted into plot titles
    metrics:        metric descriptors to evaluate (graphutils.default_metrics if None)
    intermediates:  if True, also evaluate the coarsened intermediate graphs
    n_jobs:         parallelism passed through to the helper functions

    Returns a dict of per-level replica statistics; also writes a pickle and a CSV.
    """
    if seed is None:
        seed = npr.randint(1E6)
    print('rand seed: %d' % seed)
    npr.seed(seed)
    random.seed(seed)

    if generator_func is None:
        generator_func = algorithms.generate_graph

    if G is None:
        G = graphutils.load_graph(path='data-social/potterat_Hiv250.elist')

    # BUG FIX: params must receive its default BEFORE it is inspected below.
    # The original evaluated "'metric_runningtime_bound' in params" first,
    # which raised TypeError whenever params was None.
    if params is None:
        params = {'verbose': False,
                  'node_edit_rate': [0.05, 0.04, 0.03, 0.02, 0.01],
                  'edge_edit_rate': [0.05, 0.04, 0.03, 0.02, 0.01],
                  'node_growth_rate': [0],
                  'locality_bias_correction': 0.,
                  'enforce_connected': True,
                  'accept_chance_edges': 1.0,
                  'retain_intermediates': intermediates}
    if intermediates:
        params['retain_intermediates'] = True

    if metrics is None:
        metrics = graphutils.default_metrics[:]
    metrics = [m for m in metrics if m['optional'] < 2]
    if 'metric_runningtime_bound' in params:
        mrtb = params['metric_runningtime_bound']
        metrics = [m for m in metrics if m['runningtime'] <= mrtb]
    metrics = [m for m in metrics if m['name'] not in ['avg flow closeness']]  #broken in NX 1.6
    metrics.reverse()

    print('Params:')
    print(params)
    print('Metrics:')
    print([metric['name'] for metric in metrics])

    replicas         = replicate_graph(G=G, generator_func=generator_func, num_replicas=num_replicas, params=params, title_infix=title_infix, n_jobs=n_jobs)
    jaccard_edges    = evaluate_similarity(base_graphs=G, graphs=replicas, n_jobs=n_jobs)  #this is actually a mean
    vals_of_all      = evaluate_metrics(graphs=[G]+replicas, metrics=metrics, n_jobs=n_jobs)
    vals_of_graph    = [metric_data[0]  for metric_data in vals_of_all]  # index 0 is the original graph
    vals_of_replicas = [metric_data[1:] for metric_data in vals_of_all]
    replica_statistics, figpath = plot_deviation(vals_of_replicas, vals_of_graph, metrics, figpath, jaccard_edges, title_infix, seed, getattr(G, 'name', ''))
    #pylab.show()
    data = {'metrics': [met['name'] for met in metrics], 'name': getattr(G, 'name', ''), 'params': params, 'num_replicas': num_replicas, 'figpath': figpath}
    # key 0 holds statistics for the full-resolution replicas
    data[0] = replica_statistics
    data[0].update({'vals_of_replicas': vals_of_replicas, 'val_of_models': vals_of_graph, 'avg_jaccard_edges': jaccard_edges})

    if intermediates:
        # walk down the coarsening hierarchy: one level per editing-rate entry
        current_replicas = replicas
        num_levels = max(len(params.get('node_edit_rate', [])),
                         len(params.get('edge_edit_rate', [])),
                         len(params.get('node_growth_rate', [])),
                         len(params.get('edge_growth_rate', [])))
        for level in range(1, num_levels):
            print('LEVEL: %d' % level)
            coarse_models   = [r.coarser_graph.model_graph for r in current_replicas]
            coarse_replicas = [r.coarser_graph             for r in current_replicas]
            vals_of_models   = evaluate_metrics(graphs=coarse_models,   metrics=metrics, n_jobs=n_jobs)
            vals_of_replicas = evaluate_metrics(graphs=coarse_replicas, metrics=metrics, n_jobs=n_jobs)
            jaccard_edges    = evaluate_similarity(base_graphs=coarse_models, graphs=coarse_replicas, n_jobs=n_jobs)

            replica_statistics, dummy = plot_deviation(vals_of_replicas=vals_of_replicas, vals_of_graph=vals_of_models,
                                                       metrics=metrics, figpath=figpath + 'level%d' % level,
                                                       jaccard_edges=jaccard_edges)
            current_replicas = coarse_replicas
            data[level] = replica_statistics
            data[level].update({'vals_of_replicas': vals_of_replicas, 'vals_of_models': vals_of_models, 'avg_jaccard_edges': jaccard_edges})
    graphutils.safe_pickle(path=figpath + '.pkl', data=data)
    save_param_set(params, seed, figpath)
    save_stats_csv(path=figpath + '.csv', seed=seed, data=data)

    return data
        # NOTE(review): fragment of a larger function -- its `def` line and the
        # bindings of verbose, image_path, visualizer_cmdl, write_graph,
        # visualizer, output_path, new_G and params are outside this excerpt.
        if verbose:
            print('Writing graph image: %s ..' % image_path)
        sys.stdout.flush()
        # render via an external command; return code is captured but not checked
        retCode = os.system(visualizer_cmdl)

        if verbose:
            print(visualizer_cmdl)
        if os.name == 'nt':
            # Windows: 'start' opens the file with its associated viewer
            pdf_cmld = 'start %s' % image_path
            if verbose:
                print(pdf_cmld)
            os.system(pdf_cmld)
        elif os.name == 'posix':
            #aside: file --mime-type -b my.pdf
            open_in_unix(image_path, verbose=verbose, ext='pdf')
    elif write_graph and visualizer != None:
        # run a user-supplied visualizer command on the written graph file
        if verbose:
            print('Running visualizer: ' + str(visualizer))
        sys.stdout.flush()
        visualizer_cmdl = visualizer + ' ' + output_path
        retCode = os.system(visualizer_cmdl)

        if verbose:
            print(visualizer_cmdl)

    # persist the replica graph alongside the written output
    graphutils.safe_pickle(path=output_path + '.pkl',
                           data=new_G,
                           params=params)
    if verbose:
        print('Replica is referenced by variable: new_G')
def replica_vs_original(seed=None,
                        figpath=None,
                        generator_func=None,
                        G=None,
                        params=None,
                        num_replicas=150,
                        title_infix='',
                        metrics=None,
                        intermediates=False,
                        n_jobs=-1,
                        store_replicas=False):
    """Generate one or more replicas and compare them to the original graph.

    seed:           RNG seed applied to both numpy and random (random if None)
    figpath:        base path for the deviation plot (chosen by plot_deviation if None)
    generator_func: replica generator (defaults to algorithms.generate_graph)
    G:              original graph (a sample network is loaded if None)
    params:         editing/generation parameters (sensible defaults if None)
    num_replicas:   number of replicas to generate
    title_infix:    extra text inserted into plot titles
    metrics:        metric descriptors to evaluate (graphutils.default_metrics if None)
    intermediates:  if True, also evaluate the coarsened intermediate graphs
    n_jobs:         parallelism passed through to the helper functions
    store_replicas: if True, additionally write each replica as a GML file

    Returns a dict of per-level replica statistics; also writes a pickle, a CSV,
    and a tab-separated file of replica metric values normalized by the original.
    """
    if seed is None:
        seed = npr.randint(1E6)
    print('rand seed: %d' % seed)
    npr.seed(seed)
    random.seed(seed)

    if generator_func is None:
        generator_func = algorithms.generate_graph

    if G is None:
        G = graphutils.load_graph(path='data-social/potterat_Hiv250.elist')

    # BUG FIX: params must receive its default BEFORE it is inspected below.
    # The original evaluated "'metric_runningtime_bound' in params" first,
    # which raised TypeError whenever params was None.
    if params is None:
        params = {
            'verbose': False,
            'node_edit_rate': [0.05, 0.04, 0.03, 0.02, 0.01],
            'edge_edit_rate': [0.05, 0.04, 0.03, 0.02, 0.01],
            'node_growth_rate': [0],
            'locality_bias_correction': 0.,
            'enforce_connected': True,
            'accept_chance_edges': 1.0,
            'retain_intermediates': intermediates
        }
    if intermediates:
        params['retain_intermediates'] = True

    if metrics is None:
        metrics = graphutils.default_metrics[:]
    metrics = [m for m in metrics if m['optional'] < 2]
    if 'metric_runningtime_bound' in params:
        mrtb = params['metric_runningtime_bound']
        metrics = [m for m in metrics if m['runningtime'] <= mrtb]
    metrics = [m for m in metrics
               if m['name'] not in ['avg flow closeness']]  #broken in NX 1.6
    metrics.reverse()

    print('Params:')
    print(params)
    print('Metrics:')
    print([metric['name'] for metric in metrics])

    # NOTE(review): this variant loads pre-built replicas from disk instead of
    # calling replicate_graph (the generator call survives only in comments in
    # the original) -- confirm read_all_files() yields graphs comparable to G.
    replicas = read_all_files()
    jaccard_edges = evaluate_similarity(
        base_graphs=G, graphs=replicas,
        n_jobs=n_jobs)  #this is actually a mean
    vals_of_all = evaluate_metrics(graphs=[G] + replicas,
                                   metrics=metrics,
                                   n_jobs=n_jobs)
    vals_of_graph = [metric_data[0] for metric_data in vals_of_all]  # index 0 is the original
    vals_of_replicas = [metric_data[1:] for metric_data in vals_of_all]
    replica_statistics, figpath = plot_deviation(vals_of_replicas,
                                                 vals_of_graph, metrics,
                                                 figpath, jaccard_edges,
                                                 title_infix, seed,
                                                 getattr(G, 'name', ''))
    #pylab.show()
    data = {
        'metrics': [met['name'] for met in metrics],
        'name': getattr(G, 'name', ''),
        'params': params,
        'num_replicas': num_replicas,
        'figpath': figpath
    }
    # key 0 holds statistics for the full-resolution replicas
    data[0] = replica_statistics
    data[0].update({
        'vals_of_replicas': vals_of_replicas,
        'val_of_models': vals_of_graph,
        'avg_jaccard_edges': jaccard_edges
    })

    # Dump replica metric values normalized by the original graph's value
    # (one row per metric, tab-separated).
    # FIXME: hard-coded absolute, user-specific output path -- parameterize.
    norm_path = "/home/varsha/Documents/final_results/Krongen/Boeing_normalized" + timeNow(
    )
    # use a context manager so the file is closed even if a write fails
    with open(norm_path, 'w') as norm_file:
        for metric_idx, replica_vals in enumerate(vals_of_replicas):
            for elem in replica_vals:
                nor_value = elem
                if vals_of_graph[metric_idx] != 0:  # avoid division by zero
                    nor_value = float(elem) / vals_of_graph[metric_idx]
                norm_file.write(str(nor_value))
                norm_file.write('\t')
            norm_file.write('\n')

    if intermediates:
        # walk down the coarsening hierarchy: one level per editing-rate entry
        current_replicas = replicas
        for level in range(
                1,
                max(len(params.get('node_edit_rate', [])),
                    len(params.get('edge_edit_rate', [])),
                    len(params.get('node_growth_rate', [])),
                    len(params.get('edge_growth_rate', [])))):
            print('LEVEL: %d' % level)
            coarse_models = [
                r.coarser_graph.model_graph for r in current_replicas
            ]
            coarse_replicas = [r.coarser_graph for r in current_replicas]
            vals_of_models = evaluate_metrics(graphs=coarse_models,
                                              metrics=metrics,
                                              n_jobs=n_jobs)
            vals_of_replicas = evaluate_metrics(graphs=coarse_replicas,
                                                metrics=metrics,
                                                n_jobs=n_jobs)
            jaccard_edges = evaluate_similarity(base_graphs=coarse_models,
                                                graphs=coarse_replicas,
                                                n_jobs=n_jobs)

            replica_statistics, dummy \
                 = plot_deviation(vals_of_replicas=vals_of_replicas, vals_of_graph=vals_of_models,
                                  metrics=metrics, figpath=figpath + 'level%d'%level, jaccard_edges=jaccard_edges)
            current_replicas = coarse_replicas
            data[level] = replica_statistics
            data[level].update({
                'vals_of_replicas': vals_of_replicas,
                'vals_of_models': vals_of_models,
                'avg_jaccard_edges': jaccard_edges
            })
    graphutils.safe_pickle(path=figpath + '.pkl', data=data)
    save_param_set(params, seed, figpath)
    save_stats_csv(path=figpath + '.csv', seed=seed, data=data)

    # optionally store replica graphs in files
    if store_replicas:
        out_dir = "output/replicas_{0}_{1}".format(getattr(
            G, "name", ""), timeNow())
        os.mkdir(out_dir)
        # BUG FIX: the original loop variable shadowed (and clobbered) the
        # input graph G; enumerate also replaces zip(replicas, range(...)).
        for replica_no, replica in enumerate(replicas):
            graphutils.write_graph(replica,
                                   path="{0}/{1}.gml".format(
                                       out_dir, replica_no))

    return data
Beispiel #4
0
        # NOTE(review): Python 2 fragment of a larger function -- its `def` line
        # and the bindings of sfdp_default_cmd, output_path, image_path,
        # stderr_path, verbose, write_graph, visualizer, new_G and params are
        # outside this excerpt.
        # build the sfdp (graphviz) command: render output_path to a PDF image
        visualizer_cmdl = sfdp_default_cmd + " -Tpdf %s > %s 2> %s " % (output_path, image_path, stderr_path)
        if verbose:
            print "Writing graph image: %s .." % image_path
        sys.stdout.flush()
        # return code is captured but not checked
        retCode = os.system(visualizer_cmdl)

        if verbose:
            print visualizer_cmdl
        if os.name == "nt":
            # Windows: 'start' opens the file with its associated viewer
            pdf_cmld = "start %s" % image_path
            if verbose:
                print pdf_cmld
            os.system(pdf_cmld)
        elif os.name == "posix":
            # aside: file --mime-type -b my.pdf
            open_in_unix(image_path, verbose=verbose, ext="pdf")
    elif write_graph and visualizer != None:
        # run a user-supplied visualizer command on the written graph file
        if verbose:
            print "Running visualizer: " + str(visualizer)
        sys.stdout.flush()
        visualizer_cmdl = visualizer + " " + output_path
        retCode = os.system(visualizer_cmdl)

        if verbose:
            print visualizer_cmdl

    # persist the replica graph alongside the written output
    graphutils.safe_pickle(path=output_path + ".pkl", data=new_G, params=params)
    if verbose:
        print "Replica is referenced by variable: new_G"
Beispiel #5
0
        # NOTE(review): Python 2 fragment -- the enclosing `def` line and the
        # bindings of replicas, params, metrics, figpath, data, seed and n_jobs
        # are outside this excerpt; this appears to be the `intermediates`
        # branch of a replica_vs_original variant.
        current_replicas = replicas
        # one level per editing-rate entry in the params lists
        for level in xrange(1, max(len(params.get('node_edit_rate', [])), len(params.get('edge_edit_rate', [])), len(params.get('node_growth_rate', [])), len(params.get('edge_growth_rate', [])))):
            print 'LEVEL: %d'%level
            coarse_models   = [r.coarser_graph.model_graph  for r in current_replicas]
            coarse_replicas = [r.coarser_graph              for r in current_replicas]
            vals_of_models   = evaluate_metrics(graphs=coarse_models,   metrics=metrics, n_jobs=n_jobs)
            vals_of_replicas = evaluate_metrics(graphs=coarse_replicas, metrics=metrics, n_jobs=n_jobs)
            jaccard_edges    = evaluate_similarity(base_graphs=coarse_models, graphs=coarse_replicas, n_jobs=n_jobs)

            replica_statistics, dummy \
                 = plot_deviation(vals_of_replicas=vals_of_replicas, vals_of_graph=vals_of_models, 
                                  metrics=metrics, figpath=figpath + 'level%d'%level, jaccard_edges=jaccard_edges)
            # descend one level for the next iteration
            current_replicas = coarse_replicas
            data[level] = replica_statistics
            data[level].update({'vals_of_replicas':vals_of_replicas, 'vals_of_models':vals_of_models, 'avg_jaccard_edges':jaccard_edges})
    graphutils.safe_pickle(path=figpath+'.pkl', data=data)
    save_param_set(params, seed, figpath)
    save_stats_csv(path=figpath+'.csv', seed=seed, data=data)

    return data


def safe_metrics(graph, metrics):
    """Evaluate every metric's 'function' on graph, substituting
    graphutils.METRIC_ERROR for any metric whose computation raises.

    NOTE(review): the visible body builds `rets` but never returns it --
    a trailing `return rets` appears to have been truncated in this excerpt.
    """
    rets = []
    for met_num,metric in enumerate(metrics):
        try:
            rets.append(metric['function'](graph))
        except Exception, inst:  # Python 2 except syntax; catches any metric failure
            print 'error in computing: '+metric['name']
            print inst
            rets.append(graphutils.METRIC_ERROR)  # sentinel keeps list aligned with metrics
Beispiel #6
0
def replica_vs_original(seed=None,
                        figpath=None,
                        generator_func=None,
                        G=None,
                        params=None,
                        num_replicas=150,
                        title_infix='',
                        metrics=None,
                        intermediates=False,
                        n_jobs=-1):
    #generate one or more replicas and compare them to the original graph
    if seed == None:
        seed = npr.randint(1E6)
    print 'rand seed: %d' % seed
    npr.seed(seed)
    random.seed(seed)

    if generator_func == None:
        generator_func = algorithms.generate_graph

    if G == None:
        G = graphutils.load_graph(path='data-social/potterat_Hiv250.elist')

    if metrics == None:
        metrics = graphutils.default_metrics[:]
    metrics = filter(lambda m: m['optional'] < 2, metrics)
    if 'metric_runningtime_bound' in params:
        mrtb = params['metric_runningtime_bound']
        metrics = filter(lambda m: m['runningtime'] <= mrtb, metrics)
    metrics = filter(lambda m: m['name'] not in ['avg flow closeness'],
                     metrics)  #broken in NX 1.6
    metrics.reverse()

    if params == None:
        params = {
            'verbose': False,
            'node_edit_rate': [0.05, 0.04, 0.03, 0.02, 0.01],
            'edge_edit_rate': [0.05, 0.04, 0.03, 0.02, 0.01],
            'node_growth_rate': [0],
            'locality_bias_correction': 0.,
            'enforce_connected': True,
            'accept_chance_edges': 1.0,
            'retain_intermediates': intermediates
        }
    if intermediates:
        params['retain_intermediates'] = True
    print 'Params:'
    print params
    print 'Metrics:'
    print[metric['name'] for metric in metrics]

    replicas = replicate_graph(G=G,
                               generator_func=generator_func,
                               num_replicas=num_replicas,
                               params=params,
                               title_infix=title_infix,
                               n_jobs=n_jobs)
    jaccard_edges = evaluate_similarity(
        base_graphs=G, graphs=replicas,
        n_jobs=n_jobs)  #this is actually a mean
    vals_of_all = evaluate_metrics(graphs=[G] + replicas,
                                   metrics=metrics,
                                   n_jobs=n_jobs)
    vals_of_graph = [metric_data[0] for metric_data in vals_of_all]
    vals_of_replicas = [metric_data[1:] for metric_data in vals_of_all]
    replica_statistics, figpath = plot_deviation(vals_of_replicas,
                                                 vals_of_graph, metrics,
                                                 figpath, jaccard_edges,
                                                 title_infix, seed,
                                                 getattr(G, 'name', ''))
    #pylab.show()
    data = {
        'metrics': [met['name'] for met in metrics],
        'name': getattr(G, 'name', ''),
        'params': params,
        'num_replicas': num_replicas,
        'figpath': figpath
    }
    data[0] = replica_statistics
    data[0].update({
        'vals_of_replicas': vals_of_replicas,
        'val_of_models': vals_of_graph,
        'avg_jaccard_edges': jaccard_edges
    })

    if intermediates:
        current_replicas = replicas
        for level in xrange(
                1,
                max(len(params.get('node_edit_rate', [])),
                    len(params.get('edge_edit_rate', [])),
                    len(params.get('node_growth_rate', [])),
                    len(params.get('edge_growth_rate', [])))):
            print 'LEVEL: %d' % level
            coarse_models = [
                r.coarser_graph.model_graph for r in current_replicas
            ]
            coarse_replicas = [r.coarser_graph for r in current_replicas]
            vals_of_models = evaluate_metrics(graphs=coarse_models,
                                              metrics=metrics,
                                              n_jobs=n_jobs)
            vals_of_replicas = evaluate_metrics(graphs=coarse_replicas,
                                                metrics=metrics,
                                                n_jobs=n_jobs)
            jaccard_edges = evaluate_similarity(base_graphs=coarse_models,
                                                graphs=coarse_replicas,
                                                n_jobs=n_jobs)

            replica_statistics, dummy \
                 = plot_deviation(vals_of_replicas=vals_of_replicas, vals_of_graph=vals_of_models,
                                  metrics=metrics, figpath=figpath + 'level%d'%level, jaccard_edges=jaccard_edges)
            current_replicas = coarse_replicas
            data[level] = replica_statistics
            data[level].update({
                'vals_of_replicas': vals_of_replicas,
                'vals_of_models': vals_of_models,
                'avg_jaccard_edges': jaccard_edges
            })
    graphutils.safe_pickle(path=figpath + '.pkl', data=data)
    save_param_set(params, seed, figpath)
    save_stats_csv(path=figpath + '.csv', seed=seed, data=data)

    return data