def run_louvain_multiplex_test(n,
                               nlayers,
                               mu,
                               p_eta,
                               omega,
                               gamma,
                               ntrials,
                               use_blockmultiplex=False):
    """Run ``ntrials`` temporal-benchmark trials and stream the scores to CSV.

    Each trial builds a fresh graph with ``create_temporal_graph``, runs
    ``modbp.ModularityBP.run_modbp`` once for every beta returned by
    ``get_bstar`` (q = 1, 3, ... below ``qmax + 2``) and then scores the
    partition returned by ``get_starting_partition`` as a baseline.  Every BP
    run and every successful baseline contributes one row, and the CSV file
    is written out as rows are produced.

    Args:
        n: Number of nodes, forwarded to the generator as ``n_nodes``.
        nlayers: Number of layers, forwarded as ``n_layers``.
        mu: Forwarded as ``mu`` to the generator and recorded in each row.
        p_eta: Forwarded as ``p`` to the generator and recorded as ``p``.
        omega: Passed as ``omega`` to BP, the baseline and the modularity
            calculation.
        gamma: Passed as ``resgamma`` (BP, modularity) / ``gamma`` (baseline).
        ntrials: Number of independent graphs to generate and score.
        use_blockmultiplex: Accepted for call compatibility; not used here.

    Returns:
        Always ``0``.
    """
    ncoms = 5
    qmax = 8
    max_iters = 400

    finoutdir = os.path.join(
        matlabbench_dir,
        'temporal_noinit_matlab_test_data_n{:d}_nlayers{:d}_trials{:d}_{:d}ncoms_multilayer'
        .format(n, nlayers, ntrials, ncoms))
    # exist_ok avoids the check-then-create race when several jobs share
    # the same output directory.
    os.makedirs(finoutdir, exist_ok=True)

    output = pd.DataFrame()
    outfile = "{:}/temporal_noinit_test_n{:d}_L{:d}_mu{:.4f}_p{:.4f}_gamma{:.4f}_omega{:.4f}_trials{:d}.csv".format(
        finoutdir, n, nlayers, mu, p_eta, gamma, omega, ntrials)

    def _flush_latest_row(written_cols):
        """Persist the newest row; return the column list now on disk."""
        current_cols = list(output.columns)
        if written_cols != current_cols:
            # First write, or a row introduced new columns: rewrite the whole
            # file so the header always matches every data row.
            with open(outfile, 'w') as fh:
                output.to_csv(fh, header=True)
        else:
            with open(outfile, 'a') as fh:
                output.iloc[-1:, :].to_csv(fh, header=False)
        return current_cols

    written_cols = None
    print(
        'running {:d} trials at gamma={:.4f}, omega={:.3f}, p={:.4f}, and mu={:.4f}'
        .format(ntrials, gamma, omega, p_eta, mu))
    for trial in range(ntrials):
        graph = create_temporal_graph(n_nodes=n,
                                      mu=mu,
                                      p=p_eta,
                                      n_layers=nlayers,
                                      ncoms=ncoms)

        mlbp = modbp.ModularityBP(
            mlgraph=graph,
            accuracy_off=True,
            use_effective=True,
            align_communities_across_layers_temporal=True,
            comm_vec=graph.comm_vec)
        betas = [
            mlbp.get_bstar(q, omega=omega) for q in range(1, qmax + 2, 2)
        ]

        for beta in betas:
            t = time()
            mlbp.run_modbp(beta=beta,
                           niter=max_iters,
                           reset=True,
                           q=qmax,
                           resgamma=gamma,
                           omega=omega)
            mlbp_rm = mlbp.retrieval_modularities
            print(
                "time running modbp at mu,p={:.3f},{:.3f}: {:.3f}. niters={:.3f}"
                .format(mu, p_eta,
                        time() - t,
                        mlbp_rm.iloc[-1, :]['niters']))

            # Copy the most recent BP result row into the output table.
            cind = output.shape[0]
            ind = mlbp_rm.index[-1]
            for col in mlbp_rm.columns:
                output.loc[cind, col] = mlbp_rm.loc[ind, col]
            output.loc[cind, 'isGenLouvain'] = False
            output.loc[cind, 'mu'] = mu
            output.loc[cind, 'p'] = p_eta
            output.loc[cind, 'trial'] = trial

            print(output.loc[cind, ['beta', 'niters', 'AMI', 'AMI_layer_avg']])
            written_cols = _flush_latest_row(written_cols)

        # Baseline: scored once per trial.  A failure here is reported and
        # skipped so one bad trial does not end the whole job.
        t = time()
        try:
            S = get_starting_partition(graph,
                                       gamma=gamma,
                                       omega=omega,
                                       q=ncoms)
            ami_layer = graph.get_AMI_layer_avg_with_communities(S)
            ami = graph.get_AMI_with_communities(S)
            nmi = graph.get_AMI_with_communities(S, useNMI=True)
            nmi_layer = graph.get_AMI_layer_avg_with_communities(S,
                                                                 useNMI=True)
            cmod = modbp.calc_modularity(graph, S, resgamma=gamma, omega=omega)
            _, com_sizes = np.unique(S, return_counts=True)
            # Only communities with more than 5 members are counted.
            num_coms = np.sum(com_sizes > 5)
        except Exception as err:  # KeyboardInterrupt/SystemExit still propagate
            matlabfailed = True
            print("baseline partition failed: {!r}".format(err))
        else:
            matlabfailed = False
            cind = output.shape[0]
            baseline_row = [
                ('isGenLouvain', True),
                ('mu', mu),
                ('p', p_eta),
                ('trial', trial),
                ('AMI', ami),
                ('AMI_layer_avg', ami_layer),
                ('NMI', nmi),
                ('NMI_layer_avg', nmi_layer),
                ('retrieval_modularity', cmod),
                ('resgamma', gamma),
                ('omega', omega),
                ('num_coms', num_coms),
            ]
            for col, value in baseline_row:
                output.loc[cind, col] = value
            written_cols = _flush_latest_row(written_cols)

        print("time running matlab:{:.3f}. sucess: {:}".format(
            time() - t, str(not matlabfailed)))
    return 0
# Example #2
# 0
def run_louvain_multiplex_test(n,nlayers,mu,p_eta,omega,gamma,ntrials,use_blockmultiplex=False):
    """Run ``ntrials`` multiplex-benchmark trials and stream the scores to CSV.

    Each trial builds a fresh graph with ``create_multiplex_graph`` and runs
    ``modbp.ModularityBP.run_modbp`` for the betas returned by ``get_bstar``
    (q = 1, 3, ... below ``qmax + 2``), stopping early once more than two
    runs failed to converge or more than two runs were both converged and
    non-trivial.  The partition from ``get_starting_partition_modularity`` is
    then scored as a baseline.  Every BP run and every baseline contributes
    one row, and the CSV file is written out as rows are produced.

    Args:
        n: Number of nodes, forwarded to the generator as ``n_nodes``.
        nlayers: Number of layers, forwarded as ``n_layers``.
        mu: Forwarded as ``mu`` to the generator and recorded in each row.
        p_eta: Forwarded as ``p`` to the generator and recorded as ``p``.
        omega: Passed as ``omega`` to BP, the baseline and the modularity
            calculation.
        gamma: Passed as ``resgamma`` (BP, modularity) / ``gamma`` (baseline).
        ntrials: Number of independent graphs to generate and score.
        use_blockmultiplex: Accepted for call compatibility; not used here.

    Returns:
        Always ``0``.
    """
    ncoms = 10
    qmax = 12
    max_iters = 400

    finoutdir = os.path.join(matlabbench_dir, 'spectral_only_uninit_multiplex_matlab_test_data_n{:d}_nlayers{:d}_trials{:d}_{:d}ncoms_multilayer'.format(n,nlayers,ntrials,ncoms))
    # exist_ok avoids the check-then-create race when several jobs share
    # the same output directory.
    os.makedirs(finoutdir, exist_ok=True)

    output = pd.DataFrame()
    outfile="{:}/multiplex_test_n{:d}_L{:d}_mu{:.4f}_p{:.4f}_gamma{:.4f}_omega{:.4f}_trials{:d}.csv".format(finoutdir,n,nlayers,mu,p_eta, gamma,omega,ntrials)

    def _flush_latest_row(written_cols):
        """Persist the newest row; return the column list now on disk."""
        current_cols = list(output.columns)
        if written_cols != current_cols:
            # First write, or a row introduced new columns: rewrite the whole
            # file so the header always matches every data row.
            with open(outfile, 'w') as fh:
                output.to_csv(fh, header=True)
        else:
            with open(outfile, 'a') as fh:
                output.iloc[-1:, :].to_csv(fh, header=False)
        return current_cols

    written_cols = None
    print('running {:d} trials at gamma={:.4f}, omega={:.3f}, p={:.4f}, and mu={:.4f}'.format(ntrials,gamma,omega,p_eta,mu))
    for trial in range(ntrials):
        t = time()
        graph = create_multiplex_graph(n_nodes=n, mu=mu, p=p_eta,
                                       n_layers=nlayers, ncoms=ncoms)
        print('time creating graph: {:.3f}'.format(time()-t))

        mlbp = modbp.ModularityBP(mlgraph=graph, accuracy_off=True, use_effective=True,
                                  align_communities_across_layers_multiplex=True,
                                  comm_vec=graph.comm_vec)
        betas = [mlbp.get_bstar(q,omega=omega) for q in range(1, qmax+2,2)]

        notconverged = 0
        for beta in betas:
            t = time()
            mlbp.run_modbp(beta=beta, niter=max_iters, q=qmax, reset=True,
                           dumping_rate=1.0,
                           resgamma=gamma, omega=omega)
            mlbp_rm = mlbp.retrieval_modularities
            print("time running modbp at mu,p={:.3f},{:.3f}: {:.3f}. niters={:.3f}".format(mu,p_eta,time()-t,mlbp_rm.iloc[-1,:]['niters']))

            # The cell may hold a numpy bool, so compare by value rather
            # than by identity.
            if mlbp_rm.iloc[-1,:]['converged'] == False:  # noqa: E712
                notconverged += 1

            # Copy the most recent BP result row into the output table.
            cind = output.shape[0]
            ind = mlbp_rm.index[-1]
            for col in mlbp_rm.columns:
                output.loc[cind, col] = mlbp_rm.loc[ind, col]
            output.loc[cind, 'isSpectral'] = False
            output.loc[cind, 'mu'] = mu
            output.loc[cind, 'p'] = p_eta
            output.loc[cind, 'trial'] = trial

            print(output.loc[cind,['beta','niters','AMI','AMI_layer_avg']])
            written_cols = _flush_latest_row(written_cols)

            # Give up on this trial after three non-converged runs.
            if notconverged > 2:
                break

            # Stop once more than two runs so far in this trial were both
            # converged and non-trivial (counted overall, not consecutively).
            good_runs = np.sum(np.logical_and(np.logical_not(mlbp_rm['is_trivial']),
                                              mlbp_rm['converged']))
            if good_runs > 2:
                break

        # Baseline, scored once per trial.  The timer starts before the call
        # so that the partition-creation time reported below is real.
        t = time()
        S = get_starting_partition_modularity(graph, gamma=gamma, omega=omega, q=ncoms)
        print("time creating starting partition from mod matrix : {:.4f}".format(time()-t))
        ami_layer = graph.get_AMI_layer_avg_with_communities(S)
        ami = graph.get_AMI_with_communities(S)
        nmi = graph.get_AMI_with_communities(S,useNMI=True)
        nmi_layer = graph.get_AMI_layer_avg_with_communities(S,useNMI=True)
        cmod = modbp.calc_modularity(graph, S, resgamma=gamma, omega=omega)
        _, com_sizes = np.unique(S, return_counts=True)

        cind = output.shape[0]
        output.loc[cind, 'isSpectral'] = True
        output.loc[cind, 'mu'] = mu
        output.loc[cind, 'p'] = p_eta
        output.loc[cind, 'trial'] = trial
        output.loc[cind, 'AMI'] = ami
        output.loc[cind, 'AMI_layer_avg'] = ami_layer
        output.loc[cind, 'NMI'] = nmi
        output.loc[cind, 'NMI_layer_avg'] = nmi_layer
        output.loc[cind, 'retrieval_modularity'] = cmod
        output.loc[cind, 'resgamma'] = gamma
        output.loc[cind, 'omega'] = omega
        # Only communities with more than 5 members are counted.
        output.loc[cind, 'num_coms'] = np.sum(com_sizes > 5)
        print(output.loc[cind, ['isSpectral', 'AMI', 'AMI_layer_avg']])

        written_cols = _flush_latest_row(written_cols)

        print("time running matlab:{:.3f}. ".format(time() - t))

    return 0
def main():
    """Score an external MATLAB community-detection call on multilayer SBM graphs.

    Command-line arguments (``sys.argv[1:]``), in order:
        n (int), q (int), nlayers (int), eta (float), c (float), ep (float),
        ntrials (int), omega (float), gamma (float).

    For every trial a ``modbp.MultilayerSBM`` is sampled with ``q`` blocks,
    its matrices are saved to a temporary ``.mat`` file, the executable named
    by ``call_matlabfile`` is run on it, and the returned partition ``S`` is
    scored (AMI, per-layer AMI, modularity) and appended to a CSV file.  A
    trial whose external call exits non-zero is reported and skipped.  The
    temporary ``.mat`` files are removed after each trial, including when the
    trial fails.

    Returns:
        Always ``0``.
    """
    n = int(sys.argv[1])
    q = int(sys.argv[2])
    nlayers = int(sys.argv[3])
    eta = float(sys.argv[4])
    c = float(sys.argv[5])
    ep = float(sys.argv[6])
    ntrials = int(sys.argv[7])
    omega = float(sys.argv[8])
    gamma = float(sys.argv[9])

    # Within-block (pin) and between-block (pout) edge probabilities.
    # NOTE(review): pout / pin equals ep only when q == 2 or ep == 1 --
    # confirm this parameterization is what is intended for q > 2.
    nblocks = q
    pin = c / (1.0 + ep * (q - 1.0)) / (n * 1.0 / q)
    pout = c / (1 + (q - 1.0) / ep) / (n * 1.0 / q)
    eye = np.identity(nblocks)
    prob_mat = eye * pin + (np.ones((nblocks, nblocks)) - eye) * pout

    result_cols = [
        'ep', 'eta', 'resgamma', 'omega', 'AMI', 'AMI_layer_avg', 'modularity'
    ]
    output = pd.DataFrame(columns=result_cols)

    finoutdir = os.path.join(
        clusterdir,
        'experiments/genlouvain_mlsbm/sbm_test_data/SBM_test_data_n{:}_q{:d}_nt{:}'
        .format(n, q, ntrials))
    # exist_ok avoids the check-then-create race between parallel jobs.
    os.makedirs(finoutdir, exist_ok=True)
    outfile = os.path.join(
        finoutdir,
        "sbm_n{:d}_q{:d}_t{:d}_eta{:.2f}_ep{:.2f}_omega{:.2f}_gamma{:.2f}.csv".
        format(n, q, nlayers, eta, ep, omega, gamma))

    header_written = False
    for trial in range(ntrials):
        ml_sbm = modbp.MultilayerSBM(n,
                                     comm_prob_mat=prob_mat,
                                     layers=nlayers,
                                     transition_prob=eta)
        mgraph = modbp.MultilayerGraph(ml_sbm.intraedges,
                                       ml_sbm.layer_vec,
                                       ml_sbm.interedges,
                                       comm_vec=ml_sbm.get_all_layers_block())

        A, C = mgraph.to_scipy_csr()
        scio_outfile = os.path.join(
            matlaboutdir,
            'sbm_n{:d}_q{:d}_t{:d}_eta{:.2f}_ep{:.2f}_omega{:.3f}_gamma{:.3f}_trial{:}.mat'
            .format(n, q, nlayers, eta, ep, omega, gamma, trial))
        matlaboutput = os.path.join(
            matlaboutdir,
            'sbm_n{:d}_q{:d}_t{:d}_eta{:.2f}_ep{:.2f}_omega{:.3f}_gamma{:.3f}_trial{:}_output.mat'
            .format(n, q, nlayers, eta, ep, omega, gamma, trial))

        try:
            scio.savemat(scio_outfile, {"A": A, "C": C})
            parameters = [
                call_matlabfile, scio_outfile, matlaboutput,
                "{:.4f}".format(gamma), "{:.4f}".format(omega)
            ]
            process = Popen(parameters, stderr=PIPE, stdout=PIPE)
            # communicate() already waits for the process to exit.
            _, stderr = process.communicate()
            if process.returncode != 0:
                # No usable output was produced; skip the trial instead of
                # failing later while loading a missing result file.
                print("matlab call failed")
                print(stderr.decode('utf-8', 'replace'))
                continue
            S = scio.loadmat(matlaboutput)['S'][:, 0]
        finally:
            # Remove the temporary files even when the trial failed.
            for tmp_path in (scio_outfile, matlaboutput):
                try:
                    os.remove(tmp_path)
                except OSError:
                    print('could not remove {}'.format(tmp_path))

        ami = mgraph.get_AMI_with_communities(S)
        # Ground truth (left) next to the detected partition (right).
        plt.close()
        f, a = plt.subplots(1, 2, figsize=(10, 5))
        mgraph.plot_communities(ax=a[0])
        mgraph.plot_communities(S, ax=a[1])
        plt.show()
        ami_layer_avg = mgraph.get_AMI_layer_avg_with_communities(S)
        mod = modbp.calc_modularity(mgraph,
                                    partition=S,
                                    resgamma=gamma,
                                    omega=omega)

        cind = output.shape[0]
        output.loc[cind, result_cols] = (ep, eta, gamma, omega, ami,
                                         ami_layer_avg, mod)

        # The header goes out with the first successful trial; later trials
        # append only their own row.
        if not header_written:
            with open(outfile, 'w') as fh:
                output.to_csv(fh, header=True)
            header_written = True
        else:
            with open(outfile, 'a') as fh:
                output.iloc[[-1], :].to_csv(fh, header=False)

    return 0