# Ejemplo n.º 1
# 0
def expstatic_changedLP(dynamic_sbm_series,
                        graphs,
                        embedding,
                        rounds,
                        res_pre,
                        m_summ,
                        n_sample_nodes=1000,
                        train_ratio_init=0.5,
                        no_python=False,
                        is_undirected=True,
                        sampling_scheme="u_rand"):
    """Evaluate dynamic link prediction on the links that change between
    consecutive graph snapshots.

    For every step ``t`` the embedding is learned on ``graphs[t]``, the
    added/removed edges between ``graphs[t]`` and ``graphs[t + 1]`` are
    extracted, and link prediction is evaluated ``rounds`` times on
    ``graphs[t + 1]``.  Per-step MAP mean/std and a precision report are
    appended to ``<res_pre><m_summ>.dlpsumm``.

    Returns:
        float: Mean average precision over all time steps and rounds.
    """
    n_sample_nodes = int(n_sample_nodes)
    print('\tDynamic Link Prediction')
    summary_path = '%s%s.dlpsumm' % (res_pre, m_summ)
    # Truncate any previous summary and write the metrics header.
    with open(summary_path, 'w') as summ_file:
        summ_file.write('Method\t%s\n' % metrics.getMetricsHeader())
    n_steps = len(graphs) - 1
    MAP = [[None] * rounds for _ in range(n_steps)]
    prec_curv = [[None] * rounds for _ in range(n_steps)]
    for step in range(n_steps):
        embedding.learn_embeddings(graphs[step])
        edges_add, edges_rm = getchangedlinks(graphs[step], graphs[step + 1])
        for trial in range(rounds):
            MAP[step][trial], prec_curv[step][trial] = \
                evaluateDynamic_changed_LinkPrediction(
                    graphs[step + 1], embedding,
                    rounds,
                    edges_add, edges_rm,
                    n_sample_nodes=n_sample_nodes,
                    no_python=no_python,
                    is_undirected=is_undirected,
                    sampling_scheme=sampling_scheme)
        # Append this step's statistics so partial results are kept on disk.
        with open(summary_path, 'a') as summ_file:
            summ_file.write('\tt=%d%f/%f\t%s\n' %
                            (step, np.mean(MAP[step]), np.std(MAP[step]),
                             metrics.getPrecisionReport(
                                 prec_curv[step][0],
                                 len(prec_curv[step][0]))))
    return np.mean(np.array(MAP))
def expLP(graphs,
          embedding,
          rounds,
          res_pre,
          m_summ,
          n_sample_nodes=1000,
          train_ratio_init=0.5,
          no_python=False,
          is_undirected=True,
          sampling_scheme="u_rand"):
    """Evaluate dynamic link prediction over a time series of graphs.

    The first ``int(train_ratio_init * len(graphs))`` snapshots form the
    initial training window; for every later step ``t`` the embedding is
    learned on all snapshots before ``t`` and evaluated on snapshot
    ``t``.  Per-step MAP mean/std and a precision report are appended to
    ``<res_pre><m_summ>.dlpsumm``.

    Args:
        graphs (list): Time-ordered list of graph snapshots.
        embedding (object): Algorithm for learning the graph embedding;
            must expose ``learn_embeddings``.
        rounds (int): Number of times to repeat each evaluation.
        res_pre (str): Prefix used to build the result file path.
        m_summ (str): Summary name appended to the result file path.
        n_sample_nodes (int): Number of nodes sampled per evaluation.
        train_ratio_init (float): Fraction of snapshots used as the
            initial training window.
        no_python (bool): Passed through to the evaluator.
        is_undirected (bool): Flag to denote if the graph is undirected.
        sampling_scheme (str): Sampling scheme for selecting the nodes.

    Returns:
        float: Mean average precision over all time steps and rounds.
    """
    n_sample_nodes = int(n_sample_nodes)
    print('\tDynamic Link Prediction')
    # Truncate any previous summary and write the metrics header.
    with open('%s%s.dlpsumm' % (res_pre, m_summ), 'w') as summ_file:
        summ_file.write('Method\t%s\n' % metrics.getMetricsHeader())
    T = len(graphs)
    T_min = int(train_ratio_init * T)
    MAP = [[None] * rounds for _ in range(T - T_min)]
    prec_curv = [[None] * rounds for _ in range(T - T_min)]
    for t in range(T_min, T):
        # Train on all snapshots strictly before t, evaluate on snapshot t.
        embedding.learn_embeddings(graphs[:t])
        for r_id in range(rounds):
            MAP[t - T_min][r_id], prec_curv[t - T_min][r_id] = \
                evaluateDynamicLinkPrediction(graphs[t], embedding,
                                              rounds,
                                              n_sample_nodes=n_sample_nodes,
                                              no_python=no_python,
                                              is_undirected=is_undirected,
                                              sampling_scheme=sampling_scheme)
        # Append this step's statistics so partial results survive a crash.
        with open('%s%s.dlpsumm' % (res_pre, m_summ), 'a') as summ_file:
            summ_file.write(
                '\tt=%d%f/%f\t%s\n' %
                (t - T_min, np.mean(MAP[t - T_min]), np.std(MAP[t - T_min]),
                 metrics.getPrecisionReport(prec_curv[t - T_min][0],
                                            len(prec_curv[t - T_min][0]))))
    return np.mean(np.array(MAP))
def expGR(digraph,
          graph_embedding,
          X,
          n_sampled_nodes,
          rounds,
          res_pre,
          m_summ,
          file_suffix=None,
          is_undirected=True,
          sampling_scheme="rw"):
    """Evaluate graph reconstruction on sampled subgraphs.

    For each round a subgraph of ``n_sampled_nodes`` nodes is sampled
    (uniformly at random for ``sampling_scheme == "u_rand"``, otherwise
    via random walk), the corresponding embedding rows are selected, and
    static graph reconstruction is evaluated.  Summary statistics are
    written to ``<res_pre>_<m_summ>.grsumm`` and the raw results are
    pickled to ``<res_pre>_<m_summ>.gr``.

    Args:
        digraph (object): Networkx graph object to reconstruct.
        graph_embedding (object): Algorithm for learning graph embedding.
        X (ndarray): Embedding values of the graph, indexed by node.
        n_sampled_nodes (int): Number of nodes to sample per round.
        rounds (int): Number of times to run the experiment.
        res_pre (str): Prefix used to build the result file paths.
        m_summ (str): Summary name appended to the result file paths.
        file_suffix (str): Suffix for intermediate file names, passed
            through to the evaluator.
        is_undirected (bool): Flag to denote if the graph is undirected.
        sampling_scheme (str): Sampling scheme for selecting the nodes.

    Returns:
        float: Mean average precision over all rounds.
    """
    print('\tGraph Reconstruction')
    n_sampled_nodes = int(n_sampled_nodes)
    if digraph.number_of_nodes() <= n_sampled_nodes:
        # Sampling would return the whole graph every time; one round suffices.
        rounds = 1
    MAP = [None] * rounds
    prec_curv = [None] * rounds
    err = [None] * rounds
    err_b = [None] * rounds
    n_nodes = [None] * rounds
    n_edges = [None] * rounds
    # Original code never closed this file handle; the context manager
    # guarantees it is flushed and closed even if evaluation raises.
    with open('%s_%s.grsumm' % (res_pre, m_summ), 'w') as summ_file:
        summ_file.write('Method\t%s\n' % metrics.getMetricsHeader())
        for round_id in range(rounds):
            if sampling_scheme == "u_rand":
                sampled_digraph, node_l = graph_util.sample_graph(
                    digraph, n_sampled_nodes=n_sampled_nodes)
            else:
                sampled_digraph, node_l = graph_util.sample_graph_rw_int(
                    digraph, n_sampled_nodes=n_sampled_nodes)
            n_nodes[round_id] = sampled_digraph.number_of_nodes()
            n_edges[round_id] = sampled_digraph.number_of_edges()
            print('\t\tRound: %d, n_nodes: %d, n_edges:%d\n' %
                  (round_id, n_nodes[round_id], n_edges[round_id]))
            # Select embedding rows for the sampled nodes only.
            sampled_X = X[node_l]
            MAP[round_id], prec_curv[round_id], err[round_id], err_b[round_id] = \
                evaluateStaticGraphReconstruction(sampled_digraph,
                                                  graph_embedding,
                                                  sampled_X,
                                                  node_l,
                                                  file_suffix=file_suffix,
                                                  is_undirected=is_undirected)
        try:
            summ_file.write('Err: %f/%f\n' % (np.mean(err), np.std(err)))
            summ_file.write('Err_b: %f/%f\n' % (np.mean(err_b), np.std(err_b)))
        except TypeError:
            # err/err_b may contain None when the evaluator reports no error
            # values; skip the error lines in that case (best-effort output).
            pass
        summ_file.write('%f/%f\t%s\n' %
                        (np.mean(MAP), np.std(MAP),
                         metrics.getPrecisionReport(prec_curv[0], n_edges[0])))
    with open('%s_%s.gr' % (res_pre, m_summ), 'wb') as dump_file:
        pickle.dump([n_nodes, n_edges, MAP, prec_curv, err, err_b], dump_file)
    return np.mean(np.array(MAP))