Example #1
    def __init__(self, dim=128, lp_model=LogisticRegression(solver='liblinear')):
        # General evaluation parameters
        self.dim = dim
        self.edge_embed_method = None
        self.lp_model = lp_model
        # Train and validation data split objects
        self.traintest_split = split.EvalSplit()
        # Results
        self._results = list()
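This constructor shows the evaluator's defaults: 128-dimensional embeddings and a liblinear LogisticRegression as the link prediction model. Below is a minimal construction sketch against exactly this signature; note that Example #2 passes a precomputed train/test split to the constructor, so the exact signature may differ between EvalNE versions, and the cross-validated classifier is only an illustrative alternative.

# Module path assumed from the EvalNE layout used in these examples
from evalne.evaluation.evaluator import LPEvaluator
from sklearn.linear_model import LogisticRegressionCV

# Default evaluator: dim=128, liblinear LogisticRegression as lp_model
nee = LPEvaluator()

# Smaller embedding dimensionality and a cross-validated classifier instead
nee_cv = LPEvaluator(dim=64, lp_model=LogisticRegressionCV(Cs=10, cv=5, scoring='roc_auc'))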
Example #2
import random
from time import time

import numpy as np

# Imports assumed for this snippet; the module paths follow the layout used by the
# other examples on this page (evalne.evaluation.split / .evaluator) and EvalNE's
# preprocessing utilities imported as pp.
from evalne.evaluation import evaluator, split
from evalne.utils import preprocess as pp


def run_test():

    random.seed(42)
    np.random.seed(42)

    # Set some variables
    filename = "./data/network.edgelist"
    directed = False

    # Load the test graph
    G = pp.load_graph(filename, delimiter=",", comments='#', directed=directed)
    G, ids = pp.prep_graph(G)

    # Print some stats about the graph
    pp.get_stats(G)

    # Generate one train/test split with 90% of the edges in the train set
    start = time()
    traintest_split = split.EvalSplit()
    traintest_split.compute_splits(G, train_frac=0.9)
    end = time() - start
    print("\nSplits computed in {} sec".format(end))

    # Create an evaluator
    nee = evaluator.LPEvaluator(traintest_split)

    # Test baselines
    start = time()
    test_baselines(nee, directed)
    end = time() - start
    print("\nBaselines computed in {} sec".format(end))

    # Test Katz
    start = time()
    test_katz(nee)
    end = time() - start
    print("\nKatz computed in {} sec".format(end))
Example #3
    def evaluate_ne_cmd(self, method_name, command, edge_embedding_methods, input_delim, emb_delim, tune_params=None,
                        maximize='auroc', verbose=True):
        r"""
        Evaluates a node embedding method and tunes its parameters from the method's command line call string.
        If parameter tuning is required, this method automatically generates train/validation splits with the same
        parameters as the train/test splits.

        Parameters
        ----------
        method_name : basestring
            A string indicating the name of the method to be evaluated.
        command : basestring
            A string containing the call to the method as it would be written in the command line.
            Placeholders (i.e. {}) need to be provided for the values of the input file, output file and embedding
            dimensionality, precisely IN THIS ORDER.
        edge_embedding_methods : array-like
            A list of methods used to compute edge embeddings from the node embeddings output by the NE models.
            The accepted values are the function names in evalne.evaluation.edge_embeddings.
        input_delim : basestring
            The delimiter expected by the method as input (edgelist).
        emb_delim : basestring
            The delimiter provided by the method in the output (node embeddings).
        tune_params : basestring
            A string containing all the parameters to be tuned and their values.
        maximize : basestring
            The score to maximize while performing parameter tuning.
        verbose : bool
            A parameter to control the amount of screen output.
        """
        # Check if tuning parameters is needed
        if tune_params is not None:
            if verbose:
                print('Tuning parameters for {} ...'.format(method_name))

            # Variable to store the best results and parameters for each ee_method
            best_results = list()
            best_params = list()
            for i in range(len(edge_embedding_methods)):
                best_results.append(None)
                best_params.append('')

            # Prepare the parameters
            sep = re.compile(r"--\w+")
            if sep.match(tune_params.strip()) is not None:
                params = tune_params.split('--')
                dash = ' --'
            else:
                params = tune_params.split('-')
                dash = ' -'
            params.pop(0)     # the first element is always an empty string
            param_names = list()
            for i in range(len(params)):
                aux = (params[i].strip()).split()
                param_names.append(aux.pop(0))
                params[i] = aux
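            # Illustrative (assumed) input: tune_params='--p 0.25 0.5 --q 1 2' yields
            # param_names=['p', 'q'] and params=[['0.25', '0.5'], ['1', '2']], so the
            # code below evaluates the method for every combination of the listed values.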

            # Prepare validation data
            valid_split = split.EvalSplit()
            valid_split.compute_splits(self.traintest_split.TG, train_frac=self.traintest_split.train_frac,
                                       fast_split=self.traintest_split.fast_split,
                                       owa=self.traintest_split.owa,
                                       num_fe_train=self.traintest_split.num_fe_train,
                                       num_fe_test=self.traintest_split.num_fe_test,
                                       seed=self.traintest_split.seed, verbose=verbose)

            # If there is only one parameter we treat it separately
            if len(param_names) == 1:
                for i in params[0]:
                    # Format the parameter combination
                    param_str = dash + param_names[0] + ' ' + i

                    # Create a command string with the new parameter
                    ext_command = command + param_str

                    results = self._evaluate_ne_cmd(valid_split, method_name, ext_command, edge_embedding_methods,
                                                    input_delim, emb_delim, verbose)

                    results = list(results)
                    # Log the best results
                    for j in range(len(results)):
                        if best_results[j] is None:
                            best_results[j] = results[j]
                            best_params[j] = param_str
                        else:
                            func1 = getattr(results[j].test_scores, str(maximize))
                            func2 = getattr(best_results[j].test_scores, str(maximize))
                            if func1() > func2():   # keep the parameters that maximize the chosen score
                                best_results[j] = results[j]
                                best_params[j] = param_str
            else:
                # All parameter combinations
                combinations = list(itertools.product(*params))
                for comb in combinations:
                    # Format the parameter combination
                    param_str = ''
                    for i in range(len(comb)):
                        param_str += dash + param_names[i] + ' ' + comb[i]

                    # Update the command string with the parameter combination
                    ext_command = command + param_str

                    results = self._evaluate_ne_cmd(valid_split, method_name, ext_command, edge_embedding_methods,
                                                    input_delim, emb_delim, verbose)

                    results = list(results)
                    # Log the best results
                    for i in range(len(results)):
                        if best_results[i] is None:
                            best_results[i] = results[i]
                            best_params[i] = param_str
                        else:
                            func1 = getattr(results[i].test_scores, str(maximize))
                            func2 = getattr(best_results[i].test_scores, str(maximize))
                            if func1() > func2():   # keep the parameters that maximize the chosen score
                                best_results[i] = results[i]
                                best_params[i] = param_str

            # We have found the best parameters, train the model again on the whole train data to get actual results
            results = list()
            for i in range(len(edge_embedding_methods)):
                ext_command = command + best_params[i]
                results.extend(self._evaluate_ne_cmd(self.traintest_split, method_name, ext_command,
                                                     [edge_embedding_methods[i]], input_delim, emb_delim, verbose))

            # zip(edge_embedding_methods, best_params)
            # data = collections.defaultdict(list)
            # for best in set(best_params):
            #     ext_command = command + best
            #     results.extend(self._evaluate_ne_cmd(self.traintest_split, method_name, ext_command,
            #                                          [edge_embedding_methods[i]], input_delim, emb_delim, verbose))

            self._results.extend(results)
        else:
            # No parameter tuning is needed
            results = self._evaluate_ne_cmd(self.traintest_split, method_name, command, edge_embedding_methods,
                                            input_delim, emb_delim, verbose)
            self._results.extend(results)
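A call sketch for evaluate_ne_cmd. The node2vec script path, its flags and the p/q grid are assumptions for illustration only; what matters is that the command string contains the input edgelist, output embedding file and dimensionality placeholders in the documented order, and that the edge embedding operator names exist in evalne.evaluation.edge_embeddings.

# Hypothetical command-line NE method (script path and flag names are assumptions)
cmd = 'python node2vec/main.py --input {} --output {} --dimensions {}'

nee.evaluate_ne_cmd(method_name='node2vec',
                    command=cmd,
                    edge_embedding_methods=['average', 'hadamard'],
                    input_delim=' ',
                    emb_delim=' ',
                    tune_params='--p 0.25 0.5 --q 1 2',   # grid explored by the tuning code above
                    maximize='auroc',
                    verbose=True)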
Example #4
    def evaluate_cmd(self, method_name, method_type, command, edge_embedding_methods, input_delim, output_delim,
                     tune_params=None, maximize='auroc', write_weights=False, write_dir=False, verbose=True):
        r"""
        Evaluates an embedding method and tunes its parameters from the method's command line call string. This
        function can evaluate node embedding, edge embedding or end-to-end embedding methods.
        If model parameter tuning is required, this method automatically generates train/validation splits
        with the same parameters as the train/test splits.

        Parameters
        ----------
        method_name : basestring
            A string indicating the name of the method to be evaluated.
        method_type : basestring
            A string indicating the type of embedding method (i.e. ne, ee, e2e).
        command : basestring
            A string containing the call to the method as it would be written in the command line.
            For 'ne' methods placeholders (i.e. {}) need to be provided for the parameters: input network file,
            output file and embedding dimensionality, precisely IN THIS ORDER.
            For 'ee' methods with parameters: input network file, input train edgelist, input test edgelist, output
            train embeddings, output test embeddings and embedding dimensionality, 6 placeholders (i.e. {}) need to
            be provided, precisely IN THIS ORDER.
            Alternatively, for 'ee' methods with parameters: input network file, input edgelist, output embeddings
            and embedding dimensionality, 4 placeholders (i.e. {}) need to be provided, precisely IN THIS ORDER.
            For 'e2e' methods with parameters: input network file, input train edgelist, input test edgelist, output
            train predictions, output test predictions and embedding dimensionality, 6 placeholders (i.e. {}) need
            to be provided, precisely IN THIS ORDER.
            Alternatively, for 'e2e' methods with parameters: input network file, input edgelist, output predictions
            and embedding dimensionality, 4 placeholders (i.e. {}) need to be provided, precisely IN THIS ORDER.
        edge_embedding_methods : array-like
            A list of methods used to compute edge embeddings from the node embeddings output by the NE models.
            The accepted values are the function names in evalne.evaluation.edge_embeddings.
            When evaluating 'ee' or 'e2e' methods, this parameter is ignored.
        input_delim : basestring
            The delimiter expected by the method as input (edgelist).
        output_delim : basestring
            The delimiter provided by the method in the output (embeddings or predictions).
        tune_params : basestring
            A string containing all the parameters to be tuned and their values.
        maximize : basestring
            The score to maximize while performing parameter tuning.
        write_weights : bool, optional
            If True the train graph passed to the embedding methods will be stored as weighted edgelist
            (e.g. triplets src, dst, weight) otherwise as normal edgelist. If the graph edges have no weight attribute
            and this parameter is set to True, a weight of 1 will be assigned to each edge. Default is False.
        write_dir : bool, optional
            This option is only relevant for undirected graphs. If False, the train graph will be stored with a single
            direction of the edges. If True, both directions of edges will be stored. Default is False.
        verbose : bool
            A parameter to control the amount of screen output.
        """
        # If the method evaluated does not require edge embeddings, set this parameter to ['none']
        if method_type != 'ne':
            edge_embedding_methods = ['none']
            self.edge_embed_method = None

        # Check if tuning parameters is needed
        if tune_params is not None:
            print('Tuning parameters for {} ...'.format(method_name))

            # Variable to store the best results and parameters for each ee_method
            best_results = list()
            best_params = list()
            for i in range(len(edge_embedding_methods)):
                best_results.append(None)
                best_params.append('')

            # Prepare the parameters
            sep = re.compile(r"--\w+")
            if sep.match(tune_params.strip()) is not None:
                params = tune_params.split('--')
                dash = ' --'
            else:
                params = tune_params.split('-')
                dash = ' -'
            params.pop(0)     # the first element is always an empty string
            param_names = list()
            for i in range(len(params)):
                aux = (params[i].strip()).split()   # Splits the parameter name from the parameter values to be tested
                param_names.append(aux.pop(0))
                params[i] = aux

            # Prepare validation data
            valid_split = split.EvalSplit()
            valid_split.compute_splits(self.traintest_split.TG, train_frac=0.9,
                                       fast_split=self.traintest_split.fast_split,
                                       owa=self.traintest_split.owa,
                                       num_fe_train=self.traintest_split.num_fe_train,
                                       num_fe_test=self.traintest_split.num_fe_test,
                                       split_id=self.traintest_split.split_id, verbose=verbose)

            # If there is only one parameter we treat it separately
            if len(param_names) == 1:
                for i in params[0]:
                    # Format the parameter combination
                    param_str = dash + param_names[0] + ' ' + i

                    # Create a command string with the new parameter
                    ext_command = command + param_str

                    # Call the corresponding evaluation method
                    if method_type == 'ne':
                        results = self._evaluate_ne_cmd(valid_split, method_name, ext_command, edge_embedding_methods,
                                                        input_delim, output_delim, write_weights, write_dir, verbose)
                    elif method_type == 'ee' or method_type == 'e2e':
                        results = self._evaluate_ee_e2e_cmd(valid_split, method_name, method_type, ext_command,
                                                            input_delim, output_delim, write_weights, write_dir,
                                                            verbose)
                    else:
                        raise ValueError('Method type {} unknown!'.format(method_type))
                    results = list(results)

                    # Log the best results
                    for j in range(len(results)):
                        if best_results[j] is None:
                            best_results[j] = results[j]
                            best_params[j] = param_str
                        else:
                            func1 = getattr(results[j].test_scores, str(maximize))
                            func2 = getattr(best_results[j].test_scores, str(maximize))
                            if func1() > func2():
                                best_results[j] = results[j]
                                best_params[j] = param_str
            else:
                # All parameter combinations
                combinations = list(itertools.product(*params))
                for comb in combinations:
                    # Format the parameter combination
                    param_str = ''
                    for i in range(len(comb)):
                        param_str += dash + param_names[i] + ' ' + comb[i]

                    # Update the command string with the parameter combination
                    ext_command = command + param_str

                    # Call the corresponding evaluation method
                    if method_type == 'ne':
                        results = self._evaluate_ne_cmd(valid_split, method_name, ext_command, edge_embedding_methods,
                                                        input_delim, output_delim, write_weights, write_dir, verbose)
                    elif method_type == 'ee' or method_type == 'e2e':
                        results = self._evaluate_ee_e2e_cmd(valid_split, method_name, method_type, ext_command,
                                                            input_delim, output_delim, write_weights, write_dir,
                                                            verbose)
                    else:
                        raise ValueError('Method type {} unknown!'.format(method_type))
                    results = list(results)

                    # Log the best results
                    for i in range(len(results)):
                        if best_results[i] is None:
                            best_results[i] = results[i]
                            best_params[i] = param_str
                        else:
                            func1 = getattr(results[i].test_scores, str(maximize))
                            func2 = getattr(best_results[i].test_scores, str(maximize))
                            if func1() > func2():
                                best_results[i] = results[i]
                                best_params[i] = param_str

            # We have found the best parameters, train the model again on the whole train data to get actual results
            results = list()
            for i in range(len(edge_embedding_methods)):
                ext_command = command + best_params[i]
                print('Best parameters for {} using ee method {} are: {}'
                      .format(method_name, edge_embedding_methods[i], best_params[i]))

                # Call the corresponding evaluation method
                if method_type == 'ne':
                    results.extend(self._evaluate_ne_cmd(self.traintest_split, method_name, ext_command,
                                                         [edge_embedding_methods[i]], input_delim, output_delim,
                                                         write_weights, write_dir, verbose))
                elif method_type == 'ee' or method_type == 'e2e':
                    results.extend(self._evaluate_ee_e2e_cmd(self.traintest_split, method_name, method_type,
                                                             ext_command, input_delim, output_delim, write_weights,
                                                             write_dir, verbose))
                else:
                    raise ValueError('Method type {} unknown!'.format(method_type))

            # zip(edge_embedding_methods, best_params)
            # data = collections.defaultdict(list)
            # for best in set(best_params):
            #     ext_command = command + best
            #     results.extend(self._evaluate_ne_cmd(self.traintest_split, method_name, ext_command,
            #                                          [edge_embedding_methods[i]], input_delim, emb_delim, verbose))

            # Store the evaluation results
            self._results.extend(results)
        else:
            # No parameter tuning is needed
            # Call the corresponding evaluation method
            if method_type == 'ne':
                results = self._evaluate_ne_cmd(self.traintest_split, method_name, command, edge_embedding_methods,
                                                input_delim, output_delim, write_weights, write_dir, verbose)
            elif method_type == 'ee' or method_type == 'e2e':
                results = self._evaluate_ee_e2e_cmd(self.traintest_split, method_name, method_type, command,
                                                    input_delim, output_delim, write_weights, write_dir, verbose)
            else:
                raise ValueError('Method type {} unknown!'.format(method_type))

            # Store the evaluation results
            self._results.extend(results)
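A call sketch for evaluate_cmd with an end-to-end ('e2e') method. The script name and its flags are purely hypothetical; the point is the six placeholders in the documented order (network, train edgelist, test edgelist, train predictions, test predictions, dimensionality) and the fact that edge_embedding_methods is ignored for 'ee'/'e2e' methods.

# Hypothetical end-to-end method: 6 placeholders, in the order required for 'e2e'
cmd = ('python my_e2e_method/main.py --network {} --tr_edges {} --te_edges {} '
       '--tr_pred {} --te_pred {} --dim {}')

nee.evaluate_cmd(method_name='my_e2e_method',
                 method_type='e2e',
                 command=cmd,
                 edge_embedding_methods=['none'],   # ignored for 'ee'/'e2e' methods
                 input_delim=',',
                 output_delim=',',
                 tune_params=None,                  # no parameter tuning in this sketch
                 write_weights=False,               # store the train graph as a plain edgelist
                 write_dir=True,                    # store both directions of undirected edges
                 verbose=True)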