Example #1
0
 def evaluate(self, term_list_1, term_list_2):
     """Run every evaluator contained in this group (order is not
     significant) and gather their individual results into one
     ResultSet, which is returned."""
     collected = ResultSet()
     for evaluator in self:
         # Log which concrete evaluator class is being applied.
         logging.log(ULTRADEBUG, "Applying %s as part of an EvaluationGroup",
                     evaluator.__class__.__name__)
         collected.add(evaluator.evaluate(term_list_1, term_list_2))
     return collected
Example #2
0
 def create_graph(self, list_of_lines):
     """Drive the full graph-creation pipeline for one article,
     ensuring that the recorded metrics reflect only this run."""
     # A fresh ResultSet guarantees no measurements leak between runs.
     self._measurements = ResultSet()
     self._line_set_id = list_of_lines.set_id
     self._preprocess_article(list_of_lines.lines)
     built = self._create_graph(list_of_lines.lines)
     built = self._post_process_graph(built)
     self._compute_measurements(list_of_lines.lines)
     return built
Example #3
0
 def __init__(self,
              type_of_graph_to_build=Graph,
              node_weight_threshold=0.0,
              link_weight_threshold=0.0,
              tf_idf_provider=None,
              add_orphan_nodes=True):
     """Accepts a type of Graph to build (so we can use Graph subclasses
     without creating a separate hierarchy of GraphBuilder subclasses)"""
     # Configuration: what to build and how to filter it.
     self._type_of_graph_to_build = type_of_graph_to_build
     self._node_weight_threshold = node_weight_threshold
     self._link_weight_threshold = link_weight_threshold
     self._add_orphan_nodes = add_orphan_nodes
     # Optional TF*IDF weighting; scores are filled in during preprocessing.
     self._tf_idf = tf_idf_provider
     self._tf_idf_scores = None
     # Per-run state, reset on each graph creation.
     self._line_set_id = None
     self._node_cache = set()
     self._measurements = ResultSet()
Example #4
0
 def create_graph(self, list_of_lines):
     """Sets up the creation of a graph, ensuring that the metrics
     from the creation reflect only the latest run."""
     # Discard any measurements left over from a previous invocation.
     self._measurements = ResultSet()
     self._line_set_id = list_of_lines.set_id
     article_lines = list_of_lines.lines
     self._preprocess_article(article_lines)
     new_graph = self._post_process_graph(self._create_graph(article_lines))
     self._compute_measurements(article_lines)
     return new_graph
Example #5
0
 def __init__(self, type_of_graph_to_build=Graph,
              node_weight_threshold=0.0,
              link_weight_threshold=0.0,
              tf_idf_provider=None,
              add_orphan_nodes=True):
     """Accepts a type of Graph to build (so we can use Graph subclasses
     without creating a separate hierarchy of GraphBuilder subclasses)"""
     # Build/filter configuration.
     self._type_of_graph_to_build = type_of_graph_to_build
     self._node_weight_threshold = node_weight_threshold
     self._link_weight_threshold = link_weight_threshold
     self._add_orphan_nodes = add_orphan_nodes
     # TF*IDF provider is optional; its scores are computed per article.
     self._tf_idf = tf_idf_provider
     self._tf_idf_scores = None
     # State that is reset for every run.
     self._line_set_id = None
     self._node_cache = set()
     self._measurements = ResultSet()
Example #6
0
class GraphBuilder(object):
    """GraphBuilder is a generic base class to derive. It provides a template
    to build the actual GraphBuilders, that should subclass this one. The
    design provides hooks to filter each node and link as it's considered, and
    those may be overridden by descendants to customize processing easily.

    The default filtering behavior is filtering by confidence (since it's
    something that will come in quite handy).

    You can also specify whether to add the nodes that don't belong to a link
    back to the graph as nodes linked to themselves upon consolidation via the
    add_orphan_nodes parameter (it defaults to True)."""
    def __init__(self, type_of_graph_to_build=Graph,
                 node_weight_threshold=0.0,
                 link_weight_threshold=0.0,
                 tf_idf_provider=None,
                 add_orphan_nodes=True):
        """Accepts a type of Graph to build (so we can use Graph subclasses
        without creating a separate hierarchy of GraphBuilder subclasses)"""
        self._type_of_graph_to_build = type_of_graph_to_build
        self._node_weight_threshold = node_weight_threshold
        self._link_weight_threshold = link_weight_threshold
        self._tf_idf = tf_idf_provider
        self._tf_idf_scores = None  # Filled in by _preprocess_article
        self._line_set_id = None
        self._node_cache = set()  # Every node produced by _node_factory
        self._add_orphan_nodes = add_orphan_nodes
        self._measurements = ResultSet()
    # Although at first glance the methods seem to be good candidates for
    # static methods, it's possible that descendants will want to take
    # instance variables into consideration when deciding how to process
    # certain Nodes, Links, or other input, so the methods will be regular
    # member methods.
    def include_node(self, node_under_consideration):
        """Examines a node to decide whether it should be part of the graph.
        The default is to check the weight of all Node objects that wish to be
        part of the graph."""
        return (node_under_consideration.weight >=
                self._node_weight_threshold)
    def include_link(self, link_under_consideration):
        """Examines a link to decide whether it should be part of the graph.
        The default is to check the strength of all Link objects that wish to
        be part of the graph."""
        return (link_under_consideration.weight >=
                self._link_weight_threshold)
    def _preprocess_article(self, list_of_lines):
        """Makes a first pass through the article. This default implementation
        computes the TF*IDF values for the document, if a TF*IDF provider was
        specified."""
        if self._tf_idf is not None:
            self._tf_idf.start_tf()
            for line in list_of_lines:
                self._tf_idf.tf_line(line)
            self._tf_idf_scores = self._tf_idf.end_tf()
        return
    def _node_factory(self, cui, description, weight, original_line=None):
        """Generates a new node. It will weight the node using the TF*IDF
        provider, if one was specified."""
        # This function also maintains the internal node cache, which
        # _post_process_graph uses to detect orphan nodes.
        new_node = Node(cui, description, weight, original_line)
        if self._tf_idf_scores is not None:
            new_node.weight = new_node.weight * self._tf_idf_scores[cui]
        self._node_cache.add(new_node)
        return new_node
    def _link_factory(self, node1, node2, weight, name=None):
        """Generates a new link."""
        return Link(node1, node2, weight, name)
    def _adirectional_link_factory(self, node1, node2, weight, name=None):
        """Generates a new adirectional link."""
        return AdirectionalLink(node1, node2, weight, name)
    def _create_graph(self, list_of_lines):
        """Actually build the graph (default implementation does nothing and
        returns an empty graph). Override in subclasses."""
        # pylint: disable-msg=W0613
        # Do something worthwhile
        return self._type_of_graph_to_build()
    def _post_process_graph(self, built_graph):
        """Post-processes the graph. The default implementation consolidates
        it and adds orphan nodes to the graph, consolidating it again."""
        built_graph.consolidate_graph()
        self._tf_idf_scores = None  # Make sure the scores aren't recycled
                                    # accidentally
        if self._add_orphan_nodes:
            rels = built_graph.relationships
            # Nodes that already participate in at least one relationship.
            known_nodes = set(x.node1 for x in rels)
            known_nodes.update(x.node2 for x in rels)
            # Cached nodes not in the graph get a self-link so they survive
            # consolidation.
            orphans = [n for n in self._node_cache if n not in known_nodes]
            for orphan in orphans:
                built_graph.add_relationship(
                    AdirectionalLink(orphan, orphan, orphan.weight))
            built_graph.consolidate_graph()
            logging.log(ULTRADEBUG, "Added %d orphan nodes", len(orphans))
        self._node_cache = set()
        return built_graph
    def create_graph(self, list_of_lines):
        """Sets up the creation of a graph, ensuring that the metrics
        from the creation reflect only the latest run."""
        # Make sure that the measurements only reflect one run
        self._measurements = ResultSet()
        self._line_set_id = list_of_lines.set_id
        self._preprocess_article(list_of_lines.lines)
        graph = self._create_graph(list_of_lines.lines)
        graph = self._post_process_graph(graph)
        self._compute_measurements(list_of_lines.lines)
        return graph
    def _compute_measurements(self, list_of_lines):
        """Computes measurements on the lines the builder received, and the
        graph building process."""
        self._measurements.add(ArticleLineCount(len(list_of_lines)))
        # sum() returns 0 on an empty iterable, so no special case for an
        # empty article is needed (the original reduce() required one).
        wordcount = sum(len(x.line.split()) for x in list_of_lines)
        self._measurements.add(ArticleWordCount(wordcount))
    def get_measurements(self):
        """Getter for the measurements property"""
        return self._measurements
    measurements = property(get_measurements)
Example #7
0
class GraphBuilder(object):
    """GraphBuilder is a generic base class to derive. It provides a template
    to build the actual GraphBuilders, that should subclass this one. The
    design provides hooks to filter each node and link as it's considered, and
    those may be overridden by descendants to customize processing easily.

    The default filtering behavior is filtering by confidence (since it's
    something that will come in quite handy).

    You can also specify whether to add the nodes that don't belong to a link
    back to the graph as nodes linked to themselves upon consolidation via the
    add_orphan_nodes parameter (it defaults to True)."""
    def __init__(self,
                 type_of_graph_to_build=Graph,
                 node_weight_threshold=0.0,
                 link_weight_threshold=0.0,
                 tf_idf_provider=None,
                 add_orphan_nodes=True):
        """Accepts a type of Graph to build (so we can use Graph subclasses
        without creating a separate hierarchy of GraphBuilder subclasses)"""
        self._type_of_graph_to_build = type_of_graph_to_build
        self._node_weight_threshold = node_weight_threshold
        self._link_weight_threshold = link_weight_threshold
        self._tf_idf = tf_idf_provider
        self._tf_idf_scores = None  # Filled in by _preprocess_article
        self._line_set_id = None
        self._node_cache = set()  # Every node produced by _node_factory
        self._add_orphan_nodes = add_orphan_nodes
        self._measurements = ResultSet()

    # Although at first glance the methods seem to be good candidates for
    # static methods, it's possible that descendants will want to take
    # instance variables into consideration when deciding how to process
    # certain Nodes, Links, or other input, so the methods will be regular
    # member methods.
    def include_node(self, node_under_consideration):
        """Examines a node to decide whether it should be part of the graph.
        The default is to check the weight of all Node objects that wish to be
        part of the graph."""
        return (node_under_consideration.weight >= self._node_weight_threshold)

    def include_link(self, link_under_consideration):
        """Examines a link to decide whether it should be part of the graph.
        The default is to check the strength of all Link objects that wish to
        be part of the graph."""
        return (link_under_consideration.weight >= self._link_weight_threshold)

    def _preprocess_article(self, list_of_lines):
        """Makes a first pass through the article. This default implementation
        computes the TF*IDF values for the document, if a TF*IDF provider was
        specified."""
        if self._tf_idf is not None:
            self._tf_idf.start_tf()
            for line in list_of_lines:
                self._tf_idf.tf_line(line)
            self._tf_idf_scores = self._tf_idf.end_tf()
        return

    def _node_factory(self, cui, description, weight, original_line=None):
        """Generates a new node. It will weight the node using the TF*IDF
        provider, if one was specified."""
        # This function also maintains the internal node cache, which
        # _post_process_graph uses to detect orphan nodes.
        new_node = Node(cui, description, weight, original_line)
        if self._tf_idf_scores is not None:
            new_node.weight = (new_node.weight * self._tf_idf_scores[cui])
        self._node_cache.add(new_node)
        return new_node

    def _link_factory(self, node1, node2, weight, name=None):
        """Generates a new link."""
        return Link(node1, node2, weight, name)

    def _adirectional_link_factory(self, node1, node2, weight, name=None):
        """Generates a new adirectional link."""
        return AdirectionalLink(node1, node2, weight, name)

    def _create_graph(self, list_of_lines):
        """Actually build the graph (default implementation does nothing and
        returns an empty graph). Override in subclasses."""
        # pylint: disable-msg=W0613
        # Do something worthwhile
        return self._type_of_graph_to_build()

    def _post_process_graph(self, built_graph):
        """Post-processes the graph. The default implementation consolidates
        it and adds orphan nodes to the graph, consolidating it again."""
        built_graph.consolidate_graph()
        self._tf_idf_scores = None  # Make sure the scores aren't recycled
        # accidentally
        if self._add_orphan_nodes:
            rels = built_graph.relationships
            # Nodes that already participate in at least one relationship.
            known_nodes = set(x.node1 for x in rels)
            known_nodes.update(x.node2 for x in rels)
            # Cached nodes not in the graph get a self-link so they survive
            # consolidation.
            orphans = [n for n in self._node_cache if n not in known_nodes]
            for orphan in orphans:
                built_graph.add_relationship(
                    AdirectionalLink(orphan, orphan, orphan.weight))
            built_graph.consolidate_graph()
            logging.log(ULTRADEBUG, "Added %d orphan nodes", len(orphans))
        self._node_cache = set()
        return built_graph

    def create_graph(self, list_of_lines):
        """Sets up the creation of a graph, ensuring that the metrics
        from the creation reflect only the latest run."""
        # Make sure that the measurements only reflect one run
        self._measurements = ResultSet()
        self._line_set_id = list_of_lines.set_id
        self._preprocess_article(list_of_lines.lines)
        graph = self._create_graph(list_of_lines.lines)
        graph = self._post_process_graph(graph)
        self._compute_measurements(list_of_lines.lines)
        return graph

    def _compute_measurements(self, list_of_lines):
        """Computes measurements on the lines the builder received, and the
        graph building process."""
        self._measurements.add(ArticleLineCount(len(list_of_lines)))
        # sum() returns 0 on an empty iterable, so no special case for an
        # empty article is needed (the original reduce() required one).
        wordcount = sum(len(x.line.split()) for x in list_of_lines)
        self._measurements.add(ArticleWordCount(wordcount))

    def get_measurements(self):
        """Getter for the measurements property"""
        return self._measurements

    measurements = property(get_measurements)
Example #8
0
 def compute_measures(self):
     """Computes graph metrics for the current object and returns them as
     a ResultSet.

     Metrics: number of links and nodes, average node/link weight, link
     degree, average relative out/in centrality, stratum and compactness.
     NOTE(review): raises ZeroDivisionError on a graph with no nodes or
     links — confirm callers never pass an empty graph."""
     self._consolidate_if_necessary()
     logging.log(ULTRADEBUG, "Computing graph metrics for %r", self)
     graph_measures = ResultSet()
     graph_measures.add(GraphNumberLinks(len(self._relationships)))
     # Every node that participates in at least one relationship.
     unique_nodes = set()
     for a_relation in self._relationships:
         unique_nodes.add(a_relation.node1)
         unique_nodes.add(a_relation.node2)
     graph_measures.add(GraphNumberNodes(len(unique_nodes)))
     # sum() is the idiomatic replacement for reduce(operator.add, ...).
     graph_measures.add(GraphAverageNodeWeight(
         sum(x.weight for x in unique_nodes) / float(len(unique_nodes))))
     graph_measures.add(GraphAverageLinkWeight(
         sum(x.weight for x in self._relationships) /
         float(len(self._relationships))))
     graph_measures.add(GraphLinkDegree(
         float(len(self._relationships)) / float(len(unique_nodes))))
     logging.log(ULTRADEBUG, "Starting computation of the distance matrix.")
     distmat = DistanceMatrix(self.as_mapped_link_matrix())
     logging.log(ULTRADEBUG, "Distance matrix obtained. Computing stats.")
     rocs = [distmat.relative_out_centrality(x)
             for x in xrange(len(distmat))]
     rics = [distmat.relative_in_centrality(x)
             for x in xrange(len(distmat))]
     avrocs = sum(rocs) / float(len(distmat))
     avrics = sum(rics) / float(len(distmat))
     graph_measures.add(GraphRelativeOutCentrality(avrocs))
     graph_measures.add(GraphRelativeInCentrality(avrics))
     graph_measures.add(GraphStratum(distmat.stratum()))
     graph_measures.add(GraphCompactness(distmat.compactness()))
     logging.log(ULTRADEBUG, "Finished computing graph metrics.")
     return graph_measures
Example #9
0
 def compute_measures(self):
     """Computes graph metrics for the current object and returns them as
     a ResultSet.

     Metrics: number of links and nodes, average node/link weight, link
     degree, average relative out/in centrality, stratum and compactness.
     NOTE(review): raises ZeroDivisionError on a graph with no nodes or
     links — confirm callers never pass an empty graph."""
     self._consolidate_if_necessary()
     logging.log(ULTRADEBUG, "Computing graph metrics for %r", self)
     graph_measures = ResultSet()
     graph_measures.add(GraphNumberLinks(len(self._relationships)))
     # Collect every node that appears at either end of a relationship.
     unique_nodes = set()
     for a_relation in self._relationships:
         unique_nodes.add(a_relation.node1)
         unique_nodes.add(a_relation.node2)
     graph_measures.add(GraphNumberNodes(len(unique_nodes)))
     # sum() replaces the non-idiomatic reduce(operator.add, ...) calls.
     graph_measures.add(
         GraphAverageNodeWeight(
             sum(x.weight for x in unique_nodes) / float(len(unique_nodes))))
     graph_measures.add(
         GraphAverageLinkWeight(
             sum(x.weight for x in self._relationships) /
             float(len(self._relationships))))
     graph_measures.add(
         GraphLinkDegree(
             float(len(self._relationships)) / float(len(unique_nodes))))
     logging.log(ULTRADEBUG, "Starting computation of the distance matrix.")
     distmat = DistanceMatrix(self.as_mapped_link_matrix())
     logging.log(ULTRADEBUG, "Distance matrix obtained. Computing stats.")
     rocs = [
         distmat.relative_out_centrality(x) for x in xrange(len(distmat))
     ]
     rics = [
         distmat.relative_in_centrality(x) for x in xrange(len(distmat))
     ]
     avrocs = sum(rocs) / float(len(distmat))
     avrics = sum(rics) / float(len(distmat))
     graph_measures.add(GraphRelativeOutCentrality(avrocs))
     graph_measures.add(GraphRelativeInCentrality(avrics))
     graph_measures.add(GraphStratum(distmat.stratum()))
     graph_measures.add(GraphCompactness(distmat.compactness()))
     logging.log(ULTRADEBUG, "Finished computing graph metrics.")
     return graph_measures