# Example no. 1
# 0
class LexRank:
	"""
	Constructs a summary of the input document by extracting the most
	informative sentences.

	Arguments:
		directory - A corpus of text files to be summarized. It is handed
		unchanged to TextGraph, which builds the graphs used here.
	"""
	def __init__(self, directory):
		self.graph = TextGraph(directory)

	def lexR(self, graph):
		"""
		Compute the LexRank of the sentences.
		LexRank of a sentence in the sentence graph is the PageRank of the node
		representing the sentence. It is a measure of the importance and influence
		of the sentence in the corpus.

		Arguments:
			graph -  A networkx graph or digraph.

		Returns:
			A dictionary of all the nodes with their PageRank scores.
		"""
		# pagerank_numpy was removed in networkx 3.0; keep using it where it
		# still exists and fall back to the generic pagerank otherwise.
		pagerank = getattr(nx, "pagerank_numpy", None)
		if pagerank is None:
			pagerank = nx.pagerank
		return pagerank(graph, alpha=0.85)

	def summary(self, compression=0.25):
		"""
		Builds the summary based on the LexRank scores of the sentences.

		Arguments:
			compression : A number in [0,1] which is equal to the fraction of total
			sentences to be included in the summary.
			Default value is 0.25. Values below 0 yield an empty summary and
			values above 1 yield every sentence.

		Returns:
			The selected graph nodes (the sentences), ordered from highest to
			lowest score and concatenated with no separator added.
		"""
		g = self.graph.sentenceGraph()
		total_sentences = len(g.nodes())
		# Never negative: a negative count would turn the slice below into
		# "all but the last k" instead of "none".
		n_sentences = max(int(total_sentences * compression), 0)

		rankings = self.lexR(g)
		# .items() exists on both Python 2 and Python 3 dicts (iteritems does not).
		ranked_sentences = sorted(rankings.items(), key=operator.itemgetter(1), reverse=True)

		# Slicing stops at the end of the list, so an oversized request cannot
		# raise IndexError.
		return "".join(sentence for sentence, _score in ranked_sentences[:n_sentences])
# Example no. 2
# 0
class LexRank:
    """
    Summarizes a corpus by extracting its highest-ranked sentences.

    Arguments:
        directory - A corpus of text files to be summarized. It is passed
        straight to TextGraph, whose sentence graph is ranked here.
    """
    def __init__(self, directory):
        self.graph = TextGraph(directory)

    def lexR(self, graph):
        """
        Compute the LexRank of the sentences.

        LexRank of a sentence in the sentence graph is the PageRank of the
        node representing the sentence. It is a measure of the importance
        and influence of the sentence in the corpus.

        Arguments:
            graph -  A networkx graph or digraph.

        Returns:
            A dictionary of all the nodes with their PageRank scores.
        """
        # networkx 3.0 dropped pagerank_numpy; prefer it when present so
        # older installations behave as before, else use nx.pagerank.
        if hasattr(nx, "pagerank_numpy"):
            return nx.pagerank_numpy(graph, alpha=0.85)
        return nx.pagerank(graph, alpha=0.85)

    def summary(self, compression=0.25):
        """
        Builds the summary based on the LexRank scores of the sentences.

        Arguments:
            compression : A number in [0,1] which is equal to the fraction
            of total sentences to be included in the summary.
            Default value is 0.25. A value below 0 produces an empty
            summary; a value above 1 produces all sentences.

        Returns:
            The chosen sentences (graph nodes) joined together without any
            added separator, best-scoring sentence first.
        """
        g = self.graph.sentenceGraph()
        total_sentences = len(g.nodes())
        n_sentences = int(total_sentences * compression)
        if n_sentences <= 0:
            # Nothing requested; also guards the slice against negatives.
            # lexR is still invoked so any error it raises is not hidden.
            self.lexR(g)
            return ""

        rankings = self.lexR(g)
        # Sorting the keys by their score avoids the Python-2-only
        # dict.iteritems(); ties keep the dict's own iteration order.
        ranked_sentences = sorted(rankings, key=rankings.get, reverse=True)

        # A slice cannot run off the end, unlike indexing one by one.
        return "".join(ranked_sentences[:n_sentences])
# Example no. 3
# 0
from TextGraphics.src.graph import TextGraph
from TextGraphics.Analysis.plotting import Pictures
from TextGraphics.Analysis.properties import Analysis
from TextGraphics.Applications.summary import LexRank


# Directory holding the corpus of text files.
directory = 'Data'

# Build the sentence graph and pick its first node for the analysis below.
g = TextGraph(directory)
senGraph = g.sentenceGraph()
# list(...) keeps positional indexing valid when nodes() returns a view
# (networkx 2.x and later) rather than a plain list.
node = list(senGraph.nodes())[0]

# Plot the sentence graph.
out = Pictures(senGraph)
out.graphPlot(0.3, labelingByNumbers=True)

# Print the nodes of the connected component containing `node`.
out1 = Analysis(senGraph)
l = out1.nodeInConnectedComponent(node)
# print(...) with a single argument behaves the same on Python 2 and 3.
print(l.nodes())

# NOTE(review): presumably the 4 most central nodes - confirm against Analysis.
cS = out1.centralNodes(4)
print(cS)

# Summarize the corpus with the default compression.
lR = LexRank(directory)
print(lR.summary())
from TextGraphics.src.graph import TextGraph
from TextGraphics.Analysis.plotting import Pictures
from TextGraphics.Analysis.properties import Analysis
from TextGraphics.Applications.summary import LexRank

##  Import the corpus of text files.
directory = 'Data'

##  Create the sentence graph
g = TextGraph(directory)
senGraph = g.sentenceGraph()
# list(...) keeps positional indexing valid when nodes() returns a view
# (networkx 2.x and later) rather than a plain list.
node = list(senGraph.nodes())[0]

##  Plot the sentence graph
out = Pictures(senGraph)
out.graphPlot(0.3, labelingByNumbers=True)

##  Compute the keyword graph and report how many nodes it has
kwgraph = g.keywordGraph()
# print(...) with a single argument behaves the same on Python 2 and 3.
print(len(kwgraph.nodes()))

##  Plot the keyword graph
out = Pictures(kwgraph)
out.graphPlot(0.3, labelingByNumbers=True)

#### Analysis
out1 = Analysis(senGraph)

# Find the connected component of a node and print its nodes.
l = out1.nodeInConnectedComponent(node)
print(l.nodes())