예제 #1
0
def compare_visualization(product_sku, compare_phrase):
    """Build and export a graph of comparisons found in a product's reviews.

    Every review comment stored for ``product_sku`` is lower-cased, passed
    through ``plaintext`` and ``parsetree``, and searched for the pattern
    ``{NP} (VP) <compare_phrase> {NP}`` (for example with
    ``compare_phrase = 'faster than'``).  Each match adds an edge from the
    left noun phrase to the right noun phrase.

    Args:
        product_sku: SKU used to filter the ``ReviewInfo`` rows.
        compare_phrase: Comparison wording inserted between the two noun
            phrases of the search pattern.

    Returns:
        True when at least one node was collected and the largest subgraph
        was exported to ``static/compare_visualization``; False otherwise.
    """
    all_reviews = ReviewInfo.objects.all().filter(sku=product_sku)
    g = Graph()

    # The pattern does not depend on the individual review: build it once.
    search_pattern = '{NP} (VP) ' + compare_phrase + ' {NP}'
    total = len(all_reviews)

    for done, review in enumerate(all_reviews, 1):
        tree = parsetree(plaintext(review.comment.lower()))
        for m in search(search_pattern, tree):
            x = m.group(1).string  # NP left
            y = m.group(2).string  # NP right
            if x not in g:
                g.add_node(x)
            if y not in g:
                g.add_node(y)
            g.add_edge(g[x], g[y], stroke=(0, 0, 0, 0.75))  # R,G,B,A
        # Progress as a fraction of reviews processed.  A single-argument
        # print() call emits the same text on Python 2 and Python 3.
        print('%s \r' % (float(done) / total))

    if len(g) == 0:
        return False

    g = g.split()[0]  # Largest subgraph.
    for n in g.sorted()[:80]:  # Sort by Node.weight.
        n.fill = (0, 0.5, 1, 0.75 * n.weight)

    g.export('static/compare_visualization', directed=True, weighted=2.0)
    return True
예제 #2
0
 def subGraph(self):
     """Return the unconnected subgraphs of a reduced copy of ``self.g``.

     The reduced graph is built from the first 30 nodes of the largest
     subgraph of ``self.g`` (in ``sorted()`` order) plus every node that
     shares an edge with one of them.

     Returns:
         The list produced by ``split()`` on the reduced graph, or an empty
         list when ``self.g`` has no subgraphs at all.
     """
     parts = self.g.split()
     if not parts:
         # Empty graph: indexing [0] below would raise IndexError.
         return []
     largest = parts[0]

     reduced = Graph()
     for rank, n in enumerate(largest.sorted()[:30], 1):
         fill = (0, 0.5, 1, 0.75 * n.weight)
         n.fill = fill
         logger.debug(u"i:%d=%s", rank, n)
         reduced.add_node(n.id)
         # The reduced graph is created from ids only, so the fill has to be
         # applied to its own node as well or the styling is lost.
         reduced[n.id].fill = fill
         logger.debug(u"edges : %s", n.edges)

         for e in n.edges:
             logger.debug(u"edge1 : %s, edge2 : %s", e.node1.id, e.node2.id)
             # Add whichever end of the edge is not the current node.
             neighbour = e.node2.id if e.node1.id == n.id else e.node1.id
             reduced.add_node(neighbour)
             reduced.add_edge(e.node1.id, e.node2.id, stroke=(0, 0, 0, 0.75))

     return reduced.split()
예제 #3
0
class PatternGraph(ConceptGraph):
    """Collect nodes and edges in a ``Graph`` and export reduced views of it.

    Exports are written below ``<homeDir>/html``, one ``graph<N>`` directory
    per exported subgraph.
    """

    # Graph holding every node/edge added so far; set per instance in __init__.
    g = None

    def __init__(self, homeDir=None):
        """Create the output directory (if missing) and an empty graph.

        Args:
            homeDir: Base directory for exports; defaults to the current
                working directory.
        """
        # Name the class explicitly: super(self.__class__, self) recurses
        # forever as soon as PatternGraph itself is subclassed.
        super(PatternGraph, self).__init__()

        if homeDir is None:
            homeDir = os.getcwd()

        self.homeDir = os.path.join(homeDir, u"html")

        if not os.path.exists(self.homeDir):
            os.makedirs(self.homeDir)

        self.g = Graph()

    def addNode(self, n):
        """Add a node identified by ``n.name``."""
        self.g.add_node(n.name)

    def addEdge(self, p, c):
        """Add an edge from ``p.name`` to ``c.name``."""
        self.g.add_edge(p.name, c.name, stroke=(0, 0, 0, 0.75))  # R,G,B,A

    def exportGraph(self, title=u"Pattern Graph"):
        """Export at most the first five subgraphs returned by ``subGraph``.

        Args:
            title: Title passed through to each export.
        """
        logger.debug(u"exportGraph")

        logger.info(u"Graph Size: %d", len(self.g))

        # subGraph() returns a list of unconnected subgraphs; export the
        # first five of them, each into its own directory.
        for i, h in enumerate(self.subGraph()[:5]):
            logger.debug(u"Graph[%d]=%d", i, len(h))
            newDir = os.path.join(self.homeDir, u"graph%d" % i)
            h.export(newDir, overwrite=True, directed=True, weighted=0.5, title=title)

    def subGraph(self):
        """Return the unconnected subgraphs of a reduced copy of ``self.g``.

        The reduced graph is built from the first 30 nodes of the largest
        subgraph of ``self.g`` (in ``sorted()`` order) plus every node that
        shares an edge with one of them.

        Returns:
            The list produced by ``split()`` on the reduced graph, or an
            empty list when ``self.g`` has no subgraphs at all.
        """
        parts = self.g.split()
        if not parts:
            # Empty graph: indexing [0] below would raise IndexError.
            return []
        largest = parts[0]

        reduced = Graph()
        for rank, n in enumerate(largest.sorted()[:30], 1):
            fill = (0, 0.5, 1, 0.75 * n.weight)
            n.fill = fill
            logger.debug(u"i:%d=%s", rank, n)
            reduced.add_node(n.id)
            # The reduced graph is created from ids only, so the fill has to
            # be applied to its own node as well or the styling is lost.
            reduced[n.id].fill = fill
            logger.debug(u"edges : %s", n.edges)

            for e in n.edges:
                logger.debug(u"edge1 : %s, edge2 : %s", e.node1.id, e.node2.id)
                # Add whichever end of the edge is not the current node.
                neighbour = e.node2.id if e.node1.id == n.id else e.node1.id
                reduced.add_node(neighbour)
                reduced.add_edge(e.node1.id, e.node2.id, stroke=(0, 0, 0, 0.75))

        return reduced.split()
예제 #4
0
# Collect (A, B) pairs from tweets of the form "A is the new B", then export
# the largest connected group of comparisons as a graph.
comparisons = []

for page in range(1, 10):
    # Set cached=False for live results:
    for result in Twitter(language="en").search("\"is the new\"", start=page, count=100, cached=True):
        words = result.text.replace("\n", " ").lower()
        words = words.replace("is the new", "NEW").split(" ")
        try:
            pos = words.index("NEW")
        except ValueError:
            # The phrase did not end up as a standalone token; skip the tweet.
            continue
        # A word is needed on both sides.  pos == 0 must be rejected
        # explicitly: words[pos - 1] would silently wrap to the last word.
        if pos == 0 or pos + 1 >= len(words):
            continue
        A = words[pos - 1].strip("?!.:;,#@\"'")
        B = words[pos + 1].strip("?!.:;,#@\"'")
        # Exclude common phrases such as "this is the new thing".
        if A and B and A not in ("it", "this", "here", "what", "why", "where"):
            comparisons.append((A, B))

g = Graph()
for A, B in comparisons:
    e = g.add_edge(B, A)  # "A is the new B": A <= B
    e.weight += 0.1
    # Single-argument print() emits the same text on Python 2 and Python 3.
    print("%s => %s" % (B, A))

# Not all nodes will be connected, there will be multiple subgraphs.
# Simply take the largest subgraph for our visualization.
subgraphs = g.split()
if subgraphs:
    g = subgraphs[0]
    export(g, "trends", weight=True, weighted=True, directed=True, overwrite=True)
else:
    # No comparisons were collected, so there is no subgraph to export.
    print("No comparisons found; nothing to export.")
예제 #5
0
# Collect (A, B) pairs from tweets of the form "A is the new B", then export
# the largest connected group of comparisons as a graph.
# NOTE(review): `comparisons` is not defined in this snippet; it is expected
# to exist already as a list - confirm against the surrounding code.
for page in range(1, 10):
    # Set cached=False for live results:
    for result in Twitter(language="en").search("\"is the new\"", start=page, count=100, cached=True):
        words = result.text.replace("\n", " ").lower()
        words = words.replace("is the new", "NEW").split(" ")
        try:
            pos = words.index("NEW")
        except ValueError:
            # The phrase did not end up as a standalone token; skip the tweet.
            continue
        # A word is needed on both sides.  pos == 0 must be rejected
        # explicitly: words[pos - 1] would silently wrap to the last word.
        if pos == 0 or pos + 1 >= len(words):
            continue
        A = words[pos - 1].strip("?!.:;,#@\"'")
        B = words[pos + 1].strip("?!.:;,#@\"'")
        # Exclude common phrases such as "this is the new thing".
        if A and B and A not in ("it", "this", "here", "what", "why", "where"):
            comparisons.append((A, B))

g = Graph()
for A, B in comparisons:
    e = g.add_edge(B, A)  # "A is the new B": A <= B
    e.weight += 0.1
    print(("%s => %s" % (B, A)).encode('utf-8'))

# Not all nodes will be connected, there will be multiple subgraphs.
# Simply take the largest subgraph for our visualization.
subgraphs = g.split()
if subgraphs:
    g = subgraphs[0]
    g.export("trends", weighted=True, directed=True)
else:
    # No comparisons were collected, so there is no subgraph to export.
    print("No comparisons found; nothing to export.")
예제 #6
0
from pattern.web    import Bing, plaintext
from pattern.en     import parsetree
from pattern.search import search
from pattern.graph  import Graph
 
# Build a graph of "X is less important than Y" statements found through
# Bing and export the largest connected subgraph to 'test'.
g = Graph()

# The search pattern is identical for every result, so build it once.
# The two {NP} groups are read back below as m.group(1) and m.group(2).
# For the opposite comparison, use 'more important than' in both the query
# and the pattern.
p = '{NP} (VP) is less important than {NP}'

for i in range(10):
    for result in Bing().search('"is less important than"', start=i + 1, count=50):
        s = parsetree(plaintext(result.text.lower()))
        for m in search(p, s):
            x = m.group(1).string  # NP left
            y = m.group(2).string  # NP right
            if x not in g:
                g.add_node(x)
            if y not in g:
                g.add_node(y)
            g.add_edge(g[x], g[y], stroke=(0, 0, 0, 0.75))  # R,G,B,A

# With no matches at all, split() yields no subgraphs and [0] would raise
# IndexError, so only style and export when there is something to show.
subgraphs = g.split()
if subgraphs:
    g = subgraphs[0]  # Largest subgraph.

    for n in g.sorted()[:40]:  # Sort by Node.weight.
        n.fill = (0, 0.5, 1, 0.75 * n.weight)

    g.export('test', directed=True, weighted=0.6)
        # NOTE(review): this is the tail of a truncated example. The enclosing
        # loop header(s) and the code that defines `s` and `g` are not visible
        # here, so these indented lines cannot run as they stand.
        s = Sentence(parse(s))
        print s
        p = Pattern.fromstring('NP (VP) more important than NP')
        for m in p.search(s):
            a = m.constituents(p[+0])[-1]  # Left NP.
            b = m.constituents(p[-1])[+0]  # Right NP.
            # Use the head of the constituent when it is a Chunk (falling back
            # to the constituent itself), then take its string.
            a = (isinstance(a, Chunk) and a.head or a).string
            b = (isinstance(b, Chunk) and b.head or b).string
            if a and b:
                if a not in g:
                    g.add_node(a, radius=5, stroke=(0, 0, 0, 0.8))
                if b not in g:
                    g.add_node(b, radius=5, stroke=(0, 0, 0, 0.8))
                # The edge runs from the right NP to the left NP.
                g.add_edge(g[b], g[a], stroke=(0, 0, 0, 0.6))

# NOTE(review): split()[0] presumably raises IndexError when no node was
# added above - confirm and guard if this fragment is restored.
g = g.split()[0]  # Largest subgraph.

for n in g.sorted()[:40]:  # Sorted by Node.weight.
    n.fill = (0.0, 0.5, 1.0, 0.7 * n.weight)

# Export the subgraph to 'test' with explicit layout keyword arguments.
export(g,
       'test',
       directed=True,
       weighted=0.6,
       distance=6,
       force=0.05,
       repulsion=150)
import os
# Recursively list test/ through the shell (relies on an `ls` command being
# available), presumably to show what the export wrote.
os.system('ls -lR test/')

# Example of pattern: http://www.clips.ua.ac.be/pages/pattern