def compare_visualization(product_sku, compare_phrase):
    """Export a graph of "<NP> <compare_phrase> <NP>" statements found in reviews.

    Each review stored for ``product_sku`` is lower-cased, reduced to plain
    text and parsed. Every match of the search pattern
    ``{NP} (VP) <compare_phrase> {NP}`` adds an edge from the left noun
    phrase to the right noun phrase.

    Args:
        product_sku: Value matched against the ``sku`` field of ``ReviewInfo``.
        compare_phrase: Comparison wording inserted verbatim into the search
            pattern, e.g. ``'faster than'``.

    Returns:
        True when the graph is non-empty and was exported to
        ``static/compare_visualization``; False when no comparison was found.
    """
    all_reviews = ReviewInfo.objects.all().filter(sku=product_sku)
    # The pattern does not depend on the individual review: build it once.
    pattern = '{NP} (VP) ' + compare_phrase + ' {NP}'
    g = Graph()
    count = 0.0  # float so the progress ratio is a true division on Python 2
    for review in all_reviews:
        tree = parsetree(plaintext(review.comment.lower()))
        for m in search(pattern, tree):
            x = m.group(1).string  # NP left
            y = m.group(2).string  # NP right
            if x not in g:
                g.add_node(x)
            if y not in g:
                g.add_node(y)
            g.add_edge(g[x], g[y], stroke=(0, 0, 0, 0.75))  # R,G,B,A
        count += 1.0
        # Progress output. Written as a single formatted argument so the
        # line is valid on Python 2 and 3 and prints the same text as the
        # former statement `print ratio, '\r'`.
        print("%s \r" % (count / len(all_reviews)))
    if len(g) == 0:
        return False
    g = g.split()[0]  # Largest subgraph.
    for n in g.sorted()[:80]:  # Sort by Node.weight.
        n.fill = (0, 0.5, 1, 0.75 * n.weight)
    g.export('static/compare_visualization', directed=True, weighted=2.0)
    return True
def subGraph(self):
    """Build a reduced graph around the top-ranked nodes and split it.

    Takes the first (largest) subgraph of ``self.g``, keeps its first 30
    nodes in ``sorted()`` order (by Node.weight), and copies each of them
    together with all of its edges and neighbours into a new graph.

    Returns:
        The list of unconnected subgraphs of that new graph, or an empty
        list when ``self.g`` has no subgraphs at all.
    """
    parts = self.g.split()
    if len(parts) == 0:
        # Empty graph: indexing parts[0] would raise IndexError.
        return []
    # Take the largest subgraph.
    largest = parts[0]
    newGraph = Graph()
    # Sort by Node.weight.
    for i, n in enumerate(largest.sorted()[:30], 1):
        fill = (0, 0.5, 1, 0.75 * n.weight)
        n.fill = fill
        logger.debug(u"i:%d=%s" % (i, n))
        newGraph.add_node(n.id)
        # newGraph is populated by id only, so the shading computed above
        # has to be set on its own node too or it never reaches the result.
        newGraph[n.id].fill = fill
        logger.debug(u"edges : %s" % n.edges)
        for e in n.edges:
            logger.debug(u"edge1 : %s, edge2 : %s" % (e.node1.id, e.node2.id))
            # Make sure the node at the other end of the edge exists.
            other = e.node2.id if e.node1.id == n.id else e.node1.id
            newGraph.add_node(other)
            newGraph.add_edge(e.node1.id, e.node2.id, stroke=(0, 0, 0, 0.75))
    return newGraph.split()
class PatternGraph(ConceptGraph):
    """ConceptGraph variant that collects nodes/edges in a ``Graph`` and
    exports the result below ``<homeDir>/html``.
    """

    # Graph holding the collected nodes and edges; set per instance in __init__.
    g = None

    def __init__(self, homeDir=None):
        """Create the output directory (if missing) and an empty graph.

        Args:
            homeDir: Base directory for the ``html`` output folder. Defaults
                to the current working directory.
        """
        # Name the class explicitly: super(self.__class__, self) recurses
        # without end as soon as PatternGraph itself is subclassed.
        super(PatternGraph, self).__init__()
        if homeDir is None:
            homeDir = os.getcwd()
        self.homeDir = homeDir + os.sep + u"html"
        if not os.path.exists(self.homeDir):
            os.makedirs(self.homeDir)
        self.g = Graph()

    def addNode(self, n):
        """Add a node whose id is ``n.name``."""
        self.g.add_node(n.name)

    def addEdge(self, p, c):
        """Add an edge between ``p.name`` and ``c.name`` (in that order)."""
        self.g.add_edge(p.name, c.name, stroke=(0, 0, 0, 0.75))  # R,G,B,A

    def exportGraph(self, title=u"Pattern Graph"):
        """Export up to five subgraphs returned by ``subGraph()``.

        Subgraph number ``i`` is written to ``<homeDir>/html/graph<i>``.

        Args:
            title: Title passed through to each export.
        """
        logger.debug(u"exportGraph")
        logger.info(u"Graph Size: %d" % len(self.g))
        k = self.subGraph()
        # Iterate through a list of unconnected subgraphs (at most five).
        for i in range(min(len(k), 5)):
            h = k[i]
            logger.debug(u"Graph[%d]=%d" % (i, len(h)))
            newDir = self.homeDir + os.sep + u"graph" + str(i)
            h.export(newDir, overwrite=True, directed=True, weighted=0.5, title=title)

    def subGraph(self):
        """Build a reduced graph around the top-ranked nodes and split it.

        Takes the first (largest) subgraph of ``self.g``, keeps its first 30
        nodes in ``sorted()`` order (by Node.weight), and copies each of them
        together with all of its edges and neighbours into a new graph.

        Returns:
            The list of unconnected subgraphs of that new graph, or an empty
            list when ``self.g`` has no subgraphs at all.
        """
        parts = self.g.split()
        if len(parts) == 0:
            # Empty graph: indexing parts[0] would raise IndexError.
            return []
        # Take the largest subgraph.
        largest = parts[0]
        newGraph = Graph()
        # Sort by Node.weight.
        for i, n in enumerate(largest.sorted()[:30], 1):
            fill = (0, 0.5, 1, 0.75 * n.weight)
            n.fill = fill
            logger.debug(u"i:%d=%s" % (i, n))
            newGraph.add_node(n.id)
            # newGraph is populated by id only, so the shading computed above
            # has to be set on its own node too or it never reaches the result.
            newGraph[n.id].fill = fill
            logger.debug(u"edges : %s" % n.edges)
            for e in n.edges:
                logger.debug(u"edge1 : %s, edge2 : %s" % (e.node1.id, e.node2.id))
                # Make sure the node at the other end of the edge exists.
                other = e.node2.id if e.node1.id == n.id else e.node1.id
                newGraph.add_node(other)
                newGraph.add_edge(e.node1.id, e.node2.id, stroke=(0, 0, 0, 0.75))
        return newGraph.split()
# Collect (A, B) pairs from tweets of the form "A is the new B" and export
# the largest connected group of them as a directed graph named "trends".
comparisons = []
for page in range(1, 10):
    # Set cached=False for live results:
    for result in Twitter(language="en").search("\"is the new\"", start=page, count=100, cached=True):
        s = result.text
        s = s.replace("\n", " ")
        s = s.lower()
        s = s.replace("is the new", "NEW")
        s = s.split(" ")
        if "NEW" not in s:
            continue
        k = s.index("NEW")
        # A word is needed on both sides of the marker. Without the k == 0
        # check, s[k - 1] silently wraps around to the LAST word of the tweet.
        if k == 0 or k + 1 >= len(s):
            continue
        A = s[k - 1].strip("?!.:;,#@\"'")
        B = s[k + 1].strip("?!.:;,#@\"'")
        # Exclude common phrases such as "this is the new thing".
        if A and B and A not in ("it", "this", "here", "what", "why", "where"):
            comparisons.append((A, B))

g = Graph()
for A, B in comparisons:
    e = g.add_edge(B, A)  # "A is the new B": A <= B
    e.weight += 0.1  # repeated pairs make the edge heavier
    # Single formatted argument: same text as `print B, "=>", A`, and valid
    # on both Python 2 and Python 3.
    print("%s => %s" % (B, A))

# Not all nodes will be connected, there will be multiple subgraphs.
# Simply take the largest subgraph for our visualization.
subgraphs = g.split()
if len(subgraphs) > 0:  # nothing to export when no comparison was collected
    g = subgraphs[0]
    export(g, "trends", weight=True, weighted=True, directed=True, overwrite=True)
# Collect (A, B) pairs from tweets of the form "A is the new B" and export
# the largest connected group of them as a directed graph named "trends".
# NOTE(review): `comparisons` is appended to but not initialised in this
# snippet; it has to exist already when this code runs.
for page in range(1, 10):
    # Set cached=False for live results:
    for result in Twitter(language="en").search("\"is the new\"", start=page, count=100, cached=True):
        s = result.text
        s = s.replace("\n", " ")
        s = s.lower()
        s = s.replace("is the new", "NEW")
        s = s.split(" ")
        if "NEW" not in s:
            continue
        k = s.index("NEW")
        # A word is needed on both sides of the marker. Without the k == 0
        # check, s[k - 1] silently wraps around to the LAST word of the tweet.
        if k == 0 or k + 1 >= len(s):
            continue
        A = s[k - 1].strip("?!.:;,#@\"'")
        B = s[k + 1].strip("?!.:;,#@\"'")
        # Exclude common phrases such as "this is the new thing".
        if A and B and A not in ("it", "this", "here", "what", "why", "where"):
            comparisons.append((A, B))

g = Graph()
for A, B in comparisons:
    e = g.add_edge(B, A)  # "A is the new B": A <= B
    e.weight += 0.1  # repeated pairs make the edge heavier
    print(("%s => %s" % (B, A)).encode('utf-8'))

# Not all nodes will be connected, there will be multiple subgraphs.
# Simply take the largest subgraph for our visualization.
subgraphs = g.split()
if len(subgraphs) > 0:  # nothing to export when no comparison was collected
    g = subgraphs[0]
    g.export("trends", weighted=True, directed=True)
from pattern.web import Bing, plaintext
from pattern.en import parsetree
from pattern.search import search
from pattern.graph import Graph

# Search Bing for the phrase "is less important than", pick out the noun
# phrases on either side of it and connect them in a directed graph.
# (Swap in "more important than" in both the query and the pattern to
# chart the opposite relation.)
p = '{NP} (VP) is less important than {NP}'

g = Graph()
for i in range(10):
    hits = Bing().search('"is less important than"', start=i+1, count=50)
    for result in hits:
        # lower-case -> plain text -> parse tree
        s = parsetree(plaintext(result.text.lower()))
        for m in search(p, s):
            x = m.group(1).string  # NP left
            y = m.group(2).string  # NP right
            for phrase in (x, y):
                if phrase not in g:
                    g.add_node(phrase)
            g.add_edge(g[x], g[y], stroke=(0,0,0,0.75))  # R,G,B,A

# Keep only the largest subgraph.
g = g.split()[0]

# Shade the first 40 nodes (sorted by Node.weight) according to their weight.
for n in g.sorted()[:40]:
    n.fill = (0, 0.5, 1, 0.75 * n.weight)

g.export('test', directed=True, weighted=0.6)
# Find "NP (VP) more important than NP" in the text `s`, link the two noun
# phrases in graph `g`, then export the largest subgraph to 'test'.
# NOTE(review): `s` and `g` are not created in this snippet; they must be
# defined by code that runs before it.
s = Sentence(parse(s))
print(s)  # one parenthesised argument: identical output on Python 2 and 3
p = Pattern.fromstring('NP (VP) more important than NP')
for m in p.search(s):
    a = m.constituents(p[+0])[-1]  # Left NP.
    b = m.constituents(p[-1])[+0]  # Right NP.
    # For a Chunk use its head, falling back to the chunk itself when the
    # head is falsy; anything else is used as it is.
    a = ((a.head or a) if isinstance(a, Chunk) else a).string
    b = ((b.head or b) if isinstance(b, Chunk) else b).string
    if a and b:
        if a not in g:
            g.add_node(a, radius=5, stroke=(0, 0, 0, 0.8))
        if b not in g:
            g.add_node(b, radius=5, stroke=(0, 0, 0, 0.8))
        # The edge runs from the right NP to the left NP.
        g.add_edge(g[b], g[a], stroke=(0, 0, 0, 0.6))

g = g.split()[0]  # Largest subgraph.
for n in g.sorted()[:40]:  # Sorted by Node.weight.
    n.fill = (0.0, 0.5, 1.0, 0.7 * n.weight)

export(g, 'test', directed=True, weighted=0.6, distance=6, force=0.05, repulsion=150)

# List the exported files.
import os
os.system('ls -lR test/')
# Example of pattern: http://www.clips.ua.ac.be/pages/pattern