def get_pattern_data(search_param):
    """Mine tweets for noun phrases related by *search_param* and export a graph.

    First prints the plain text of every tweet returned by a cached Twitter
    search for ``search_param``.  Then ten further result batches are parsed
    and every match of ``{NP} (VP) <search_param> {NP}`` adds an edge from the
    left noun phrase to the right one.  Up to 40 nodes from ``g.sorted()`` get
    a blue fill scaled by their weight, and the graph is exported to ``data``.

    search_param -- phrase used both as the Twitter query and as the literal
                    middle part of the search pattern.
    """
    twitter = Twitter(language='en')
    for tweet in twitter.search(search_param, cached=True):
        print(plaintext(tweet.text).encode('ascii', 'ignore').decode('utf-8'))

    # The pattern depends only on search_param, so build it once instead of
    # once per tweet.
    pattern = '{NP} (VP) ' + search_param + ' {NP}'

    g = Graph()
    for i in range(10):
        for result in twitter.search(search_param, start=i + 1, count=50):
            sentence = parsetree(plaintext(result.text.lower()))
            for m in search(pattern, sentence):
                x = m.group(1).string  # NP left
                y = m.group(2).string  # NP right
                if x not in g:
                    g.add_node(x)
                if y not in g:
                    g.add_node(y)
                g.add_edge(g[x], g[y], stroke=(0, 0, 0, 0.75))  # R,G,B,A

    # NOTE: reducing the graph to its largest subgraph (g.split()[0]) was
    # disabled in the original snippet and stays disabled here.
    for n in g.sorted()[:40]:  # Sort by Node.weight.
        n.fill = (0, 0.5, 1, 0.75 * n.weight)
    g.export('data', directed=False, weighted=0.6)
def compare_visualization(product_sku, compare_phrase):
    """Export a graph of noun phrases that reviews of one product compare.

    Every review comment stored for ``product_sku`` is lower-cased, stripped
    to plain text and parsed.  Each match of
    ``{NP} (VP) <compare_phrase> {NP}`` (e.g. ``compare_phrase='faster than'``)
    adds an edge from the left noun phrase to the right one.  After each
    review the fraction of reviews processed so far is printed.

    product_sku    -- value matched against ``ReviewInfo.sku``.
    compare_phrase -- literal phrase placed between the two noun phrases.

    Returns True when at least one node was found; in that case the largest
    subgraph is exported to ``static/compare_visualization``.  Returns False
    (and exports nothing) when the graph stayed empty.
    """
    # NOTE(review): ReviewInfo looks like a Django model; len() on the result
    # is assumed to be cheap once the rows have been fetched -- confirm.
    all_reviews = ReviewInfo.objects.all().filter(sku=product_sku)
    total = len(all_reviews)

    # Loop-invariant: depends only on compare_phrase.
    pattern = '{NP} (VP) ' + compare_phrase + ' {NP}'

    g = Graph()
    for done, review in enumerate(all_reviews, 1):
        sentence = parsetree(plaintext(review.comment.lower()))
        for m in search(pattern, sentence):
            x = m.group(1).string  # NP left
            y = m.group(2).string  # NP right
            if x not in g:
                g.add_node(x)
            if y not in g:
                g.add_node(y)
            g.add_edge(g[x], g[y], stroke=(0, 0, 0, 0.75))  # R,G,B,A
        # Progress ratio; float() keeps true division on Python 2.  The
        # formatted string reproduces the output of the former
        # ``print ratio, '\r'`` statement and also runs on Python 3.
        print('%s \r' % (float(done) / total))

    if len(g) == 0:
        return False

    g = g.split()[0]  # Largest subgraph.
    for n in g.sorted()[:80]:  # Sort by Node.weight.
        n.fill = (0, 0.5, 1, 0.75 * n.weight)
    g.export('static/compare_visualization', directed=True, weighted=2.0)
    return True
def get_pattern_data(search_param):
    """Print tweets matching *search_param*, then graph the noun phrases it links.

    The cached Twitter search results are printed as ASCII-filtered plain
    text.  Ten more result batches (50 tweets requested each) are then parsed;
    each match of ``{NP} (VP) <search_param> {NP}`` connects the left noun
    phrase to the right one.  Up to 40 nodes from ``g.sorted()`` are filled
    blue with an alpha proportional to their weight, and the graph is
    exported to the ``data`` folder.

    search_param -- Twitter query, also used verbatim inside the pattern.
    """
    twitter = Twitter(language='en')
    for tweet in twitter.search(search_param, cached=True):
        print(plaintext(tweet.text).encode('ascii', 'ignore').decode('utf-8'))

    # Built once: the pattern does not change between tweets.
    np_pattern = '{NP} (VP) ' + search_param + ' {NP}'

    g = Graph()
    for batch in range(10):
        results = twitter.search(search_param, start=batch + 1, count=50)
        for result in results:
            tree = parsetree(plaintext(result.text.lower()))
            for match in search(np_pattern, tree):
                left = match.group(1).string   # NP left
                right = match.group(2).string  # NP right
                for phrase in (left, right):
                    if phrase not in g:
                        g.add_node(phrase)
                g.add_edge(g[left], g[right], stroke=(0, 0, 0, 0.75))  # R,G,B,A

    # NOTE: the "keep only the largest subgraph" step (g.split()[0]) was
    # commented out in the original and is still not applied.
    for node in g.sorted()[:40]:  # Sort by Node.weight.
        node.fill = (0, 0.5, 1, 0.75 * node.weight)
    g.export('data', directed=False, weighted=0.6)
# This example demonstrates how a graph visualization can be exported to HTML, # using the HTML5 <canvas> tag and Javascript. # All properties (e.g. stroke color) of nodes and edges are ported. g = Graph() # Random nodes. for i in range(50): g.add_node(id=str(i + 1), radius=5, stroke=(0, 0, 0, 1), text=(0, 0, 0, 1)) # Random edges. for i in range(75): node1 = choice(g.nodes) node2 = choice(g.nodes) g.add_edge(node1, node2, length=1.0, weight=random(), stroke=(0, 0, 0, 1)) for node in g.sorted()[:20]: # More blue = more important. node.fill = (0.6, 0.8, 1.0, 0.8 * node.weight) # This node's label is different from its id. # We'll also make it a link, see the href attribute at the bottom. g["1"].text.string = "home" # The export() command generates a folder with an index.html, # that displays the graph using an interactive, force-based spring layout. # You can drag the nodes around - open index.html in a browser and try it out! # The layout can be tweaked in many ways: export( g, "test",
import os
import sys

# Make the pattern package three directories up importable; inserted at the
# front of sys.path so it is searched first.
sys.path.insert(0, os.path.join("..", "..", ".."))

from pattern.graph import Graph, CENTRALITY

# Simple Graph demonstration.
g = Graph()
for n in ("tree", "nest", "bird", "fly", "insect", "ant"):
    g.add_node(n)

# Edges are added in the same order as the original one-call-per-edge listing.
for id1, id2 in (
    ("tree", "nest"),
    ("nest", "bird"),
    ("bird", "fly"),
    ("fly", "insect"),
    ("insect", "ant"),
    ("ant", "tree"),
    ("ant", "bird"),
):
    g.add_edge(id1, id2)

# Parenthesised single-argument prints give the same output on Python 2 and
# also run on Python 3.
print(g.shortest_path(g.node("tree"), g.node("fly")))
print(g.shortest_path(g.node("nest"), g.node("ant")))
print("")

# Which nodes get the most traffic?
print(g.sorted(order=CENTRALITY))
# Random nodes.
for i in range(50):
    g.add_node(id=str(i + 1),
               radius=5,
               stroke=(0, 0, 0, 1),
               text=(0, 0, 0, 1))

# Random edges.
for i in range(75):
    node1 = choice(g.nodes)
    node2 = choice(g.nodes)
    g.add_edge(node1, node2,
               length=1.0,
               weight=random(),
               stroke=(0, 0, 0, 1))

for node in g.sorted()[:20]:
    # More blue = more important.
    node.fill = (0.6, 0.8, 1.0, 0.8 * node.weight)

g.prune(0)

# This node's label is different from its id.
# We'll make it a hyperlink, see the href attribute at the bottom.
# Node ids are the strings "1".."50" (see add_node above), so the lookup key
# must be "1", not 1.  prune() may have removed that node, so only relabel it
# when it is still in the graph.
if "1" in g:
    g["1"].text.string = "home"

# The export() command generates a folder with an index.html,
# that displays the graph using an interactive, force-based spring layout.
# You can drag the nodes around - open index.html in a browser and try it out!
# The layout can be tweaked in many ways:
from pattern.web import Bing, plaintext
from pattern.en import parsetree
from pattern.search import search
from pattern.graph import Graph

# Phrase being mined.  To mine "more important than" instead, use the query
# '"more important than"' and the pattern '{NP} (VP) more important than {NP}'.
# The pattern is loop-invariant, so it is built once here.
p = '{NP} (VP) is less important than {NP}'

g = Graph()
for i in range(10):
    for result in Bing().search('"is less important than"', start=i+1, count=50):
        s = parsetree(plaintext(result.text.lower()))
        for m in search(p, s):
            x = m.group(1).string  # NP left
            y = m.group(2).string  # NP right
            if x not in g:
                g.add_node(x)
            if y not in g:
                g.add_node(y)
            g.add_edge(g[x], g[y], stroke=(0, 0, 0, 0.75))  # R,G,B,A

# Without any match the graph has no nodes; split() then presumably returns
# an empty list and [0] would raise IndexError, so skip the export instead.
if len(g) > 0:
    g = g.split()[0]  # Largest subgraph.

    for n in g.sorted()[:40]:  # Sort by Node.weight.
        n.fill = (0, 0.5, 1, 0.75 * n.weight)

    g.export('test', directed=True, weighted=0.6)
import os
import sys

# Make the pattern package three directories up importable; appended, so
# existing sys.path entries are searched first.
sys.path.append(os.path.join("..", "..", ".."))

from pattern.graph import Graph, CENTRALITY

# Simple Graph demonstration.
g = Graph()
for n in ("tree", "nest", "bird", "fly", "insect", "ant"):
    g.add_node(n)

g.add_edge("tree", "nest")
g.add_edge("nest", "bird")
g.add_edge("bird", "fly")
g.add_edge("fly", "insect")
g.add_edge("insect", "ant")
g.add_edge("ant", "tree")
g.add_edge("ant", "bird")

# Shortest paths between two pairs of nodes.  The parenthesised form prints
# the same text on Python 2 and also runs on Python 3.
tree_to_fly = g.shortest_path(g.node("tree"), g.node("fly"))
print(tree_to_fly)
nest_to_ant = g.shortest_path(g.node("nest"), g.node("ant"))
print(nest_to_ant)
print("")

# Which nodes get the most traffic?
print(g.sorted(order=CENTRALITY))
p = Pattern.fromstring('NP (VP) more important than NP') for m in p.search(s): a = m.constituents(p[+0])[-1] # Left NP. b = m.constituents(p[-1])[+0] # Right NP. a = (isinstance(a, Chunk) and a.head or a).string b = (isinstance(b, Chunk) and b.head or b).string if a and b: if a not in g: g.add_node(a, radius=5, stroke=(0, 0, 0, 0.8)) if b not in g: g.add_node(b, radius=5, stroke=(0, 0, 0, 0.8)) g.add_edge(g[b], g[a], stroke=(0, 0, 0, 0.6)) g = g.split()[0] # Largest subgraph. for n in g.sorted()[:40]: # Sorted by Node.weight. n.fill = (0.0, 0.5, 1.0, 0.7 * n.weight) export(g, 'test', directed=True, weighted=0.6, distance=6, force=0.05, repulsion=150) import os os.system('ls -lR test/') # Example of pattern: http://www.clips.ua.ac.be/pages/pattern from pattern.web import Bing, plaintext