def __init__(self, spider_model): print "start : init : SpiderView" self.spider_model = spider_model # get data from spider model self.show_graph = Graph() # graph to draw self.dict_used = {} self.index_panel = Panel("Indexing", fixed=False, modal=True) self.search_panel = Panel("Search..", x=450, fixed=False, modal=True) self.result_text = Text("") # set text to show result self.update_complete_text = Text("") # set text to update status self.update_state = 0 # State of update self.index_website = "" self.deep = 0 self.graph_website = "" self.website_field = "" print "complete : init : SpiderView"
def set_into_graph(self, root_website, json_dict): print "start : set_into_graph : SpiderModelShowing" if type(root_website) != str or type(json_dict) != dict: raise TypeError("Type Error input is not type match") show_graph = Graph() # graph to draw for netloc_uni in json_dict[root_website]: # use netloc in json_dict netloc = netloc_uni.encode("ascii", "ignore") # change unicode to string for website_uni in json_dict[root_website][ netloc]: # use website in json_dict website = website_uni.encode( "ascii", "ignore") # change unicode to string if self.get_netloc(website) != "": # netloc has word show_graph.add_node( self.get_netloc(website)) # add netloc in draw graph # use child website for child_website_uni in json_dict[root_website][netloc][ website]["website"]: child_website = child_website_uni.encode("ascii", "ignore") # add netloc of child website into draw graph if self.get_netloc(child_website) != "": show_graph.add_node(self.get_netloc(child_website)) # detect netloc of website and child-website is not same if self.get_netloc(website) != self.get_netloc(child_website) \ and self.get_netloc(website) != "" \ and self.get_netloc(child_website) != "": # add edge website to child-website to draw graph show_graph.add_edge(self.get_netloc(website), self.get_netloc(child_website)) print "complete : set_into_graph : SpiderModelShowing" return show_graph
def create_graph(p_node, s):
    """Populate the module-global graph `g` with p_node, its followers,
    and each follower's followers (deduplicated against the parent's and
    earlier siblings' follower lists), then tune layout parameters.

    p_node -- id of the root node; must be a key in s.pAll
    s      -- object whose pAll maps node ids to objects with a `follower` list
    Side effect: rebinds the global `g` to a fresh Graph.
    """
    global g
    g = Graph()
    p = p_node  # remember the root; p_node is reused below for each sibling
    temp = s.pAll[p_node].follower  # direct followers of the root
    # root node drawn with the highlighted (red) stroke
    g.add_node(id=str(p_node),
               radius=5,
               stroke=color(1, 0, 0.25, 1),
               text=color(1))
    for i in range(len(temp)):  #100
        # one plain (white-stroke) node per direct follower
        g.add_node(id=str(temp[i]), radius=5, stroke=color(1), text=color(1))
    # Random edges.
    # connect the root to each of its direct followers
    p_links = s.pAll[p_node].follower
    for i in range(len(p_links)):
        node1 = str(p_node)  #choice(g.nodes)
        node2 = str(p_links[i])  #choice(g.nodes)
        g.add_edge(node1,
                   node2,
                   length=500.0,
                   weight=random(),
                   stroke=color(1, 0, 0.25, 1))
    #New Round
    # expand one level deeper: each direct follower ("sibling") in turn
    for I in range(len(temp)):
        p_node = s.pAll[p].follower[I]  #0
        p_links = s.pAll[p_node].follower
        #print [x * 0.01 for x in range(0,100)]
        # random RGB components in {0.00 .. 0.99} shared by this sibling's subtree
        r = rd.choice([x * 0.01 for x in range(0, 100)
                       ])  #rd.choice([0,51,255]) #rd.randint(10,200)
        green = rd.choice([x * 0.01 for x in range(0, 100)
                           ])  #rd.choice([255,128,255]) #rd.randint(0,100)
        b = rd.choice([x * 0.01 for x in range(0, 100)
                       ])  #rd.choice([128,0,255]) #rd.randint(0,200)
        #print r,green,b
        for i in range(len(p_links)):
            node1 = str(p_node)  #choice(g.nodes)
            bul = True  # stays True while this link is not drawn elsewhere
            if p_links[i] not in s.pAll[p].follower:  #Not a parent follower
                for j in range(I):  #Loop to check another sibling's follower
                    sibling = s.pAll[p].follower[j]
                    if p_links[i] in s.pAll[sibling].follower:
                        bul = False
                if bul:
                    # first time this grandchild is seen: node + colored edge
                    g.add_node(id=str(p_links[i]),
                               radius=5,
                               stroke=color(r, green, b, 1),
                               text=color(1))
                    node2 = str(p_links[i])  #choice(g.nodes)
                    g.add_edge(node1,
                               node2,
                               length=50.0,
                               weight=random(),
                               stroke=color(r, green, b, 1))
    # Two handy tricks to prettify the layout:
    # 1) Nodes with a higher weight (i.e. incoming traffic) appear bigger.
    for node in g.nodes:
        node.radius = 5  #node.radius + node.radius*node.weight
    # 2) Nodes with only one connection ("leaf" nodes) have a shorter connection.
    for node in g.nodes:
        if len(node.edges) == 1:
            node.edges[0].length = 0.5
    g.prune(depth=0)  # Remove orphaned nodes with no connections.
    g.distance = 15  # Overall spacing between nodes.
    g.layout.force = 0.01  # Strength of the attractive & repulsive force.
    g.layout.repulsion = 5  # Repulsion radius.


dragged = None
from nodebox.graphics import * from nodebox.graphics.physics import Node, Edge, Graph import random as rd # Create a graph with randomly connected nodes. # Nodes and edges can be styled with fill, stroke, strokewidth parameters. # Each node displays its id as a text label, stored as a Text object in Node.text. # To hide the node label, set the text parameter to None. #g = None g = Graph() # Random nodes. def create_graph(p_node, s): global g g = Graph() p = p_node temp = s.pAll[p_node].follower g.add_node(id=str(p_node), radius=5, stroke=color(1, 0, 0.25, 1), text=color(1)) for i in range(len(temp)): #100
def create_graph(p_node, s): completed_nodes = [] uncomplete_nodes = [] global g g = Graph() p = p_node temp = s.pAll[p_node].follower g.add_node(id=str(p_node), radius=5, stroke=color(1, 0, 0.25, 1), text=color(1)) ## print "-"*50 ## print "parent_node ",p_node ## print "parent follower ",temp for i in range(len(temp)): #100 g.add_node(id=str(temp[i]), radius=5, stroke=color(1), text=color(1)) completed_nodes.append(p_node) # Random edges. p_links = s.pAll[p_node].follower for i in range(len(p_links)): node1 = str(p_node) #choice(g.nodes) node2 = str(p_links[i]) #choice(g.nodes) uncomplete_nodes.append(p_links[i]) g.add_edge(node1, node2, length=500.0, weight=random(), stroke=color(1, 0, 0.25, 1)) ## print "-"*50 ## print "Completed_Nodes ",completed_nodes ## print "Uncomplete_Nodes ",uncomplete_nodes while len(uncomplete_nodes) <> 0: node1 = uncomplete_nodes[0] follower_list = s.pAll[node1].follower ## print "node1 which became parent ",node1 for i in follower_list: if i not in completed_nodes: node_1 = str(node1) node2 = str(i) print node_1, "--->", i uncomplete_nodes.append(i) g.add_node(id=str(i), radius=5, stroke=color(1, 0, 0.25, 1), text=color(1)) g.add_edge(node_1, node2, length=50.0, stroke=color(1), weight=random()) completed_nodes.append(node1) del uncomplete_nodes[0] ## print "Completed_Nodes ",completed_nodes ## print "Uncomplete_Nodes ",uncomplete_nodes #New Round ## for I in range(len(temp)): ## ## p_node = s.pAll[p].follower[I] #0 ## p_links = s.pAll[p_node].follower ## ## print "Sibling Node ",p_node ## print "Sibling Follower ",p_links ## ## #print [x * 0.01 for x in range(0,100)] ## r = rd.choice([x * 0.01 for x in range(0,100)])#rd.choice([0,51,255]) #rd.randint(10,200) ## green = rd.choice([x * 0.01 for x in range(0,100)]) #rd.choice([255,128,255]) #rd.randint(0,100) ## b = rd.choice([x * 0.01 for x in range(0,100)]) #rd.choice([128,0,255]) #rd.randint(0,200) ## ## #print r,green,b ## ## for i in range(len(p_links)): ## node1 = 
str(p_node)#choice(g.nodes) ## ## bul = True ## if p_links[i] not in s.pAll[p].follower: #Not a parent follower ## ## for j in range (I): #Loop to check another sibling's follower ## ## sibling = s.pAll[p].follower[j] ## ## if p_links[i] in s.pAll[sibling].follower: ## ## bul = False ## ## if bul: ## g.add_node(id=str(p_links[i]),radius = 5,stroke = color(r, green, b, 1),text = color(1)) ## node2 = str(p_links[i])#choice(g.nodes) ## g.add_edge(node1, node2, ## length = 50.0, ## weight = random(), ## stroke = color(r, green, b, 1)) ## # Two handy tricks to prettify the layout: # 1) Nodes with a higher weight (i.e. incoming traffic) appear bigger. for node in g.nodes: node.radius = 5 #node.radius + node.radius*node.weight # 2) Nodes with only one connection ("leaf" nodes) have a shorter connection. for node in g.nodes: if len(node.edges) == 1: node.edges[0].length = 0.5 g.prune(depth=0) # Remove orphaned nodes with no connections. g.distance = 15 # Overall spacing between nodes. g.layout.force = 0.01 # Strength of the attractive & repulsive force. g.layout.repulsion = 5 # Repulsion radius. dragged = None
def test_total(self):
    """End-to-end check: parse one page, extract content/links, build the
    graph and usage counts, write the index file, then index and rank it."""
    root_website = "http://www.meawnam.com"
    # os.path.join keeps the test portable (original hard-coded "\\");
    # mode "r" suffices (original used "r+" for a read-only open) and the
    # context manager closes the file even if a later assertion fails
    with open(os.path.join(os.getcwd(), "other", "test_total.html"),
              "r") as file_data:
        html_code = file_data.read()
    data_str_html = self.spider.get_html_code_to_datastr(
        root_website, html_code)
    self.assertEqual(
        data_str_html, "GAMEBOY [GOOGLE](http://www.google.com) "
        "[electric](http://www.electric.com) "
        "[spotlight](http://www.spotlight.com)")
    content_html = self.spider.get_content_from_datastr(data_str_html)
    self.assertEqual(content_html, "GAMEBOY")
    weblink_html = self.spider.get_weblink_from_datastr(data_str_html)
    self.assertEqual(weblink_html, "GOOGLE electric spotlight")
    website_list = self.spider.get_website_from_datastr(data_str_html)
    self.assertListEqual(website_list, [
        "http://www.google.com", "http://www.electric.com",
        "http://www.spotlight.com"
    ])
    # expected crawl structure: root -> netloc -> page -> content/links
    dict_json = {
        root_website: {
            self.spider.get_netloc(root_website): {
                root_website: {
                    "content": content_html + " " + weblink_html,
                    "website": website_list
                }
            }
        }
    }
    content_dict = {root_website: content_html + " " + weblink_html}
    website_dict = {root_website: website_list}
    self.assertDictEqual(
        self.spider.get_json_string_for_deep(root_website, website_dict,
                                             content_dict), dict_json)
    graph = Graph()
    graph.add_node("www.meawnam.com")
    graph.add_node("www.google.com")
    graph.add_node("www.electric.com")
    graph.add_node("www.spotlight.com")
    graph.add_edge("www.meawnam.com", "www.google.com")
    graph.add_edge("www.meawnam.com", "www.electric.com")
    # NOTE(review): the electric edge is added twice in the original test;
    # kept as-is to preserve the expected Graph -- confirm it is intentional
    graph.add_edge("www.meawnam.com", "www.electric.com")
    graph.add_edge("www.meawnam.com", "www.spotlight.com")
    self.assertEqual(graph,
                     self.spider.set_into_graph(root_website, dict_json))
    dict_n_used = {
        "www.meawnam.com": 0,
        "www.google.com": 1,
        "www.electric.com": 1,
        "www.spotlight.com": 1
    }
    self.assertEqual(dict_n_used,
                     self.spider.set_n_used(root_website, dict_json))
    # persist the index so the indexing step below can re-read it
    index_path = os.path.join(os.getcwd(), "other", "test_index_total.json")
    with open(index_path, "w") as save_file:
        save_file.write(json.dumps(dict_json, indent=4, sort_keys=True))
    file_list = [index_path]
    index_dict = self.spider.indexing({}, file_list)
    my_indexing = {
        "gameboy": {
            "http://www.meawnam.com": {
                "used": 0,
                "word": 1
            }
        },
        "google": {
            "http://www.meawnam.com": {
                "used": 0,
                "word": 1
            }
        },
        "electric": {
            "http://www.meawnam.com": {
                "used": 0,
                "word": 1
            }
        },
        "spotlight": {
            "http://www.meawnam.com": {
                "used": 0,
                "word": 1
            }
        }
    }
    self.assertDictEqual(index_dict, my_indexing)
    ranking_dict = self.spider.ranking(index_dict)
    my_ranking = {
        "gameboy": [("http://www.meawnam.com", {
            "used": 0,
            "word": 1
        })],
        "google": [("http://www.meawnam.com", {
            "used": 0,
            "word": 1
        })],
        "electric": [("http://www.meawnam.com", {
            "used": 0,
            "word": 1
        })],
        "spotlight": [("http://www.meawnam.com", {
            "used": 0,
            "word": 1
        })]
    }
    self.assertDictEqual(ranking_dict, my_ranking)