def test_02_get_hierarchy_for_module_returns_single_node_when_nothing_depend_on_module(self, mock_client):
    """
    Verify that get_hierarchy_for_module builds a one-node tree when no
    other module depends on the module under inspection.

    :param mock_client: A mocked out version of erppeek.Client
    :return:
    """
    # Mock Up - patch the graph's search methods at class level, keeping
    # the originals so they can be restored afterwards
    graph_cls = DependencyGraph
    saved_module_search = graph_cls.module_search
    saved_dependency_search = graph_cls.dependency_search
    saved_client_search = mock_client.search
    # Module lookup succeeds (id 666) but nothing depends on it
    graph_cls.module_search = MagicMock(return_value=[666])
    graph_cls.dependency_search = MagicMock(return_value=[])
    graph = graph_cls('valid_module')
    # The expected hierarchy is just the module itself as the root node
    expected_hierarchy = Tree()
    expected_hierarchy.create_node('valid_module', 'valid_module')
    self.assertEqual(graph.hierarchy.to_json(), expected_hierarchy.to_json(), 'get_hierarchy_for_module did not return [] when finding no dependent modules')
    # Mock Down - undo the patching so later tests see the real methods
    mock_client.stop()
    graph_cls.module_search.stop()
    mock_client.search.stop()
    mock_client.search = saved_client_search
    graph_cls.dependency_search.stop()
    graph_cls.module_search = saved_module_search
    graph_cls.dependency_search = saved_dependency_search
class TreePipeline(object):
    """
    Scrapy item pipeline (Python 2: note the `print` statement below) that
    accumulates scraped breadcrumb paths into a single treelib Tree and
    writes it out as JSON and plain text when the spider closes.
    """

    def open_spider(self, spider):
        # Fresh tree with a synthetic "root" node for every crawl.
        self.tree = Tree()
        self.tree.create_node("root", "root")

    def process_item(self, item, spider):
        """
        Insert one breadcrumb trail (item['text']) into the tree.

        Node identifiers are the full "|"-joined path prefix, so the same
        leaf label can appear under different parents without colliding.
        """
        lst = item['text']
        # Strip whitespace and drop the '...' ellipsis marker from each crumb.
        lst = [x.strip() for x in [y.replace('...', '') for y in lst]]
        item['pagetitle'] = item['pagetitle'].replace('...', '')
        # The last breadcrumb is replaced by the (cleaned) page title.
        lst[-1] = item['pagetitle']
        for idx, elem in enumerate(lst):
            if idx == 0:
                previous = "root"
            else:
                # Parent id is the path prefix up to (not including) idx.
                previous = "|".join(lst[:idx])
            # Node id is the path prefix including this element.
            elem = "|".join(lst[:idx + 1])
            # elem = elem.replace('...', '')
            # Round-trip through UTF-8; presumably normalises str/unicode
            # under Python 2 -- TODO confirm it is still needed.
            elem = elem.encode('utf-8').decode('utf-8')
            if not self.tree.contains(elem):
                print "Adding node %s" % elem
                self.tree.create_node(elem, elem, parent=previous)
        # self.tree.show()
        return item

    def close_spider(self, spider):
        # Dump the finished tree: console, JSON file, and treelib text file.
        self.tree.show()
        with open(makepath('data/cats/tree.json'), 'w') as outfile:
            outfile.write(self.tree.to_json())
        self.tree.save2file(makepath('data/cats/tree.tree'))
def test_02_get_hierarchy_for_module_returns_single_node_when_nothing_depend_on_module( self, mock_client): """ Test that get_hierarchy_for_module returns a single node tree structure if no dependent modules are found :param mock_client: A mocked out version of erppeek.Client :return: """ # Mock Up mock_dp = DependencyGraph orig_mod_search = mock_dp.module_search orig_dep_search = mock_dp.dependency_search orig_client_search = mock_client.search mock_dp.module_search = MagicMock(return_value=[666]) mock_dp.dependency_search = MagicMock(return_value=[]) mock_dg = mock_dp('valid_module') test_hierarchy = Tree() test_hierarchy.create_node('valid_module', 'valid_module') self.assertEqual( mock_dg.hierarchy.to_json(), test_hierarchy.to_json(), 'get_hierarchy_for_module did not return [] when finding no dependent modules' ) # Mock Down mock_client.stop() mock_dp.module_search.stop() mock_client.search.stop() mock_client.search = orig_client_search mock_dp.dependency_search.stop() mock_dp.module_search = orig_mod_search mock_dp.dependency_search = orig_dep_search
def test_04_get_hierarchy_renames_duplicate_modules(self, mock_client):
    """
    Check that get_hierarchy_for_module gives the second occurrence of a
    dependent module a fresh ID so the same module can be listed twice:
    - valid_module
        - dependent_module_one
        - dependent_module_two
            - dependent_mdodule_one1
    :param mock_client: A mocked out version of erppeek.Client
    :return:
    """
    # Mock Up - patch class-level search methods, keeping the originals
    graph_cls = DependencyGraph
    saved_module_search = graph_cls.module_search
    saved_dependency_search = graph_cls.dependency_search
    saved_client_search = mock_client.search
    graph_cls.module_search = MagicMock(return_value=[666])

    def fake_dependency_search(value):
        # Map module names to dependency record IDs
        if value == 'valid_module':
            return [666]
        if value == 'dependent_module_one':
            return [668]
        if value == 'dependent_module_two':
            return [664]

    graph_cls.dependency_search = MagicMock(side_effect=fake_dependency_search)

    def fake_dependency_read(value):
        # Record 666 pulls in both modules; 664 pulls in module one again
        if value == [666]:
            return ['dependent_module_one', 'dependent_module_two']
        if value == [664]:
            return ['dependent_module_one']
        return []

    graph_cls.dependency_read = MagicMock(side_effect=fake_dependency_read)
    graph = graph_cls('valid_module')
    # Build the tree we expect, including the renamed duplicate node
    expected_hierarchy = Tree()
    expected_hierarchy.create_node('valid_module', 'valid_module')
    expected_hierarchy.create_node('dependent_module_one', 'dependent_module_one', parent='valid_module')
    expected_hierarchy.create_node('dependent_module_two', 'dependent_module_two', parent='valid_module')
    expected_hierarchy.create_node('dependent_module_one', 'dependent_module_one1', parent='dependent_module_two')
    self.assertEqual(graph.hierarchy.to_json(), expected_hierarchy.to_json(), 'get_hierarchy_for_module did not return nested dict when finding dependent modules')
    # Mock Down - restore everything patched above
    mock_client.stop()
    graph_cls.module_search.stop()
    mock_client.search.stop()
    mock_client.search = saved_client_search
    graph_cls.dependency_search.stop()
    graph_cls.module_search = saved_module_search
    graph_cls.dependency_search = saved_dependency_search
def testUnjsonify():
    """Round-trip a small Tree through JSON via unjsonify and show it."""
    tree = Tree()
    # (tag/identifier, parent) pairs -- mixes str, int and tuple ids to
    # exercise unjsonify against non-string node identifiers too
    node_specs = [
        ('home', None),
        ('phone', 'home'),
        ('laptop', 'home'),
        ('screen', 'laptop'),
        (19, 'home'),
        ((1, 2), 'screen'),
    ]
    for tag, parent in node_specs:
        tree.create_node(tag, tag, parent=parent)
    j = tree.to_json()
    unjsonify(j).show()
def testUnjsonify():
    """Round-trip a treelib Tree through JSON and display the rebuilt tree."""
    # Build a small tree containing string, int and tuple identifiers to
    # exercise unjsonify against non-string node tags as well.
    tree = Tree()
    tree.create_node('home', 'home')
    tree.create_node('phone', 'phone', parent='home')
    tree.create_node('laptop', 'laptop', parent='home')
    tree.create_node('screen', 'screen', parent='laptop')
    tree.create_node(19, 19, parent='home')
    tree.create_node((1,2), (1,2), parent='screen')
    j = tree.to_json()
    # unjsonify should reconstruct an equivalent tree from the JSON string.
    unjsonify(j).show()
def test_03_get_hierarchy_for_module_returns_a_two_node_tree_when_another_module_depends_on_module(
        self, mock_client):
    """
    Verify that get_hierarchy_for_module builds a two-node tree when exactly
    one other module depends on the module under inspection.

    :param mock_client: A mocked out version of erppeek.Client
    :return:
    """
    # Mock Up - patch class-level search methods, keeping the originals
    graph_cls = DependencyGraph
    saved_module_search = graph_cls.module_search
    saved_dependency_search = graph_cls.dependency_search
    saved_client_search = mock_client.search
    graph_cls.module_search = MagicMock(return_value=[666])

    def fake_dependency_search(value):
        # Map module names to dependency record IDs
        if value == 'valid_module':
            return [666]
        if value == 'dependent_module':
            return [668]

    graph_cls.dependency_search = MagicMock(side_effect=fake_dependency_search)

    def fake_dependency_read(value):
        # Only record 666 has a dependent module
        if value == [666]:
            return ['dependent_module']
        return []

    graph_cls.dependency_read = MagicMock(side_effect=fake_dependency_read)
    graph = graph_cls('valid_module')
    # Expected: the root module with a single dependent child
    expected_hierarchy = Tree()
    expected_hierarchy.create_node('valid_module', 'valid_module')
    expected_hierarchy.create_node('dependent_module', 'dependent_module', parent='valid_module')
    self.assertEqual(graph.hierarchy.to_json(), expected_hierarchy.to_json(), 'get_hierarchy_for_module did not return nested dict when finding dependent modules')
    # Mock Down - restore everything patched above
    mock_client.stop()
    graph_cls.module_search.stop()
    mock_client.search.stop()
    mock_client.search = saved_client_search
    graph_cls.dependency_search.stop()
    graph_cls.module_search = saved_module_search
    graph_cls.dependency_search = saved_dependency_search
def test_create_tree(self):
    """create_tree should mirror the expected directory hierarchy of the files."""
    # Three files in the same repo, two nested under master subdirectories
    input_files = [
        FileModel("Bernardoow/Elm-SqlAlchemy-Replace/blob/master/tests/Tests.elm", 132, 4.21, 'KB', 1, 'elm'),
        FileModel("Bernardoow/Elm-SqlAlchemy-Replace/blob/master/src/test_model.py", 73, 2.62, 'KB', 1, 'py'),
        FileModel("Bernardoow/Elm-SqlAlchemy-Replace/blob/master/readme.md", 73, 2.62, 'Bytes', 1, 'md'),
    ]
    tree_response = self.handler.create_tree(input_files)
    # Expected tree, built from (tag, identifier, parent) triples; the
    # insertion order matches what create_tree is expected to produce
    expected = Tree()
    node_specs = [
        ("[Bernardoow]", "Bernardoow", None),  # root node
        ("[Elm-SqlAlchemy-Replace]", "Elm-SqlAlchemy-Replace", "Bernardoow"),
        ("[blob]", "blob", "Elm-SqlAlchemy-Replace"),
        ("[master]", "master", "blob"),
        ("readme.md (73 linhas)", "readme.md", "master"),
        ("[tests]", "tests", "master"),
        ("Tests.elm (132 linhas)", "Tests.elm", "tests"),
        ("[src]", "src", "master"),
        ("test_model.py (73 linhas)", "test_model.py", "src"),
    ]
    for tag, identifier, parent in node_specs:
        expected.create_node(tag, identifier, parent=parent)
    self.assertEqual(tree_response.to_json(), expected.to_json())
def test_03_get_hierarchy_for_module_returns_a_two_node_tree_when_another_module_depends_on_module(self, mock_client): """ Test that get_hierarchy_for_module returns a tree structure with two nodes if a dependent module is found :param mock_client: A mocked out version of erppeek.Client :return: """ # Mock Up mock_dp = DependencyGraph orig_mod_search = mock_dp.module_search orig_dep_search = mock_dp.dependency_search orig_client_search = mock_client.search mock_dp.module_search = MagicMock(return_value=[666]) def dependency_search_side_effect(value): if value == 'valid_module': return [666] elif value == 'dependent_module': return [668] mock_dp.dependency_search = MagicMock() mock_dp.dependency_search.side_effect = dependency_search_side_effect def dependency_read_side_effect(value): if value == [666]: return ['dependent_module'] else: return [] mock_dp.dependency_read = MagicMock() mock_dp.dependency_read.side_effect = dependency_read_side_effect mock_dg = mock_dp('valid_module') test_hierarchy = Tree() test_hierarchy.create_node('valid_module', 'valid_module') test_hierarchy.create_node('dependent_module', 'dependent_module', parent='valid_module') self.assertEqual(mock_dg.hierarchy.to_json(), test_hierarchy.to_json(), 'get_hierarchy_for_module did not return nested dict when finding dependent modules') # Mock Down mock_client.stop() mock_dp.module_search.stop() mock_client.search.stop() mock_client.search = orig_client_search mock_dp.dependency_search.stop() mock_dp.module_search = orig_mod_search mock_dp.dependency_search = orig_dep_search
def tree_test():
    """
    Smoke-test for treelib (Python 2: note the `print` statements): build a
    small tree, serialize to JSON and parse it back with from_json.
    """
    test_titles = [
        "deletion_mapping", "orthotheca", "genetically_modified_sperm",
        "category:intelligence"
    ]
    # Map each title to its list index; from_json presumably uses this to
    # resolve tags back to identifiers -- confirm against from_json.
    titles = dict((e, idx) for idx, e in enumerate(test_titles))
    # Tree testing
    t = Tree()
    t.create_node("deletion_mapping", 15)
    t.create_node("orthotheca", 14, parent=15)
    t.create_node("genetically_modified_sperm", 13, parent=14)
    t.create_node("category:intelligence", 12, parent=14)
    t.show()
    # NOTE: shadows the stdlib `json` module name locally.
    json = t.to_json()
    print "\nAs JSON:"
    print json
    print "\nAnd parsed back into a tree."
    t2 = from_json(json, titles)
    t2.show()
avg = 0.0 if parent is not None: if data is not None: tree.create_node(id,id,data=data,parent=parent) else: tree.create_node(id,id,data=0,parent=parent) else: if data is not None: tree.create_node(id,id,data=data) else: tree.create_node(id,id,data=0) dataval[id] = 0 cursor2.append([id,region,parent,data]) region_db[id] = region complete_data = tree.to_json(with_data=True) resp = json.loads(complete_data) print('JSON RESULT IS ',resp) iterdict(resp) tree21 = Tree() for id,region,parent,data,avg in cursor3: data = None print(id,end=' ') print(region,end=' ') if parent is not None: print(int(parent),end=' ') else: print(None,end=' ')
def construct_tree(self, file_path, child_node_index, json_file_location):
    """
    Build a treelib Tree (plus a flat Node list) from every .csv file found
    under file_path, then pickle each tree, dump it to .txt/.json files in
    json_file_location and persist the nodes via esgDatabase.

    Node identifiers are UUIDs keyed by "<cell value><row key>" in a local
    lookup table, so repeated cell values in different rows get distinct nodes.

    :param file_path: directory to walk for .csv input files
    :param child_node_index: column index; columns beyond it are attached
        under the node for row[3] -- presumably a "parent" column; confirm
        against the CSV layout
    :param json_file_location: output directory for the dump files
    :return: 'success' or 'failed'
    """
    node_list = []
    try:
        directory = os.path.join(file_path)
        for root, dirs, files in os.walk(directory):
            for file in files:
                if str(file).endswith(".csv"):
                    # NOTE(review): opens `directory + file`, not
                    # os.path.join(root, file) -- csv files in
                    # sub-directories of the walk would fail; confirm the
                    # intended layout is flat.
                    f = open(directory + file, 'r')
                    csv_reader = csv.reader(f, delimiter=',')
                    row_index = 0
                    filename = os.path.basename(f.name)
                    rows = []
                    # "<cell value><row key>" -> node UUID lookup; NOTE:
                    # shadows the builtin `dict` name.
                    dict = {"Root": "root", filename: filename.lower()}
                    esg_tree = Tree()
                    esg_tree.create_node(
                        "Root", "root",
                        data=jsonpickle.encode(
                            NodeParam('source', 'attr', 'desc', 'root'),
                            unpicklable=False))  # root node
                    node = Node(
                        'root', 'Root', '', '',
                        jsonpickle.encode(
                            NodeParam('source', 'attr', 'desc', 'root'),
                            unpicklable=False))
                    node_list.append(node)
                    # Second-level node: one per csv file, under the root.
                    esg_tree.create_node(
                        filename, filename.lower(), parent='root',
                        data=jsonpickle.encode(
                            NodeParam('source', 'attr', 'desc',
                                      str(uuid.uuid1())),
                            unpicklable=False))
                    node = Node(
                        filename.lower(), filename, 'root', '',
                        jsonpickle.encode(
                            NodeParam('source', 'attr', 'desc',
                                      str(uuid.uuid1())),
                            unpicklable=False))
                    node_list.append(node)
                    # Materialise the csv so rows[0] (the header) stays
                    # addressable while iterating data rows.
                    for row in csv_reader:
                        rows.append(row)
                    for row in rows:
                        if row_index != 0:  # skip the header row
                            column_index = 0
                            # data = row
                            for curr_column in row:
                                if str(curr_column) + str(row[0]) not in dict:
                                    if column_index > child_node_index:
                                        # Columns beyond child_node_index hang
                                        # under the node for row[3].
                                        if "\n" in curr_column:
                                            # Multi-line cell: one node per line.
                                            for rowData in curr_column.splitlines():
                                                node_id_key = str(rowData) + str(row[0])
                                                dict[node_id_key] = uuid.uuid1()
                                                esg_tree.create_node(
                                                    rowData,
                                                    str(dict.get(node_id_key)),
                                                    parent=str(dict.get(str(row[3]) + str(row[0]))),
                                                    data=jsonpickle.encode(
                                                        NodeParam(
                                                            (rows[0])[column_index],
                                                            'attr',
                                                            str(rowData).lower(),
                                                            str(dict.get(node_id_key))),
                                                        unpicklable=False))
                                                node = Node(
                                                    str(dict.get(node_id_key)),
                                                    rowData,
                                                    str(dict.get(str(row[3]) + str(row[0]))),
                                                    '',
                                                    jsonpickle.encode(
                                                        NodeParam(
                                                            (rows[0])[column_index],
                                                            'attr',
                                                            str(rowData).lower(),
                                                            str(dict.get(node_id_key))),
                                                        unpicklable=False))
                                                node_list.append(node)
                                        elif curr_column != '':
                                            node_id_key = str(curr_column) + str(row[0])
                                            dict[node_id_key] = uuid.uuid1()
                                            esg_tree.create_node(
                                                curr_column,
                                                str(dict.get(node_id_key)),
                                                parent=str(dict.get(str(row[3]) + str(row[0]))),
                                                data=jsonpickle.encode(
                                                    NodeParam(
                                                        (rows[0])[column_index],
                                                        'attr',
                                                        str(curr_column).lower(),
                                                        str(dict.get(node_id_key))),
                                                    unpicklable=False))
                                            node = Node(
                                                str(dict.get(node_id_key)),
                                                curr_column,
                                                str(dict.get(str(row[3]) + str(row[0]))),
                                                '',
                                                jsonpickle.encode(
                                                    NodeParam(
                                                        (rows[0])[column_index],
                                                        'attr',
                                                        str(curr_column).lower(),
                                                        str(dict.get(node_id_key))),
                                                    unpicklable=False))
                                            node_list.append(node)
                                    else:
                                        # Leading columns form a chain: each
                                        # hangs under the previous column's node
                                        # (or under the file node for column 0).
                                        node_id_key = str(curr_column) + str(row[0])
                                        dict[node_id_key] = uuid.uuid1()
                                        if column_index == 0:
                                            esg_tree.create_node(
                                                curr_column,
                                                str(dict.get(node_id_key)),
                                                parent=str(dict.get(filename)),
                                                data=jsonpickle.encode(
                                                    NodeParam(
                                                        (rows[0])[column_index],
                                                        'attr',
                                                        str(curr_column).lower(),
                                                        str(dict.get(node_id_key))),
                                                    unpicklable=False))
                                            node = Node(
                                                str(dict.get(node_id_key)),
                                                curr_column,
                                                str(dict.get(filename)),
                                                '',
                                                jsonpickle.encode(
                                                    NodeParam(
                                                        (rows[0])[column_index],
                                                        'attr',
                                                        str(curr_column).lower(),
                                                        str(dict.get(node_id_key))),
                                                    unpicklable=False))
                                            node_list.append(node)
                                        else:
                                            esg_tree.create_node(
                                                curr_column,
                                                str(dict.get(node_id_key)),
                                                parent=str(dict.get(str(row[column_index - 1]) + str(row[0]))),
                                                data=jsonpickle.encode(
                                                    NodeParam(
                                                        (rows[0])[column_index],
                                                        'attr',
                                                        str(curr_column).lower(),
                                                        str(dict.get(node_id_key))),
                                                    unpicklable=False))
                                            node = Node(
                                                str(dict.get(node_id_key)),
                                                curr_column,
                                                str(dict.get(str(row[column_index - 1]) + str(row[0]))),
                                                '',
                                                jsonpickle.encode(
                                                    NodeParam(
                                                        (rows[0])[column_index],
                                                        'attr',
                                                        str(curr_column).lower(),
                                                        str(dict.get(node_id_key))),
                                                    unpicklable=False))
                                            node_list.append(node)
                                column_index += 1
                        row_index += 1
                    f.close()
                    filename = filename.replace(".csv", '')
                    # Persist: treelib text dump (.json), pickled tree (.txt),
                    # and the flat node list into the database.
                    with open(json_file_location + filename + ".txt",
                              "wb") as outfile:
                        esg_tree.save2file(json_file_location + filename + ".json")
                        pickle.dump(esg_tree, outfile)
                    esgDatabase().add_data(node_list)
        # NOTE(review): relies on `esg_tree` leaking from the loop above, so
        # only the last file's tree is printed; source formatting was
        # ambiguous here -- confirm intended indentation.
        print(esg_tree.to_json(with_data=True))
        return 'success'
    except OSError:
        print("Path not found exception")
        return 'failed'
    except IOError:
        # NOTE(review): `f` may be unbound here if open() itself raised.
        print('An error occurred trying to read the file.')
        f.close()
        return 'failed'
    except Exception as e:
        print("An error occurred while creating a tree")
        print(e)
        return 'failed'
class StepParse:
    """
    Parser for STEP (ISO 10303-21) assembly files: collects the raw records
    of interest, resolves their #-references down to product names, builds a
    treelib Tree of the product structure and a networkx lattice for plotting.
    """

    def __init__(self):
        pass

    def load_step(self, step_filename):
        """
        Read a STEP file, gather the NAUO / PRODUCT_DEFINITION /
        PRODUCT_DEFINITION_FORMATION / PRODUCT lines (joining wrapped
        lines), then resolve references into parent/child relationships
        and a ref -> name parts dictionary (self.part_dict).

        :param step_filename: path of the STEP file to parse
        """
        self.nauo_lines = []
        self.prod_def_lines = []
        self.prod_def_form_lines = []
        self.prod_lines = []

        self.filename = os.path.splitext(step_filename)[0]

        line_hold = ''
        line_type = ''

        # Find all search lines
        with open(step_filename) as f:
            for line in f:
                # TH: read pointer of lines as they are read, so if the file has text wrap it will notice and add it to the following lines
                index = re.search("#(.*)=", line)
                if index:
                    # TH: if not none then it is the start of a line so read it
                    # want to hold line until it has checked next line
                    # if next line is a new indexed line then save previous line
                    if line_hold:
                        if line_type == 'nauo':
                            self.nauo_lines.append(line_hold)
                        elif line_type == 'prod_def':
                            self.prod_def_lines.append(line_hold)
                        elif line_type == 'prod_def_form':
                            self.prod_def_form_lines.append(line_hold)
                        elif line_type == 'prod':
                            self.prod_lines.append(line_hold)
                        line_hold = ''
                        line_type = ''

                    prev_index = True  # TH remember previous line had an index
                    # (NOTE: prev_index is set but never read afterwards)

                    if 'NEXT_ASSEMBLY_USAGE_OCCURRENCE' in line:
                        line_hold = line.rstrip()
                        line_type = 'nauo'
                    elif ('PRODUCT_DEFINITION ' in line
                          or 'PRODUCT_DEFINITION(' in line):
                        line_hold = line.rstrip()
                        line_type = 'prod_def'
                    elif 'PRODUCT_DEFINITION_FORMATION' in line:
                        line_hold = line.rstrip()
                        line_type = 'prod_def_form'
                    elif ('PRODUCT ' in line or 'PRODUCT(' in line):
                        line_hold = line.rstrip()
                        line_type = 'prod'

                else:
                    prev_index = False
                    #TH: if end of file and previous line was held
                    if 'ENDSEC;' in line:
                        if line_hold:
                            if line_type == 'nauo':
                                self.nauo_lines.append(line_hold)
                            elif line_type == 'prod_def':
                                self.prod_def_lines.append(line_hold)
                            elif line_type == 'prod_def_form':
                                self.prod_def_form_lines.append(line_hold)
                            elif line_type == 'prod':
                                self.prod_lines.append(line_hold)
                            line_hold = ''
                            line_type = ''
                    else:
                        #TH: if not end of file
                        # Continuation of a wrapped record: append to the
                        # held line.
                        line_hold = line_hold + line.rstrip()

        self.nauo_refs = []
        self.prod_def_refs = []
        self.prod_def_form_refs = []
        self.prod_refs = []

        # TH: added 'replace(","," ").' to replace ',' with a space to make the spilt easier if there are not spaces inbetween the words'
        # Find all (# hashed) line references and product names
        # TH: it might be worth finding a different way of extracting data we do want rather than fixes to get rid of the data we don't
        for j, el_ in enumerate(self.nauo_lines):
            self.nauo_refs.append([
                el.rstrip(',')
                for el in el_.replace(",", " ").replace("=", " ").split()
                if el.startswith('#')
            ])
        for j, el_ in enumerate(self.prod_def_lines):
            self.prod_def_refs.append([
                el.rstrip(',')
                for el in el_.replace(",", " ").replace("=", " ").split()
                if el.startswith('#')
            ])
        for j, el_ in enumerate(self.prod_def_form_lines):
            self.prod_def_form_refs.append([
                el.rstrip(',')
                for el in el_.replace(",", " ").replace("=", " ").split()
                if el.startswith('#')
            ])
        for j, el_ in enumerate(self.prod_lines):
            self.prod_refs.append([
                el.strip(',')
                for el in el_.replace(",", " ").replace(
                    "(", " ").replace("=", " ").split()
                if el.startswith('#')
            ])
            # Product name is the first single-quoted field on the line.
            self.prod_refs[j].append(el_.split("'")[1])

        # Get first two items in each sublist (as third is shape ref)
        #
        # First item is 'PRODUCT_DEFINITION' ref
        # Second item is 'PRODUCT_DEFINITION_FORMATION <etc>' ref
        self.prod_all_refs = [el[:2] for el in self.prod_def_refs]

        # Match up all references down to level of product name
        for j, el_ in enumerate(self.prod_all_refs):
            # Add 'PRODUCT_DEFINITION' ref
            for i, el in enumerate(self.prod_def_form_refs):
                if el[0] == el_[1]:
                    el_.append(el[1])
                    break
            # Add names from 'PRODUCT_DEFINITION' lines
            for i, el in enumerate(self.prod_refs):
                if el[0] == el_[2]:
                    el_.append(el[2])
                    break

        # Find all parent and child relationships (3rd and 2nd item in each sublist)
        self.parent_refs = [el[1] for el in self.nauo_refs]
        self.child_refs = [el[2] for el in self.nauo_refs]

        # Find distinct parts and assemblies via set operations; returns list, so no repetition of items
        self.all_type_refs = set(self.child_refs) | set(self.parent_refs)
        self.ass_type_refs = set(self.parent_refs)
        self.part_type_refs = set(self.child_refs) - set(self.parent_refs)
        #TH: find root node
        self.root_type_refs = set(self.parent_refs) - set(self.child_refs)

        # Create simple parts dictionary (ref + label)
        self.part_dict = {el[0]: el[3] for el in self.prod_all_refs}
        # self.part_dict_inv = {el[3]:el[0] for el in self.prod_all_refs}

    def show_values(self):
        """Print all collected lines and resolved references (debug aid)."""
        # TH: basic testing, if needed these could be spilt up
        print(self.nauo_lines)
        print(self.prod_def_lines)
        print(self.prod_def_form_lines)
        print(self.prod_lines)
        print(self.nauo_refs)
        print(self.prod_def_refs)
        print(self.prod_def_form_refs)
        print(self.prod_refs)

    # HR: "create_dict" replaced by list comprehension elsewhere
    #
    # def create_dict(self):
    #
    #     # TH: links nauo number with a name and creates dict
    #     self.part_dict  = {}
    #     for part in self.all_type_refs:
    #         for sublist in self.prod_def_refs:
    #             if sublist[0] == part:
    #                 prod_loc = '#' + re.findall('\d+',sublist[1])[0]
    #                 pass
    #         for sublist in self.prod_def_form_refs:
    #             if sublist[0] == prod_loc:
    #                 prod_loc = '#' + str(re.findall('\d+',sublist[1])[0])
    #                 pass
    #         for sublist in self.prod_refs:
    #             if sublist[0] == prod_loc:
    #                 part_name = sublist[2]
    #
    #         self.part_dict[part] = part_name

    def create_tree(self):
        """
        Build self.tree (treelib) from the parsed references; node 0 is the
        root product, integer IDs increase in depth-first insertion order.
        Finishes by calling get_levels().
        """
        #TH: create tree diagram in newick format
        #TH: find root node
        self.tree = Tree()
        #TH: check if there are any parts to make a tree from, if not don't bother
        if self.part_dict == {}:
            return
        root_node_ref = list(self.root_type_refs)[0]
        # HR added part reference as data for later use
        self.tree.create_node(self.part_dict[root_node_ref], 0,
                              data={'ref': root_node_ref})

        #TH: created root node now fill in next layer
        #TH: create dict for tree, as each node needs a unique name
        i = [0]  # Iterates through nodes (one-element list so the nested
                 # function can mutate the counter via closure)
        self.tree_dict = {}
        self.tree_dict[i[0]] = root_node_ref

        def tree_next_layer(self, parent):
            # At entry i[0] equals `parent`'s id (it is called right after
            # the parent node is created), so this resolves the parent ref.
            root_node = self.tree_dict[i[0]]
            for line in self.nauo_refs:
                if line[1] == root_node:
                    i[0] += 1
                    self.tree_dict[i[0]] = str(line[2])
                    # HR added part reference as data for later use
                    self.tree.create_node(self.part_dict[line[2]], i[0],
                                          parent=parent,
                                          data={'ref': str(line[2])})
                    tree_next_layer(self, i[0])

        tree_next_layer(self, 0)
        self.appended = False

        self.get_levels()

    def get_levels(self):
        """
        Compute per-node part counts (n_p) and assembly counts (n_a) bottom-up,
        then build the lattice and inverse level -> node-ids maps.
        """
        # Initialise dict and get first level (leaves)
        self.levels = {}
        self.levels_set_p = set()
        self.levels_set_a = set()
        self.leaf_ids = [el.identifier for el in self.tree.leaves()]
        self.all_ids = [el for el in self.tree.nodes]
        self.non_leaf_ids = set(self.all_ids) - set(self.leaf_ids)

        self.part_level = 1

        def do_level(self, tree_level):
            # Get all nodes within this level
            node_ids = [
                el for el in self.tree.nodes
                if self.tree.level(el) == tree_level
            ]
            for el in node_ids:
                # If leaf, then n_p = 1 and n_a = 1
                if el in self.leaf_ids:
                    self.levels[el] = {}
                    self.levels[el]['n_p'] = self.part_level
                    self.levels[el]['n_a'] = self.part_level
                # If assembly, then get all children and sum all parts + assemblies
                else:
                    # Get all children of node and sum levels
                    child_ids = self.tree.is_branch(el)
                    child_sum_p = 0
                    child_sum_a = 0
                    for el_ in child_ids:
                        child_sum_p += self.levels[el_]['n_p']
                        child_sum_a += self.levels[el_]['n_a']
                    self.levels[el] = {}
                    self.levels[el]['n_p'] = child_sum_p
                    self.levels[el]['n_a'] = child_sum_a + 1
                    self.levels_set_p.add(child_sum_p)
                    self.levels_set_a.add(child_sum_a + 1)

        # Go up through tree levels and populate lattice level dict
        # (deepest level first so children are done before their parents)
        for i in range(self.tree.depth(), -1, -1):
            do_level(self, i)

        self.create_lattice()

        self.levels_p_sorted = sorted(list(self.levels_set_p))
        self.levels_a_sorted = sorted(list(self.levels_set_a))

        # Function to return dictionary of item IDs for each lattice level
        def get_levels_inv(list_in, key):
            #Initialise
            levels_inv = {}
            levels_inv[self.part_level] = []
            for el in list_in:
                levels_inv[el] = []
            for k, v in self.levels.items():
                levels_inv[v[key]].append(k)
            return levels_inv

        self.levels_p_inv = get_levels_inv(self.levels_p_sorted, 'n_p')
        self.levels_a_inv = get_levels_inv(self.levels_a_sorted, 'n_a')

    def get_all_children(self, id_):
        """Return identifiers of all descendants of node id_ (breadth-first)."""
        ancestors = [el.identifier for el in self.tree.children(id_)]
        parents = ancestors
        while parents:
            children = []
            for parent in parents:
                # NOTE(review): `children` is reassigned (not extended) each
                # iteration, so only the last parent's children seed the next
                # level -- confirm this is intended.
                children = [el.identifier for el in self.tree.children(parent)]
                ancestors.extend(children)
            parents = children
        return ancestors

    def create_lattice(self):
        """
        Build self.g, a networkx DiGraph mirroring the tree (edges child ->
        parent), and assign each node a 2-D 'pos' for plotting: leaves are
        spread along y=1, assemblies sit at y=n_a above the mean x of their
        children.
        """
        # Create lattice
        self.g = nx.DiGraph()
        self.default_colour = 'r'

        # Get root node and set parent to -1 to maintain data type of "parent"
        # Set position to top/middle
        node_id = self.tree.root
        label_text = self.tree.get_node(node_id).tag
        self.g.add_node(node_id, parent=-1, label=label_text,
                        colour=self.default_colour)

        # Do nodes from treelib "nodes" dictionary
        for key in self.tree.nodes:
            # Exclude root
            if key != self.tree.root:
                parent_id = self.tree.parent(key).identifier
                label_text = self.tree.get_node(key).tag
                # Node IDs same as for tree
                self.g.add_node(key, parent=parent_id, label=label_text,
                                colour=self.default_colour)

        # Do edges from nodes
        for key in self.tree.nodes:
            # Exclude root
            if key != self.tree.root:
                parent_id = self.tree.parent(key).identifier
                self.g.add_edge(key, parent_id)

        # Escape if only one node
        # HR 6/3/20 QUICK BUG FIX: SINGLE-NODE TREE DOES NOT PLOT
        # IMPROVE LATER; SHOULD BE PART OF A GENERAL METHOD
        if self.tree.size() == 1:
            id_ = [el.identifier for el in self.tree.leaves()]
            self.g.nodes[id_[-1]]['pos'] = (0, 0)
            return

        # Get set of parents of leaf nodes
        leaf_parents = set(
            [self.tree.parent(el).identifier for el in self.leaf_ids])

        # For each leaf_parent, set position of leaf nodes sequentially
        i = 0
        no_leaves = len(self.tree.leaves())
        for el in leaf_parents:
            for el_ in self.tree.is_branch(el):
                child_ids = [el.identifier for el in self.tree.leaves()]
                if el_ in child_ids:
                    self.g.nodes[el_]['pos'] = ((i / (no_leaves)), 1)
                    i += 1

        # To set plot positions of nodes from lattice levels
        # ---
        # Traverse upwards from leaves
        for el in sorted(list(self.levels_set_a)):
            # Get all nodes at that level
            node_ids = [k for k, v in self.levels.items() if v['n_a'] == el]
            # Get all positions of children of that node
            # and set position as mean value of them
            for el_ in node_ids:
                child_ids = self.tree.is_branch(el_)
                pos_sum = 0
                for el__ in child_ids:
                    pos_ = self.g.nodes[el__]['pos'][0]
                    pos_sum += pos_
                pos_sum = pos_sum / len(child_ids)
                self.g.nodes[el_]['pos'] = (pos_sum, el)

    def print_tree(self):
        """Show the tree, building it first if create_tree was not run yet."""
        try:
            self.tree.show()
        except:
            self.create_tree()
            self.tree.show()

    def tree_to_json(self, save_to_file=False, filename='file', path=''):
        """
        Return the tree as a JSON string; optionally also write it to
        <path>/<filename>.json.

        :param save_to_file: write the JSON to disk when True
        :param filename: output file name without the .json extension
        :param path: optional directory for the output file
        :return: JSON string, or None if the tree is empty
        """
        #TH: return json format tree, can also save to file
        if self.tree.size() != 0:
            data = self.tree.to_json()
            j = json.loads(data)
            if save_to_file == True:
                if path:
                    file_path = os.path.join(path, filename)
                else:
                    file_path = filename
                with open(file_path + '.json', 'w') as outfile:
                    json.dump(j, outfile)
            return data
        else:
            print("no tree to print")
            return
def tree_build_from_list(containers):
    """
    Build a tree based on a unsorted list.

    Build a tree of containers based on an unsorted list of containers.

    Example:
    --------
    >>> containers = [
    {
        "childContainerKey": null,
        "configlets": [],
        "devices": [],
        "imageBundle": "",
        "key": "root",
        "name": "Tenant",
        "parentName": null
    },
    {
        "childContainerKey": null,
        "configlets": [
            "veos3-basic-configuration"
        ],
        "devices": [
            "veos-1"
        ],
        "imageBundle": "",
        "key": "container_43_840035860469981",
        "name": "staging",
        "parentName": "Tenant"
    }]
    >>> print(tree_build_from_list(containers=containers))
    {"Tenant": {"children": [{"Fabric": {"children": [{"Leaves": {"children": ["MLAG01", "MLAG02"]}}, "Spines"]}}]}}

    Parameters
    ----------
    containers : dict, optional
        Container topology to create on CVP, by default None

    Returns
    -------
    json
        tree topology
    """
    # Tree object holding the reconstructed topology
    topology = Tree()
    # Names already attached directly under the root
    attached_names = list()
    # Root node mimicking CVP's built-in "Tenant" container
    topology.create_node("Tenant", "Tenant")
    # First pass: attach every container whose parent is the root itself.
    for cvp_container in containers:
        parent_name = cvp_container['parentName']
        if parent_name is None:
            continue
        if parent_name in ['Tenant']:
            attached_names.append(cvp_container['name'])
            topology.create_node(cvp_container['name'], cvp_container['name'], parent=parent_name)
    # Keep sweeping the list until every container has been placed; a
    # container can only be attached once its parent exists in the tree.
    while len(topology.all_nodes()) < len(containers):
        for cvp_container in containers:
            if not topology.contains(cvp_container['parentName']):
                continue  # and cvp_container['parentName'] not in ['Tenant']
            try:
                topology.create_node(cvp_container['name'], cvp_container['name'], parent=cvp_container['parentName'])
            except:  # noqa E722
                continue
    return topology.to_json()
def tree_build_from_dict(containers=None):
    """
    Build a tree based on a unsorted dictConfig(config).

    Build a tree of containers based on an unsorted dict of containers.

    Example:
    --------
    >>> containers = {'Fabric': {'parent_container': 'Tenant'},
    'Leaves': {'configlets': ['container_configlet'],
                'devices': ['veos01'],
                'images': ['4.22.0F'],
                'parent_container': 'Fabric'},
    'MLAG01': {'configlets': ['container_configlet'],
                'devices': ['veos01'],
                'images': ['4.22.0F'],
                'parent_container': 'Leaves'},
    'MLAG02': {'configlets': ['container_configlet'],
                'devices': ['veos01'],
                'images': ['4.22.0F'],
                'parent_container': 'Leaves'},
    'Spines': {'configlets': ['container_configlet'],
                'devices': ['veos01'],
                'images': ['4.22.0F'],
                'parent_container': 'Fabric'}}
    >>> print(tree_build_from_dict(containers=containers))
    {"Tenant": {"children": [{"Fabric": {"children": [{"Leaves": {"children": ["MLAG01", "MLAG02"]}}, "Spines"]}}]}}

    Parameters
    ----------
    containers : dict, optional
        Container topology to create on CVP, by default None

    Returns
    -------
    json
        tree topology
    """
    # Tree object holding the reconstructed topology
    topology = Tree()
    # Names already attached directly under the root
    attached_names = list()
    # Root node mimicking CVP's built-in "Tenant" container
    topology.create_node("Tenant", "Tenant")
    # First pass: attach every container parented directly to the root.
    for container_name, container_info in containers.items():
        if container_info['parent_container'] in ['Tenant']:
            attached_names.append(container_name)
            topology.create_node(container_name, container_name, parent=container_info['parent_container'])
    # Keep sweeping until all containers are placed (+1 accounts for the
    # synthetic root node, which is not an entry of `containers`).
    while len(topology.all_nodes()) < len(containers) + 1:
        for container_name, container_info in containers.items():
            parent_name = container_info['parent_container']
            if topology.contains(parent_name) and parent_name not in ['Tenant']:
                try:
                    topology.create_node(container_name, container_name, parent=parent_name)
                except:  # noqa E722
                    continue
    return topology.to_json()
def test_04_get_hierarchy_renames_duplicate_modules(self, mock_client): """ Test that get_hierarchy_for_module renames the second instance of a dependent modules ID so can list that module multiple times - valid_module - dependent_module_one - dependent_module_two - dependent_mdodule_one1 :param mock_client: A mocked out version of erppeek.Client :return: """ # Mock Up mock_dp = DependencyGraph orig_mod_search = mock_dp.module_search orig_dep_search = mock_dp.dependency_search orig_client_search = mock_client.search mock_dp.module_search = MagicMock(return_value=[666]) def dependency_search_side_effect(value): if value == 'valid_module': return [666] elif value == 'dependent_module_one': return [668] elif value == 'dependent_module_two': return [664] mock_dp.dependency_search = MagicMock() mock_dp.dependency_search.side_effect = dependency_search_side_effect def dependency_read_side_effect(value): if value == [666]: return ['dependent_module_one', 'dependent_module_two'] elif value == [664]: return ['dependent_module_one'] else: return [] mock_dp.dependency_read = MagicMock() mock_dp.dependency_read.side_effect = dependency_read_side_effect mock_dg = mock_dp('valid_module') test_hierarchy = Tree() test_hierarchy.create_node('valid_module', 'valid_module') test_hierarchy.create_node('dependent_module_one', 'dependent_module_one', parent='valid_module') test_hierarchy.create_node('dependent_module_two', 'dependent_module_two', parent='valid_module') test_hierarchy.create_node('dependent_module_one', 'dependent_module_one1', parent='dependent_module_two') self.assertEqual( mock_dg.hierarchy.to_json(), test_hierarchy.to_json(), 'get_hierarchy_for_module did not return nested dict when finding dependent modules' ) # Mock Down mock_client.stop() mock_dp.module_search.stop() mock_client.search.stop() mock_client.search = orig_client_search mock_dp.dependency_search.stop() mock_dp.module_search = orig_mod_search mock_dp.dependency_search = orig_dep_search
class StepParse:
    """Parser for STEP (ISO 10303-21) CAD files.

    Collects NEXT_ASSEMBLY_USAGE_OCCURRENCE (nauo), PRODUCT_DEFINITION,
    PRODUCT_DEFINITION_FORMATION and PRODUCT lines, resolves the '#'
    cross-references between them, and can build a treelib Tree of the
    product hierarchy (create_tree / tree_to_json).
    """

    def __init__(self):
        # No state until load_step() is called.
        pass

    def load_step(self, step_filename):
        """Read and index a STEP file.

        Gathers the four interesting line types (re-joining lines the file
        wrapped across physical lines), extracts their '#' references, and
        derives parent/child/part/assembly reference sets. Finishes by
        calling create_dict() to map references to product names.

        :param step_filename: path of the STEP file to parse
        """
        self.nauo_lines = []
        self.prod_def_lines = []
        self.prod_def_form_lines = []
        self.prod_lines = []

        self.filename = os.path.splitext(step_filename)[0]

        line_hold = ''
        line_type = ''

        # Find all search lines
        with open(step_filename) as f:
            for line in f:
                # TH: read pointer of lines as they are read, so if the file
                # has text wrap it will notice and add it to the following lines
                index = re.search("#(.*)=", line)
                if index:
                    # TH: if not None then it is the start of a line so read it;
                    # hold the line until the next line has been checked --
                    # if the next line is a new indexed line, save the held one
                    if line_hold:
                        if line_type == 'nauo':
                            self.nauo_lines.append(line_hold)
                        elif line_type == 'prod_def':
                            self.prod_def_lines.append(line_hold)
                        elif line_type == 'prod_def_form':
                            self.prod_def_form_lines.append(line_hold)
                        elif line_type == 'prod':
                            self.prod_lines.append(line_hold)
                        line_hold = ''
                        line_type = ''
                    # NOTE(review): prev_index is written but never read.
                    prev_index = True  # TH: remember previous line had an index
                    if 'NEXT_ASSEMBLY_USAGE_OCCURRENCE' in line:
                        line_hold = line.rstrip()
                        line_type = 'nauo'
                    elif ('PRODUCT_DEFINITION ' in line
                          or 'PRODUCT_DEFINITION(' in line):
                        line_hold = line.rstrip()
                        line_type = 'prod_def'
                    elif 'PRODUCT_DEFINITION_FORMATION' in line:
                        line_hold = line.rstrip()
                        line_type = 'prod_def_form'
                    elif ('PRODUCT ' in line or 'PRODUCT(' in line):
                        line_hold = line.rstrip()
                        line_type = 'prod'
                else:
                    prev_index = False
                    # TH: if end of section and a line is still held, flush it
                    if 'ENDSEC;' in line:
                        if line_hold:
                            if line_type == 'nauo':
                                self.nauo_lines.append(line_hold)
                            elif line_type == 'prod_def':
                                self.prod_def_lines.append(line_hold)
                            elif line_type == 'prod_def_form':
                                self.prod_def_form_lines.append(line_hold)
                            elif line_type == 'prod':
                                self.prod_lines.append(line_hold)
                            line_hold = ''
                            line_type = ''
                    else:
                        # TH: not end of section: this is a wrapped
                        # continuation, append it to the held line
                        line_hold = line_hold + line.rstrip()

        self.nauo_refs = []
        self.prod_def_refs = []
        self.prod_def_form_refs = []
        self.prod_refs = []

        # TH: added 'replace(","," ")' to replace ',' with a space to make
        # the split easier if there are no spaces in between the words
        # Find all (# hashed) line references and product names
        # TH: it might be worth finding a different way of extracting the
        # data we do want rather than fixes to get rid of the data we don't
        for j in range(len(self.nauo_lines)):
            self.nauo_refs.append([
                el.rstrip(',') for el in self.nauo_lines[j].replace(
                    ",", " ").replace("=", " ").split() if el.startswith('#')
            ])
        for j in range(len(self.prod_def_lines)):
            self.prod_def_refs.append([
                el.rstrip(',') for el in self.prod_def_lines[j].replace(
                    ",", " ").replace("=", " ").split() if el.startswith('#')
            ])
        for j in range(len(self.prod_def_form_lines)):
            self.prod_def_form_refs.append([
                el.rstrip(',') for el in self.prod_def_form_lines[j].replace(
                    ",", " ").replace("=", " ").split() if el.startswith('#')
            ])
        for j in range(len(self.prod_lines)):
            self.prod_refs.append([
                el.strip(',') for el in self.prod_lines[j].replace(
                    ",", " ").replace("(", " ").replace("=", " ").split()
                if el.startswith('#')
            ])
            # Product name is the first single-quoted string on the line.
            self.prod_refs[j].append(self.prod_lines[j].split("'")[1])

        # Get first two items in each sublist (as third is shape ref)
        #
        # First item is 'PRODUCT_DEFINITION' ref
        # Second item is 'PRODUCT_DEFINITION_FORMATION <etc>' ref
        self.prod_all_refs = [el[:2] for el in self.prod_def_refs]

        # Match up all references down to level of product name
        for j in range(len(self.prod_all_refs)):
            # Add 'PRODUCT_DEFINITION' ref
            for i in range(len(self.prod_def_form_refs)):
                if self.prod_def_form_refs[i][0] == self.prod_all_refs[j][1]:
                    self.prod_all_refs[j].append(self.prod_def_form_refs[i][1])
                    break
            # Add names from 'PRODUCT_DEFINITION' lines
            for i in range(len(self.prod_refs)):
                if self.prod_refs[i][0] == self.prod_all_refs[j][2]:
                    self.prod_all_refs[j].append(self.prod_refs[i][2])
                    break

        # Find all parent and child relationships (3rd and 2nd item in each
        # sublist)
        self.parent_refs = [el[1] for el in self.nauo_refs]
        self.child_refs = [el[2] for el in self.nauo_refs]

        # Find distinct parts and assemblies via set operations; returns
        # sets, so no repetition of items
        self.all_type_refs = set(self.child_refs) | set(self.parent_refs)
        self.ass_type_refs = set(self.parent_refs)
        self.part_type_refs = set(self.child_refs) - set(self.parent_refs)

        # NOTE(review): everything from here down to root_type_refs repeats
        # the matching block above verbatim. prod_all_refs is rebuilt from
        # scratch, so re-running it recomputes identical values -- redundant
        # but harmless. Candidate for deletion once confirmed.
        # Get first two items in each sublist (as third is shape ref)
        #
        # First item is 'PRODUCT_DEFINITION' ref
        # Second item is 'PRODUCT_DEFINITION_FORMATION <etc>' ref
        self.prod_all_refs = [el[:2] for el in self.prod_def_refs]

        # Match up all references down to level of product name
        for j in range(len(self.prod_all_refs)):
            # Add 'PRODUCT_DEFINITION' ref
            for i in range(len(self.prod_def_form_refs)):
                if self.prod_def_form_refs[i][0] == self.prod_all_refs[j][1]:
                    self.prod_all_refs[j].append(self.prod_def_form_refs[i][1])
                    break
            # Add names from 'PRODUCT_DEFINITION' lines
            for i in range(len(self.prod_refs)):
                if self.prod_refs[i][0] == self.prod_all_refs[j][2]:
                    self.prod_all_refs[j].append(self.prod_refs[i][2])
                    break

        # Find all parent and child relationships (3rd and 2nd item in each
        # sublist)
        self.parent_refs = [el[1] for el in self.nauo_refs]
        self.child_refs = [el[2] for el in self.nauo_refs]

        # Find distinct parts and assemblies via set operations
        self.all_type_refs = set(self.child_refs) | set(self.parent_refs)
        self.ass_type_refs = set(self.parent_refs)
        self.part_type_refs = set(self.child_refs) - set(self.parent_refs)

        # TH: find root node (a parent that is never anyone's child)
        self.root_type_refs = set(self.parent_refs) - set(self.child_refs)

        self.create_dict()

    def show_values(self):
        """Dump all collected line lists and reference tables (debug aid)."""
        # TH: basic testing; if needed these could be split up
        print(self.nauo_lines)
        print(self.prod_def_lines)
        print(self.prod_def_form_lines)
        print(self.prod_lines)
        print(self.nauo_refs)
        print(self.prod_def_refs)
        print(self.prod_def_form_refs)
        print(self.prod_refs)

    def create_dict(self):
        """Build part_dict: product reference -> product name.

        NOTE(review): when a reference has no match in prod_def_refs,
        prod_loc (and then part_name) is carried over from the previous
        iteration -- or is unbound on the very first one. Confirm every
        reference always has a match.
        """
        # TH: links nauo number with a name and creates dict
        self.part_dict = {}
        for part in self.all_type_refs:
            for sublist in self.prod_def_refs:
                if sublist[0] == part:
                    # NOTE(review): '\d+' is a non-raw escape sequence;
                    # r'\d+' is the conventional form.
                    prod_loc = '#' + re.findall('\d+', sublist[1])[0]
                    pass
            for sublist in self.prod_def_form_refs:
                if sublist[0] == prod_loc:
                    prod_loc = '#' + str(re.findall('\d+', sublist[1])[0])
                    pass
            for sublist in self.prod_refs:
                if sublist[0] == prod_loc:
                    part_name = sublist[2]
            self.part_dict[part] = part_name

    def create_tree(self):
        """Build self.tree (treelib) of the product hierarchy."""
        # TH: create tree diagram in newick format
        # TH: find root node
        self.tree = Tree()
        # TH: check if there are any parts to make a tree from; if not,
        # don't bother
        if self.part_dict == {}:
            return
        root_node_ref = list(self.root_type_refs)[0]
        self.tree.create_node(self.part_dict[root_node_ref], 0)

        # TH: created root node, now fill in the next layer
        # TH: each treelib node needs a unique id, so nodes are numbered by
        # the counter i; tree_dict maps node number -> STEP reference.
        i = [0]  # iterates through nodes (a list so the closure can mutate it)
        self.tree_dict = {}
        self.tree_dict[i[0]] = root_node_ref

        def tree_next_layer(self, parent):
            # Depth-first expansion: at call time i[0] is the id of the node
            # just created, so tree_dict[i[0]] is the reference whose
            # children are expanded next. Fragile -- relies on the recursion
            # happening immediately after each create_node.
            root_node = self.tree_dict[i[0]]
            for line in self.nauo_refs:
                if line[1] == root_node:
                    i[0] += 1
                    self.tree_dict[i[0]] = str(line[2])
                    self.tree.create_node(self.part_dict[line[2]],
                                          i[0],
                                          parent=parent)
                    tree_next_layer(self, i[0])

        tree_next_layer(self, 0)

    def print_tree(self):
        """Show the tree, building it first if it does not exist yet."""
        try:
            self.tree.show()
        except:
            # NOTE(review): bare except is here to catch the AttributeError
            # when self.tree has not been built yet, but it also hides any
            # real display error.
            self.create_tree()
            self.tree.show()

    def tree_to_json(self, save_to_file=False, filename='file', path=''):
        """Return the tree as a JSON string, optionally saving it to disk.

        :param save_to_file: when True, also write '<path>/<filename>.json'
        :param filename: output file name without extension
        :param path: optional directory for the output file
        :return: JSON string, or None when the tree is empty
        """
        # TH: return json format tree, can also save to file
        if self.tree.size() != 0:
            data = self.tree.to_json()
            j = json.loads(data)
            if save_to_file == True:
                if path:
                    file_path = os.path.join(path, filename)
                else:
                    file_path = filename
                with open(file_path + '.json', 'w') as outfile:
                    json.dump(j, outfile)
            return data
        else:
            print("no tree to print")
            return
class DataCrawler():
    """Selenium + BeautifulSoup crawler for an intranet wiki.

    Walks the site's vertical menu and SharePoint-style document tables,
    recording every page in a treelib Tree (node id = slash-terminated
    lowercase path, node data = absolute link) and dumping the tree to JSON.
    """

    # NOTE(review): credentials are redacted placeholders in this source.
    USERNAME = "******"
    PASSWORD = "******"
    LOGIN_URL = "https://www.tgwiki.com/CookieAuth.dll?GetLogon?curl=Z2F&reason=0&formdir=9"
    URL = "https://www.tgwiki.com"
    # Links containing any of these path fragments are treated as folders.
    DIRECTORY = ["RootFolder"]
    URL_suffix = ".aspx"
    EXCEPTION_MENU_ITEM = "Service Level Agreement"
    browser = None   # selenium webdriver; set up externally (see __init__)
    dataTree = None  # treelib.Tree of crawled pages

    def __init__(self):
        # self.browser = webdriver.Chrome()
        self.array = []
        self.dataTree = Tree()
        # Root of the crawl.
        self.dataTree.create_node("Homepage", "homepage/", data=self.URL)

    def login(self):
        """Log in via the forms page and navigate to the department page.

        :return: page source HTML after navigation
        """
        self.browser.get(self.LOGIN_URL)
        username = self.browser.find_element_by_id('username')
        username.send_keys(self.USERNAME)
        password = self.browser.find_element_by_id('password')
        password.send_keys(self.PASSWORD)
        self.browser.find_element_by_id('SubmitCreds').click()
        self.browser.find_element_by_xpath('//a[@href="/department"]').click()
        self.browser.find_element_by_xpath(
            '//a[@href="/department/citd"]').click()
        html = self.browser.page_source
        return html

    def get_HTML_From_URL(self, url):
        """Fetch *url* with the shared browser and return its page source."""
        print("Accessing " + str(url))
        self.browser.get(url)
        html = self.browser.page_source
        return html

    def get_menu(self, soup):
        """Crawl the side menu of the page held in *soup*.

        Static entries become nodes under "homepage/"; dynamic submenu
        entries are registered by parseLink. Entries that look like
        directories are fetched and their tables crawled via parseTable.
        """
        result = soup.find(class_="menu vertical menu-vertical")
        result_in_static = result.findAll("li", class_="static")
        for ele in result_in_static:
            # print("-------------------------------------------------------")
            name = ele.find(class_="menu-item-text")
            inner_ele = ele.findAll("li", class_="dynamic")
            link = None
            parentID = "homepage/"
            if (inner_ele == []):
                # Leaf menu entry: add it directly under the homepage node.
                # print(name.get_text())  # FOR DEBUGGING
                link_tag, link = self.parseLink(ele)
                self.dataTree.create_node(name.get_text(),
                                          parentID + name.get_text().lower() + "/",
                                          data=link,
                                          parent=parentID)
                if (self.isDirectory(link)):
                    _html = self.get_HTML_From_URL(link)
                    soup = BeautifulSoup(_html, "lxml")
                    self.parseTable(soup,
                                    parentID + link_tag.get_text().lower() + "/")
            # else:  # FOR DEBUGGING
            #     print(name.get_text())  # FOR DEBUGGING
            if (name.get_text() == "Technology Update"):
                # NOTE(review): leftover debugging hook; prints a blank line.
                print("")
            # self.dataTree.show(idhidden=False)
            for small_ele in inner_ele:
                # Dynamic submenu entry: parent is the static entry's node.
                parentID = "homepage/" + name.get_text().lower() + "/"
                link_tag, link = self.parseLink(small_ele, _parent=parentID)
                print("CHECKING IF " + str(link) + " IS DIRECTORY...")
                #self.dataTree.show()
                # self.dataTree.create_node(small_ele.get_text(), small_ele.get_text().lower(), data=link, parent=name.get_text().lower())
                if (self.isDirectory(link)):
                    _html = self.get_HTML_From_URL(link)
                    soup = BeautifulSoup(_html, "lxml")
                    self.parseTable(
                        soup, parentID + link_tag.get_text().lower() + "/")
            # print("-------------------------------------------------------")

    def parseLink(self, soup_result, _parent=None):
        """Extract the <a> tag and absolute link from *soup_result*.

        When _parent is given, the link is also registered as a node under
        that parent in dataTree (duplicate ids are reported and skipped).

        :return: (link_tag, link) -- either may be None when no anchor exists
        """
        print("praseLink")
        link_tag = soup_result.a
        link = None
        if (link_tag != None):
            link = link_tag.get('href')
            # print(link_tag.get_text())
            # print(link)
            if (link[0] == '/'):
                # Site-relative href: make it absolute.
                link = self.URL + link
            if (_parent != None):
                print("############")
                print("Tag: " + str(link_tag))
                print("Text: " + link_tag.get_text())
                print("Link: " + str(link))
                print("Parent: " + str(_parent))
                print("############")
                # self.dataTree.show(idhidden=False)
                # if(link_tag.get_text() == self.EXCEPTION_MENU_ITEM):
                #     if(self.dataTree.contains(self.EXCEPTION_MENU_ITEM.lower())):
                #         print("dfgdfgdgdfgdfgd")
                #         return link_tag, link
                try:
                    self.dataTree.create_node(
                        str(link_tag.get_text()),
                        _parent + str(link_tag.get_text().lower() + "/"),
                        data=link,
                        parent=_parent)
                except treelib.tree.DuplicatedNodeIdError:
                    print("duplicated")
                    return link_tag, link
                # if(self.isDirectory(temp)):
                #     _html = self.get_HTML_From_URL(temp)
                #     soup = BeautifulSoup(_html, "lxml")
                #     self.parseTable(soup, link.get_text().lower())
        return link_tag, link

    def parseTable(self, soup_result, _parent=None):
        """Crawl SharePoint-style document tables on the page.

        Every title cell becomes a node (via parseLink); rows that look like
        directories are fetched and crawled recursively. Only the first
        table carrying a 'summary' attribute is processed (note the break).
        """
        print("-------------------------------------------------------")
        print("parseTable")
        try:
            table_list = soup_result.findAll("table")
            for table in table_list:
                if (table.has_attr("summary")):
                    table_body = table.find('tbody')
                    row_list = table_body.findAll(
                        'td', attrs={"class": "ms-vb-title"})
                    for x in range(0, len(row_list)):
                        link_tag, link = self.parseLink(row_list[x], _parent)
                        if (self.isDirectory(link)):
                            _html = self.get_HTML_From_URL(link)
                            soup = BeautifulSoup(_html, "lxml")
                            self.parseTable(
                                soup,
                                _parent + link_tag.get_text().lower() + "/")
                    break
        except AttributeError as e:
            # Raised e.g. when a table has no tbody; log and carry on.
            print(e)
        print("-------------------------------------------------------")

    def isDirectory(self, link):
        """Heuristic: does *link* point at a folder-like page?

        True for .aspx pages, trailing-slash URLs, or links containing any
        DIRECTORY fragment; False for a missing link.
        """
        if (link == None):
            return False
        isDirectory = False
        if (self.URL_suffix == link[-5:]):
            return True
        elif (link[-1:] == "/"):
            print("dfasfdasfafafsdfdf")  # NOTE(review): leftover debug print
            return True
        for directory in self.DIRECTORY:
            if (directory in link):
                isDirectory = True
        return isDirectory

    def writeToJSONFile(self, path, fileName, data):
        """Write *data* as JSON to './<path>/<fileName>.json'."""
        filePathNameWExt = './' + path + '/' + fileName + '.json'
        with open(filePathNameWExt, 'w') as fp:
            json.dump(data, fp)

    def main(self):
        """Entry point: log in, crawl the menu, and dump the tree."""
        _html = self.login()
        soup = BeautifulSoup(_html, "lxml")
        self.get_menu(soup)
        self.dataTree.show()
        # NOTE(review): to_json already returns a JSON string, so the
        # json.dumps below encodes it a second time and writeToJSONFile a
        # third -- confirm the intended output format.
        tree_in_dict = self.dataTree.to_json(with_data=True)
        tree_in_json = json.dumps(tree_in_dict,
                                  indent=4,
                                  sort_keys=True,
                                  ensure_ascii=False)
        self.writeToJSONFile('./', 'training', tree_in_json)
        self.dataTree.save2file('tree_diagiam.json')

    # def process_node(self, node):
    #     if(node.)

    def test(self):
        """Load a previously dumped JSON tree and rebuild it (debug aid)."""
        file_directory = "./ITSM_training.json"
        json_data = open(file_directory).read()
        data = json.loads(json_data)
        hello = node.Tree(tree=data)
        print(hello)
from treelib import Node, Tree
import json

# Load the serialized tag data produced earlier.
with open('jsondata.txt') as json_file:
    data = json.load(json_file)

tree = Tree()
# Root node '0' representing the document; node data holds the tag markup.
tree.create_node(identifier='0', data='<html></html>')

# Each value is a list of tag records; a record may list several parents,
# in which case create_node is attempted once per parent.
for k, v in data.items():
    for i in v:
        _id = str(i['id'])
        _tag = str(i['tag'])
        parent = i['parent']
        for x in parent:
            tree.create_node(identifier=_id, parent=str(x), data=_tag)

tree.show()

x = tree.to_json()
print(x)

# NOTE(review): treelib's save2file expects data_property to be the *name*
# of an attribute on node.data; passing True relies on the installed
# treelib version tolerating a non-string value -- confirm.
tree.save2file('tree.txt', data_property=True)
# except: # continue # else: # continue # except: # continue # else: # continue # except: # continue # else: # continue # else: # continue if __name__ == "__main__": main() # print(t.to_json(with_data=False)) t.show() print(t.to_json(with_data=False)) # for node in t.traverse("preorder"): # # Do some analysis on node # print(node.render) # r = Rubrik() # r.add_parent("test") # r.printRubrik()
# NOTE(review): this chunk starts mid-script -- 'url', 'tree' and 'file'
# are defined above this view; the first lines here are presumably the
# body of a loop over URLs read from 'file'. Confirm against the caller.
# Extract the registrable domain and any subdomain from the current URL.
domain = tldextract.extract(url).domain
subdomain = tldextract.extract(url).subdomain
if not (tree.contains(domain)):
    # NOTE(review): "ID of root node" is a literal placeholder, not a real
    # node identifier -- create_node will fail unless the tree actually has
    # a node with that id. Replace with the root node's id.
    tree.create_node(domain, domain, parent="ID of root node")  # Add domains to root node
    if subdomain:
        tree.create_node(subdomain, subdomain + domain, parent=domain)  # Add sub-domains to domain node
file.close()

tree.show(line_type="ascii-emv")  # show data as stdout
tree.to_graphviz(filename="tree_graphviz")  # dump tree as graphviz

# dot xxx -Tps -o test.ps -Grankdir=LR  (left to right)
subprocess.call(["dot", "tree_graphviz", "-Tps", "-o", "output.ps",
                 "-Grankdir=LR"])  # Grankdir=LR option to build tree from left to right

# convert -flatten -density 150 -geometry 100% test.ps test.png
subprocess.call(["convert", "-flatten", "-density", "150", "-geometry",
                 "100%", "output.ps", "tree_graphviz.png"],
                stderr=subprocess.DEVNULL)  # convert graphviz output to png

# rm -rf tree_graphviz output.ps
subprocess.call(["rm", "-rf", "tree_graphviz", "output.ps"])  # clear files

if os.path.exists("output.txt"):  # dump tree as text file
    subprocess.call(["rm", "-rf", "output.txt"])
tree.save2file('output.txt', line_type="ascii-emv")

with open('output.json', 'w') as f:  # dump tree as json form
    f.write(tree.to_json(with_data=True))