def test_02_get_hierarchy_for_module_returns_single_node_when_nothing_depend_on_module(self, mock_client):
		"""
		Test that get_hierarchy_for_module returns a single node tree structure if no dependent modules are found.

		DependencyGraph.module_search and DependencyGraph.dependency_search are
		replaced on the class itself, so they are put back in a ``finally`` block:
		a failing assertion must not leave the mocks installed for later tests.

		:param mock_client: A mocked out version of erppeek.Client
		:return:
		"""
		# Mock Up
		mock_dp = DependencyGraph
		orig_mod_search = mock_dp.module_search
		orig_dep_search = mock_dp.dependency_search
		orig_client_search = mock_client.search
		mock_dp.module_search = MagicMock(return_value=[666])
		mock_dp.dependency_search = MagicMock(return_value=[])

		try:
			mock_dg = mock_dp('valid_module')
			test_hierarchy = Tree()
			test_hierarchy.create_node('valid_module', 'valid_module')
			self.assertEqual(mock_dg.hierarchy.to_json(), test_hierarchy.to_json(), 'get_hierarchy_for_module did not return [] when finding no dependent modules')
		finally:
			# Mock Down
			# Restore the class attributes first so they are reset even if the
			# client clean-up below raises.  The MagicMock replacements need no
			# stop() call: they are plain mocks, not patchers.
			mock_dp.module_search = orig_mod_search
			mock_dp.dependency_search = orig_dep_search
			mock_client.stop()
			mock_client.search.stop()
			mock_client.search = orig_client_search
Example #2
0
class TreePipeline(object):
    """Item pipeline that collects category paths into a single Tree.

    The open_spider / process_item / close_spider method names look like the
    Scrapy item-pipeline interface (NOTE(review): confirm against the project
    settings).  Every node identifier is the ``|``-joined path from the top
    level down to that node, so equal path prefixes share one node.
    """

    def open_spider(self, spider):
        """Create the tree and its ``root`` node."""
        self.tree = Tree()
        self.tree.create_node("root", "root")

    def process_item(self, item, spider):
        """Insert the path described by ``item`` into the tree.

        ``item['text']`` is the list of path components; ``'...'`` is removed
        from each one and surrounding whitespace is stripped.  The last
        component is replaced by ``item['pagetitle']`` (also with ``'...'``
        removed, and written back into the item).

        :param item: mapping with ``text`` (list of str) and ``pagetitle``
        :param spider: unused
        :return: the same ``item`` object
        """
        path = [part.replace('...', '').strip() for part in item['text']]
        item['pagetitle'] = item['pagetitle'].replace('...', '')
        path[-1] = item['pagetitle']

        parent_id = "root"
        for depth in range(1, len(path) + 1):
            node_id = "|".join(path[:depth])
            # Kept from the original: UTF-8 round trip of the identifier.
            node_id = node_id.encode('utf-8').decode('utf-8')
            if not self.tree.contains(node_id):
                # Call form of print: same output on Python 2, valid on Python 3.
                print("Adding node %s" % node_id)
                self.tree.create_node(node_id, node_id, parent=parent_id)
            # The node just visited is the parent of the next, deeper one.
            parent_id = node_id
        return item

    def close_spider(self, spider):
        """Show the tree and write it to ``tree.json`` and ``tree.tree``."""
        self.tree.show()
        with open(makepath('data/cats/tree.json'), 'w') as outfile:
            outfile.write(self.tree.to_json())
        self.tree.save2file(makepath('data/cats/tree.tree'))
    def test_02_get_hierarchy_for_module_returns_single_node_when_nothing_depend_on_module(
            self, mock_client):
        """
        Test that get_hierarchy_for_module returns a single node tree structure if no dependent modules are found.

        DependencyGraph.module_search and DependencyGraph.dependency_search are
        replaced on the class itself, so they are put back in a ``finally``
        block: a failing assertion must not leave the mocks installed for
        later tests.

        :param mock_client: A mocked out version of erppeek.Client
        :return:
        """
        # Mock Up
        mock_dp = DependencyGraph
        orig_mod_search = mock_dp.module_search
        orig_dep_search = mock_dp.dependency_search
        orig_client_search = mock_client.search
        mock_dp.module_search = MagicMock(return_value=[666])
        mock_dp.dependency_search = MagicMock(return_value=[])

        try:
            mock_dg = mock_dp('valid_module')
            test_hierarchy = Tree()
            test_hierarchy.create_node('valid_module', 'valid_module')
            self.assertEqual(
                mock_dg.hierarchy.to_json(), test_hierarchy.to_json(),
                'get_hierarchy_for_module did not return [] when finding no dependent modules'
            )
        finally:
            # Mock Down
            # Restore the class attributes first so they are reset even if
            # the client clean-up below raises.  The MagicMock replacements
            # need no stop() call: they are plain mocks, not patchers.
            mock_dp.module_search = orig_mod_search
            mock_dp.dependency_search = orig_dep_search
            mock_client.stop()
            mock_client.search.stop()
            mock_client.search = orig_client_search
	def test_04_get_hierarchy_renames_duplicate_modules(self, mock_client):
		"""
		Test that get_hierarchy_for_module renames the second instance of a dependent module's ID so it can list that module multiple times
		- valid_module
		  - dependent_module_one
		  - dependent_module_two
		    - dependent_module_one1

		DependencyGraph.module_search, dependency_search and dependency_read are
		replaced on the class itself; all three are restored in a ``finally``
		block so that neither a failing assertion nor this test's own
		dependency_read mock can leak into later tests.

		:param mock_client: A mocked out version of erppeek.Client
		:return:
		"""
		# Mock Up
		mock_dp = DependencyGraph
		orig_mod_search = mock_dp.module_search
		orig_dep_search = mock_dp.dependency_search
		orig_dep_read = mock_dp.dependency_read
		orig_client_search = mock_client.search

		def dependency_search_side_effect(value):
			# Module name -> list of ids; any other name yields None.
			if value == 'valid_module':
				return [666]
			elif value == 'dependent_module_one':
				return [668]
			elif value == 'dependent_module_two':
				return [664]

		def dependency_read_side_effect(value):
			# Id list -> names of the modules that depend on it.
			if value == [666]:
				return ['dependent_module_one', 'dependent_module_two']
			elif value == [664]:
				return ['dependent_module_one']
			else:
				return []

		mock_dp.module_search = MagicMock(return_value=[666])
		mock_dp.dependency_search = MagicMock(side_effect=dependency_search_side_effect)
		mock_dp.dependency_read = MagicMock(side_effect=dependency_read_side_effect)

		try:
			mock_dg = mock_dp('valid_module')
			test_hierarchy = Tree()
			test_hierarchy.create_node('valid_module', 'valid_module')
			test_hierarchy.create_node('dependent_module_one', 'dependent_module_one', parent='valid_module')
			test_hierarchy.create_node('dependent_module_two', 'dependent_module_two', parent='valid_module')
			# Second occurrence keeps its tag but gets the suffixed identifier.
			test_hierarchy.create_node('dependent_module_one', 'dependent_module_one1', parent='dependent_module_two')
			self.assertEqual(mock_dg.hierarchy.to_json(), test_hierarchy.to_json(), 'get_hierarchy_for_module did not return nested dict when finding dependent modules')
		finally:
			# Mock Down
			# Restore the class attributes first so they are reset even if the
			# client clean-up below raises.
			mock_dp.module_search = orig_mod_search
			mock_dp.dependency_search = orig_dep_search
			mock_dp.dependency_read = orig_dep_read
			mock_client.stop()
			mock_client.search.stop()
			mock_client.search = orig_client_search
Example #5
0
def testUnjsonify():
    """Serialise a small tree with to_json(), rebuild it with unjsonify(), and show it.

    The tree mixes string, integer and tuple identifiers; every node uses its
    identifier as its tag as well.
    """
    tree = Tree()
    tree.create_node('home', 'home')  # root

    # (identifier, parent identifier) for every non-root node, in creation order.
    children = (
        ('phone', 'home'),
        ('laptop', 'home'),
        ('screen', 'laptop'),
        (19, 'home'),
        ((1, 2), 'screen'),
    )
    for key, parent_key in children:
        tree.create_node(key, key, parent=parent_key)

    unjsonify(tree.to_json()).show()
Example #6
0
def testUnjsonify():
    """Round-trip a small tree through to_json() and unjsonify() and show the result.

    Identifiers are deliberately of mixed types (str, int, tuple); each node's
    tag is the same object as its identifier.
    """
    tree = Tree()
    tree.create_node('home', 'home')  # root

    # Non-root nodes as (identifier, parent identifier), in creation order.
    layout = [
        ('phone', 'home'),
        ('laptop', 'home'),
        ('screen', 'laptop'),
        (19, 'home'),
        ((1, 2), 'screen'),
    ]
    for ident, parent_ident in layout:
        tree.create_node(ident, ident, parent=parent_ident)

    serialised = tree.to_json()
    rebuilt = unjsonify(serialised)
    rebuilt.show()
    def test_03_get_hierarchy_for_module_returns_a_two_node_tree_when_another_module_depends_on_module(
            self, mock_client):
        """
        Test that get_hierarchy_for_module returns a tree structure with two nodes if a dependent module is found.

        DependencyGraph.module_search, dependency_search and dependency_read
        are replaced on the class itself; all three are restored in a
        ``finally`` block so that neither a failing assertion nor this test's
        own dependency_read mock can leak into later tests.

        :param mock_client: A mocked out version of erppeek.Client
        :return:
        """
        # Mock Up
        mock_dp = DependencyGraph
        orig_mod_search = mock_dp.module_search
        orig_dep_search = mock_dp.dependency_search
        orig_dep_read = mock_dp.dependency_read
        orig_client_search = mock_client.search

        def dependency_search_side_effect(value):
            # Module name -> list of ids; any other name yields None.
            if value == 'valid_module':
                return [666]
            elif value == 'dependent_module':
                return [668]

        def dependency_read_side_effect(value):
            # Id list -> names of the modules that depend on it.
            if value == [666]:
                return ['dependent_module']
            else:
                return []

        mock_dp.module_search = MagicMock(return_value=[666])
        mock_dp.dependency_search = MagicMock(
            side_effect=dependency_search_side_effect)
        mock_dp.dependency_read = MagicMock(
            side_effect=dependency_read_side_effect)

        try:
            mock_dg = mock_dp('valid_module')
            test_hierarchy = Tree()
            test_hierarchy.create_node('valid_module', 'valid_module')
            test_hierarchy.create_node('dependent_module',
                                       'dependent_module',
                                       parent='valid_module')
            self.assertEqual(
                mock_dg.hierarchy.to_json(), test_hierarchy.to_json(),
                'get_hierarchy_for_module did not return nested dict when finding dependent modules'
            )
        finally:
            # Mock Down
            # Restore the class attributes first so they are reset even if
            # the client clean-up below raises.
            mock_dp.module_search = orig_mod_search
            mock_dp.dependency_search = orig_dep_search
            mock_dp.dependency_read = orig_dep_read
            mock_client.stop()
            mock_client.search.stop()
            mock_client.search = orig_client_search
Example #8
0
    def test_create_tree(self):
        """create_tree() must arrange three file models into the expected tree.

        The expected tree is built by hand and the two trees are compared
        through their to_json() output.
        """
        file_models = [
            FileModel(
                "Bernardoow/Elm-SqlAlchemy-Replace/blob/master/tests/Tests.elm",
                132, 4.21, 'KB', 1, 'elm'),
            FileModel(
                "Bernardoow/Elm-SqlAlchemy-Replace/blob/master/src/test_model.py",
                73, 2.62, 'KB', 1, 'py'),
            FileModel(
                "Bernardoow/Elm-SqlAlchemy-Replace/blob/master/readme.md",
                73, 2.62, 'Bytes', 1, 'md'),
        ]
        actual = self.handler.create_tree(file_models)

        expected = Tree()
        expected.create_node("[Bernardoow]", "Bernardoow")  # root node

        # (tag, identifier, parent identifier) in creation order.
        descendants = (
            ("[Elm-SqlAlchemy-Replace]", "Elm-SqlAlchemy-Replace", "Bernardoow"),
            ("[blob]", "blob", "Elm-SqlAlchemy-Replace"),
            ("[master]", "master", "blob"),
            ("readme.md (73 linhas)", "readme.md", "master"),
            ("[tests]", "tests", "master"),
            ("Tests.elm (132 linhas)", "Tests.elm", "tests"),
            ("[src]", "src", "master"),
            ("test_model.py (73 linhas)", "test_model.py", "src"),
        )
        for tag, identifier, parent_id in descendants:
            expected.create_node(tag, identifier, parent=parent_id)

        self.assertEqual(actual.to_json(), expected.to_json())
	def test_03_get_hierarchy_for_module_returns_a_two_node_tree_when_another_module_depends_on_module(self, mock_client):
		"""
		Test that get_hierarchy_for_module returns a tree structure with two nodes if a dependent module is found.

		DependencyGraph.module_search, dependency_search and dependency_read are
		replaced on the class itself; all three are restored in a ``finally``
		block so that neither a failing assertion nor this test's own
		dependency_read mock can leak into later tests.

		:param mock_client: A mocked out version of erppeek.Client
		:return:
		"""
		# Mock Up
		mock_dp = DependencyGraph
		orig_mod_search = mock_dp.module_search
		orig_dep_search = mock_dp.dependency_search
		orig_dep_read = mock_dp.dependency_read
		orig_client_search = mock_client.search

		def dependency_search_side_effect(value):
			# Module name -> list of ids; any other name yields None.
			if value == 'valid_module':
				return [666]
			elif value == 'dependent_module':
				return [668]

		def dependency_read_side_effect(value):
			# Id list -> names of the modules that depend on it.
			if value == [666]:
				return ['dependent_module']
			else:
				return []

		mock_dp.module_search = MagicMock(return_value=[666])
		mock_dp.dependency_search = MagicMock(side_effect=dependency_search_side_effect)
		mock_dp.dependency_read = MagicMock(side_effect=dependency_read_side_effect)

		try:
			mock_dg = mock_dp('valid_module')
			test_hierarchy = Tree()
			test_hierarchy.create_node('valid_module', 'valid_module')
			test_hierarchy.create_node('dependent_module', 'dependent_module', parent='valid_module')
			self.assertEqual(mock_dg.hierarchy.to_json(), test_hierarchy.to_json(), 'get_hierarchy_for_module did not return nested dict when finding dependent modules')
		finally:
			# Mock Down
			# Restore the class attributes first so they are reset even if the
			# client clean-up below raises.
			mock_dp.module_search = orig_mod_search
			mock_dp.dependency_search = orig_dep_search
			mock_dp.dependency_read = orig_dep_read
			mock_client.stop()
			mock_client.search.stop()
			mock_client.search = orig_client_search
Example #10
0
def tree_test():
    """Build a four-node tree, print it and its JSON, then rebuild it with from_json().

    ``titles`` maps every test title to its position in ``test_titles`` and
    is passed to from_json() together with the JSON text.
    """
    test_titles = [
        "deletion_mapping",
        "orthotheca",
        "genetically_modified_sperm",
        "category:intelligence",
    ]
    titles = {title: idx for idx, title in enumerate(test_titles)}

    # Tree testing
    t = Tree()
    t.create_node("deletion_mapping", 15)
    t.create_node("orthotheca", 14, parent=15)
    t.create_node("genetically_modified_sperm", 13, parent=14)
    t.create_node("category:intelligence", 12, parent=14)
    t.show()

    # Named tree_json rather than json so the json module name is not shadowed.
    tree_json = t.to_json()
    # Single-argument print calls behave the same on Python 2 and Python 3.
    print("\nAs JSON:")
    print(tree_json)

    print("\nAnd parsed back into a tree.")
    t2 = from_json(tree_json, titles)
    t2.show()
Example #11
0
      avg = 0.0
    if parent is not None:
        if data is not None:
            tree.create_node(id,id,data=data,parent=parent)
        else:
            tree.create_node(id,id,data=0,parent=parent)
    else:
        if data is not None:
            tree.create_node(id,id,data=data)
        else:
            tree.create_node(id,id,data=0)
    dataval[id] = 0
    cursor2.append([id,region,parent,data])
    region_db[id] = region

# Serialise `tree` to a JSON string (asking for node data via with_data=True)
# and parse that string back into plain Python objects.
complete_data = tree.to_json(with_data=True)
resp = json.loads(complete_data)
print('JSON RESULT IS ',resp)

# iterdict is defined outside this excerpt; it receives the parsed structure.
iterdict(resp)


# Start a second, empty tree and walk the 5-column rows of cursor3.
# NOTE(review): the loop variable `id` shadows the builtin of the same name.
tree21 = Tree()
for id,region,parent,data,avg in cursor3:
    # The data value unpacked from the row is discarded immediately.
    data = None
    # Print id, region and parent on one line, separated by spaces.
    print(id,end=' ')
    print(region,end=' ')
    if parent is not None:
        print(int(parent),end=' ')
    else:
        print(None,end=' ')
Example #12
0
    def construct_tree(self, file_path, child_node_index, json_file_location):
        """Build and persist one tree for every ``.csv`` file under ``file_path``.

        Each CSV file becomes a tree whose root is ``root`` and whose only
        child of the root is the file itself.  The first CSV row is the header:
        it supplies the first ``NodeParam`` argument of every cell node.  Each
        later row adds nodes cell by cell:

        * a cell whose ``value + row[0]`` key is already registered for the
          file is skipped;
        * columns up to and including ``child_node_index`` form a chain:
          column 0 hangs off the file node, every other column hangs off the
          cell to its left;
        * later columns hang off the cell in column 3 of the same row; a cell
          containing newlines yields one node per line, an empty cell yields
          nothing.

        Every node is added both to the tree and, as a ``Node`` record, to a
        list.  For each CSV file the ``save2file`` output is written to
        ``json_file_location + <name> + ".json"``, the pickled tree to
        ``json_file_location + <name> + ".txt"``, the node list is passed to
        ``esgDatabase().add_data`` and the tree JSON is printed.  Files that
        do not end in ``.csv`` are ignored.

        :param file_path: directory that is walked recursively for CSV files
        :param child_node_index: index of the last column that belongs to the
            left-to-right chain
        :param json_file_location: prefix the output file names are appended
            to (plain string concatenation, so include a trailing separator)
        :return: ``'success'``, or ``'failed'`` if any exception was raised
        """
        node_list = []

        def encode_param(source, desc, node_id):
            # JSON payload stored on the tree node and on the Node record.
            return jsonpickle.encode(NodeParam(source, 'attr', desc, node_id),
                                     unpicklable=False)

        def add_node(tree, label, node_id, parent_id, payload):
            # Mirror one node into the tree and into node_list.  An empty
            # parent_id marks the root, which is created without a parent.
            if parent_id:
                tree.create_node(label, node_id, parent=parent_id,
                                 data=payload)
            else:
                tree.create_node(label, node_id, data=payload)
            node_list.append(Node(node_id, label, parent_id, '', payload))

        try:
            directory = os.path.join(file_path)
            for root, dirs, files in os.walk(directory):
                for file in files:
                    if not str(file).endswith(".csv"):
                        continue

                    # Join with the directory currently being walked so that
                    # files in sub-directories, and a file_path without a
                    # trailing separator, resolve correctly.
                    csv_path = os.path.join(root, file)
                    with open(csv_path, 'r') as f:
                        rows = list(csv.reader(f, delimiter=','))

                    filename = os.path.basename(csv_path)
                    # Maps "<cell value><row[0]>" keys to node ids.
                    node_ids = {"Root": "root", filename: filename.lower()}
                    esg_tree = Tree()
                    add_node(esg_tree, "Root", "root", '',
                             encode_param('source', 'desc', 'root'))
                    add_node(esg_tree, filename, filename.lower(), 'root',
                             encode_param('source', 'desc',
                                          str(uuid.uuid1())))

                    # rows[0] is the header; only the rows after it add nodes.
                    for row in rows[1:]:
                        for column_index, curr_column in enumerate(row):
                            if str(curr_column) + str(row[0]) in node_ids:
                                continue

                            if column_index > child_node_index:
                                parent_column = 3
                                if "\n" in curr_column:
                                    values = curr_column.splitlines()
                                elif curr_column != '':
                                    values = [curr_column]
                                else:
                                    values = []
                            else:
                                # -1 for column 0 means "parent is the file".
                                parent_column = column_index - 1
                                values = [curr_column]

                            for value in values:
                                node_id_key = str(value) + str(row[0])
                                # Register the id before resolving the parent,
                                # as the lookup below may hit the same key.
                                node_ids[node_id_key] = uuid.uuid1()
                                node_id = str(node_ids[node_id_key])
                                if parent_column < 0:
                                    parent_key = filename
                                else:
                                    parent_key = (str(row[parent_column]) +
                                                  str(row[0]))
                                parent_id = str(node_ids.get(parent_key))
                                add_node(
                                    esg_tree, value, node_id, parent_id,
                                    encode_param(rows[0][column_index],
                                                 str(value).lower(), node_id))

                    stem = filename.replace(".csv", '')
                    with open(json_file_location + stem + ".txt",
                              "wb") as outfile:
                        esg_tree.save2file(json_file_location + stem +
                                           ".json")
                        pickle.dump(esg_tree, outfile)
                    # NOTE(review): node_list is shared by all files, so this
                    # call also receives the nodes of previously processed
                    # files -- kept as in the original; confirm it is intended.
                    esgDatabase().add_data(node_list)
                    print(esg_tree.to_json(with_data=True))
            return 'success'
        except OSError:
            print("Path not found exception")
            return 'failed'
        except IOError:
            # On Python 3 IOError is an alias of OSError, so this branch can
            # only be reached on Python 2.
            print('An error occurred trying to read the file.')
            return 'failed'
        except Exception as e:
            print("An error occurred while creating a tree")
            print(e)
            return 'failed'
Example #13
0
class StepParse:
    def __init__(self):
        pass

    def load_step(self, step_filename):

        self.nauo_lines = []
        self.prod_def_lines = []
        self.prod_def_form_lines = []
        self.prod_lines = []
        self.filename = os.path.splitext(step_filename)[0]

        line_hold = ''
        line_type = ''

        # Find all search lines
        with open(step_filename) as f:
            for line in f:
                # TH: read pointer of lines as they are read, so if the file has text wrap it will notice and add it to the following lines
                index = re.search("#(.*)=", line)
                if index:
                    # TH: if not none then it is the start of a line so read it
                    # want to hold line until it has checked next line
                    # if next line is a new indexed line then save previous line
                    if line_hold:
                        if line_type == 'nauo':
                            self.nauo_lines.append(line_hold)
                        elif line_type == 'prod_def':
                            self.prod_def_lines.append(line_hold)
                        elif line_type == 'prod_def_form':
                            self.prod_def_form_lines.append(line_hold)
                        elif line_type == 'prod':
                            self.prod_lines.append(line_hold)
                        line_hold = ''
                        line_type = ''

                    prev_index = True  # TH remember previous line had an index
                    if 'NEXT_ASSEMBLY_USAGE_OCCURRENCE' in line:
                        line_hold = line.rstrip()
                        line_type = 'nauo'
                    elif ('PRODUCT_DEFINITION ' in line
                          or 'PRODUCT_DEFINITION(' in line):
                        line_hold = line.rstrip()
                        line_type = 'prod_def'
                    elif 'PRODUCT_DEFINITION_FORMATION' in line:
                        line_hold = line.rstrip()
                        line_type = 'prod_def_form'
                    elif ('PRODUCT ' in line or 'PRODUCT(' in line):
                        line_hold = line.rstrip()
                        line_type = 'prod'
                else:
                    prev_index = False
                    #TH: if end of file and previous line was held
                    if 'ENDSEC;' in line:
                        if line_hold:
                            if line_type == 'nauo':
                                self.nauo_lines.append(line_hold)
                            elif line_type == 'prod_def':
                                self.prod_def_lines.append(line_hold)
                            elif line_type == 'prod_def_form':
                                self.prod_def_form_lines.append(line_hold)
                            elif line_type == 'prod':
                                self.prod_lines.append(line_hold)
                            line_hold = ''
                            line_type = ''
                    else:
                        #TH: if not end of file
                        line_hold = line_hold + line.rstrip()

        self.nauo_refs = []
        self.prod_def_refs = []
        self.prod_def_form_refs = []
        self.prod_refs = []

        # TH: added 'replace(","," ").' to replace ',' with a space to make the spilt easier if there are not spaces inbetween the words'
        # Find all (# hashed) line references and product names
        # TH: it might be worth finding a different way of extracting data we do want rather than fixes to get rid of the data we don't
        for j, el_ in enumerate(self.nauo_lines):
            self.nauo_refs.append([
                el.rstrip(',')
                for el in el_.replace(",", " ").replace("=", " ").split()
                if el.startswith('#')
            ])
        for j, el_ in enumerate(self.prod_def_lines):
            self.prod_def_refs.append([
                el.rstrip(',')
                for el in el_.replace(",", " ").replace("=", " ").split()
                if el.startswith('#')
            ])
        for j, el_ in enumerate(self.prod_def_form_lines):
            self.prod_def_form_refs.append([
                el.rstrip(',')
                for el in el_.replace(",", " ").replace("=", " ").split()
                if el.startswith('#')
            ])
        for j, el_ in enumerate(self.prod_lines):
            self.prod_refs.append([
                el.strip(',') for el in el_.replace(",", " ").replace(
                    "(", " ").replace("=", " ").split() if el.startswith('#')
            ])
            self.prod_refs[j].append(el_.split("'")[1])

        # Get first two items in each sublist (as third is shape ref)
        #
        # First item is 'PRODUCT_DEFINITION' ref
        # Second item is 'PRODUCT_DEFINITION_FORMATION <etc>' ref
        self.prod_all_refs = [el[:2] for el in self.prod_def_refs]

        # Match up all references down to level of product name
        for j, el_ in enumerate(self.prod_all_refs):

            # Add 'PRODUCT_DEFINITION' ref
            for i, el in enumerate(self.prod_def_form_refs):
                if el[0] == el_[1]:
                    el_.append(el[1])
                    break

            # Add names from 'PRODUCT_DEFINITION' lines
            for i, el in enumerate(self.prod_refs):
                if el[0] == el_[2]:
                    el_.append(el[2])
                    break

        # Find all parent and child relationships (3rd and 2nd item in each sublist)
        self.parent_refs = [el[1] for el in self.nauo_refs]
        self.child_refs = [el[2] for el in self.nauo_refs]

        # Find distinct parts and assemblies via set operations; returns list, so no repetition of items
        self.all_type_refs = set(self.child_refs) | set(self.parent_refs)
        self.ass_type_refs = set(self.parent_refs)
        self.part_type_refs = set(self.child_refs) - set(self.parent_refs)
        #TH: find root node
        self.root_type_refs = set(self.parent_refs) - set(self.child_refs)

        # Create simple parts dictionary (ref + label)
        self.part_dict = {el[0]: el[3] for el in self.prod_all_refs}
#        self.part_dict_inv = {el[3]:el[0] for el in self.prod_all_refs}

    def show_values(self):
        # TH: basic testing, if needed these could be spilt up
        print(self.nauo_lines)
        print(self.prod_def_lines)
        print(self.prod_def_form_lines)
        print(self.prod_lines)
        print(self.nauo_refs)
        print(self.prod_def_refs)
        print(self.prod_def_form_refs)
        print(self.prod_refs)

#    HR: "create_dict" replaced by list comprehension elsewhere
#
#    def create_dict(self):
#
#        # TH: links nauo number with a name and creates dict
#        self.part_dict  = {}
#        for part in self.all_type_refs:
#            for sublist in self.prod_def_refs:
#                if sublist[0] == part:
#                    prod_loc = '#' + re.findall('\d+',sublist[1])[0]
#                    pass
#            for sublist in self.prod_def_form_refs:
#                if sublist[0] == prod_loc:
#                    prod_loc = '#' + str(re.findall('\d+',sublist[1])[0])
#                    pass
#            for sublist in self.prod_refs:
#                if sublist[0] == prod_loc:
#                    part_name = sublist[2]
#
#            self.part_dict[part] = part_name

    def create_tree(self):
        """Build ``self.tree`` from the parsed parent/child references.

        Nodes are numbered 0, 1, 2, ... in the order they are created
        (depth first), ``self.tree_dict`` maps each node number to its STEP
        reference, and every node carries ``{'ref': <reference>}`` as data.
        Finishes by calling ``get_levels``. When ``self.part_dict`` is empty
        the tree is left empty and nothing else happens.
        """
        self.tree = Tree()
        #TH: nothing to build a tree from, so don't bother
        if self.part_dict == {}:
            return

        #TH: find root node
        root_ref = list(self.root_type_refs)[0]
        # HR added part reference as data for later use
        self.tree.create_node(self.part_dict[root_ref],
                              0,
                              data={'ref': root_ref})

        #TH: each node needs a unique name, so keep a running counter and
        # remember which reference every node number stands for
        last_id = [0]
        self.tree_dict = {0: root_ref}

        def add_children(parent_id):
            """Attach every child of ``parent_id``, then recurse into it."""
            parent_ref = self.tree_dict[parent_id]
            for usage in self.nauo_refs:
                if usage[1] != parent_ref:
                    continue
                last_id[0] += 1
                child_id = last_id[0]
                self.tree_dict[child_id] = str(usage[2])
                self.tree.create_node(self.part_dict[usage[2]],
                                      child_id,
                                      parent=parent_id,
                                      data={'ref': str(usage[2])})
                add_children(child_id)

        add_children(0)
        self.appended = False

        self.get_levels()

    def get_levels(self):
        """Compute the lattice level of every tree node and build the lattice.

        For each node ``self.levels[id]`` holds 'n_p' and 'n_a': both are
        ``self.part_level`` (1) for a leaf; for any other node 'n_p' is the
        sum of its children's 'n_p' and 'n_a' is the sum of its children's
        'n_a' plus one. Also fills the level sets, calls ``create_lattice``
        and builds the inverse maps (level value -> list of node ids).
        """
        self.levels = {}
        self.levels_set_p = set()
        self.levels_set_a = set()
        self.leaf_ids = [leaf.identifier for leaf in self.tree.leaves()]
        self.all_ids = list(self.tree.nodes)
        self.non_leaf_ids = set(self.all_ids) - set(self.leaf_ids)

        self.part_level = 1

        # Work upwards from the deepest tree level so that the children of a
        # node are always populated before the node itself
        for depth in range(self.tree.depth(), -1, -1):
            ids_at_depth = [
                node_id for node_id in self.tree.nodes
                if self.tree.level(node_id) == depth
            ]
            for node_id in ids_at_depth:
                if node_id in self.leaf_ids:
                    self.levels[node_id] = {
                        'n_p': self.part_level,
                        'n_a': self.part_level,
                    }
                    continue

                # Assembly: accumulate over all direct children
                parts_total = 0
                assemblies_total = 0
                for child_id in self.tree.is_branch(node_id):
                    parts_total += self.levels[child_id]['n_p']
                    assemblies_total += self.levels[child_id]['n_a']
                assemblies_total += 1
                self.levels[node_id] = {
                    'n_p': parts_total,
                    'n_a': assemblies_total,
                }
                self.levels_set_p.add(parts_total)
                self.levels_set_a.add(assemblies_total)

        self.create_lattice()

        self.levels_p_sorted = sorted(self.levels_set_p)
        self.levels_a_sorted = sorted(self.levels_set_a)

        def invert(level_values, key):
            """Return {level value: [node ids]} for the given 'n_p'/'n_a' key."""
            inverse = {self.part_level: []}
            inverse.update((value, []) for value in level_values)
            for node_id, counts in self.levels.items():
                inverse[counts[key]].append(node_id)
            return inverse

        self.levels_p_inv = invert(self.levels_p_sorted, 'n_p')
        self.levels_a_inv = invert(self.levels_a_sorted, 'n_a')

    def get_all_children(self, id_):

        ancestors = [el.identifier for el in self.tree.children(id_)]
        parents = ancestors
        while parents:
            children = []
            for parent in parents:
                children = [el.identifier for el in self.tree.children(parent)]
                ancestors.extend(children)
                parents = children
        return ancestors

    def create_lattice(self):
        """Build the directed graph ``self.g`` that mirrors ``self.tree``.

        Every tree node becomes a graph node with the same identifier,
        carrying its parent id (-1 for the root), its label and the default
        colour; every non-root node gets an edge pointing to its parent.
        Leaves are then spread along y = 1 and each assembly is placed at the
        mean x position of its children, with y equal to its 'n_a' level.

        Reads ``self.leaf_ids``, ``self.levels`` and ``self.levels_set_a``,
        which ``get_levels`` fills in before calling this method.
        """
        # Create lattice
        self.g = nx.DiGraph()
        self.default_colour = 'r'

        # Root node: parent is -1 to maintain the data type of "parent"
        root_id = self.tree.root
        root_label = self.tree.get_node(root_id).tag
        self.g.add_node(root_id,
                        parent=-1,
                        label=root_label,
                        colour=self.default_colour)

        # Remaining nodes, taken from the treelib "nodes" dictionary;
        # node IDs are the same as for the tree
        for key in self.tree.nodes:
            if key == self.tree.root:
                continue
            parent_id = self.tree.parent(key).identifier
            label_text = self.tree.get_node(key).tag
            self.g.add_node(key,
                            parent=parent_id,
                            label=label_text,
                            colour=self.default_colour)

        # Edges run from each non-root node to its parent
        for key in self.tree.nodes:
            if key != self.tree.root:
                self.g.add_edge(key, self.tree.parent(key).identifier)

        # Escape if only one node
        # HR 6/3/20 QUICK BUG FIX: SINGLE-NODE TREE DOES NOT PLOT
        # IMPROVE LATER; SHOULD BE PART OF A GENERAL METHOD
        if self.tree.size() == 1:
            id_ = [el.identifier for el in self.tree.leaves()]
            self.g.nodes[id_[-1]]['pos'] = (0, 0)
            return

        # Get set of parents of leaf nodes
        leaf_parents = set(
            self.tree.parent(el).identifier for el in self.leaf_ids)

        # Leaf identifiers are collected once here; they used to be rebuilt
        # for every single child inside the loop below
        tree_leaves = self.tree.leaves()
        no_leaves = len(tree_leaves)
        tree_leaf_ids = set(leaf.identifier for leaf in tree_leaves)

        # For each leaf parent, set position of its leaf nodes sequentially
        i = 0
        for el in leaf_parents:
            for el_ in self.tree.is_branch(el):
                if el_ in tree_leaf_ids:
                    self.g.nodes[el_]['pos'] = ((i / (no_leaves)), 1)
                    i += 1

        # To set plot positions of nodes from lattice levels
        # ---
        # Traverse upwards from leaves
        for el in sorted(self.levels_set_a):
            # Get all nodes at that level
            node_ids = [k for k, v in self.levels.items() if v['n_a'] == el]
            # Place each node at the mean x position of its children
            for el_ in node_ids:
                child_ids = self.tree.is_branch(el_)
                pos_sum = 0
                for el__ in child_ids:
                    pos_sum += self.g.nodes[el__]['pos'][0]
                pos_sum = pos_sum / len(child_ids)
                self.g.nodes[el_]['pos'] = (pos_sum, el)

    def print_tree(self):

        try:
            self.tree.show()
        except:
            self.create_tree()
            self.tree.show()

    def tree_to_json(self, save_to_file=False, filename='file', path=''):

        #TH: return json format tree, can also save to file
        if self.tree.size() != 0:
            data = self.tree.to_json()
            j = json.loads(data)
            if save_to_file == True:
                if path:
                    file_path = os.path.join(path, filename)
                else:
                    file_path = filename

                with open(file_path + '.json', 'w') as outfile:
                    json.dump(j, outfile)

            return data
        else:
            print("no tree to print")
            return
Example #14
0
def tree_build_from_list(containers):
    """
    Build a tree based on an unsorted list.

    Build a tree of containers based on an unsorted list of containers.
    Containers whose parent never shows up in the tree are left out.

    Example:
    --------
        >>> containers = [
            {
                "childContainerKey": None,
                "configlets": [],
                "devices": [],
                "imageBundle": "",
                "key": "root",
                "name": "Tenant",
                "parentName": None
            },
            {
                "childContainerKey": None,
                "configlets": [
                    "veos3-basic-configuration"
                ],
                "devices": [
                    "veos-1"
                ],
                "imageBundle": "",
                "key": "container_43_840035860469981",
                "name": "staging",
                "parentName": "Tenant"
            }]
        >>> print(tree_build_from_list(containers=containers))
            {"Tenant": {"children": ["staging"]}}
    Parameters
    ----------
    containers : list
        Container topology: one dict per container, each providing at least
        the 'name' and 'parentName' keys

    Returns
    -------
    json
        tree topology
    """
    # Create tree object
    tree = Tree()
    # Create root node to mimic CVP behavior
    tree.create_node("Tenant", "Tenant")
    # First level: containers directly attached under root.
    for cvp_container in containers:
        if cvp_container['parentName'] is None:
            continue
        if cvp_container['parentName'] in ['Tenant']:
            tree.create_node(cvp_container['name'],
                             cvp_container['name'],
                             parent=cvp_container['parentName'])
    # The list is unsorted, so keep sweeping it until every container is
    # placed. A sweep that places nothing means the remaining containers can
    # never be attached (unknown parent or duplicate name), so stop there
    # instead of looping forever.
    while len(tree.all_nodes()) < len(containers):
        placed_any = False
        for cvp_container in containers:
            name = cvp_container['name']
            parent = cvp_container['parentName']
            if tree.contains(parent) and not tree.contains(name):
                tree.create_node(name, name, parent=parent)
                placed_any = True
        if not placed_any:
            break
    return tree.to_json()
Example #15
0
def tree_build_from_dict(containers=None):
    """
    Build a tree based on an unsorted dict.

    Build a tree of containers based on an unsorted dict of containers.
    Containers whose parent never shows up in the tree are left out.

    Example:
    --------
        >>> containers = {'Fabric': {'parent_container': 'Tenant'},
            'Leaves': {'configlets': ['container_configlet'],
                        'devices': ['veos01'],
                        'images': ['4.22.0F'],
                        'parent_container': 'Fabric'},
            'MLAG01': {'configlets': ['container_configlet'],
                        'devices': ['veos01'],
                        'images': ['4.22.0F'],
                        'parent_container': 'Leaves'},
            'MLAG02': {'configlets': ['container_configlet'],
                        'devices': ['veos01'],
                        'images': ['4.22.0F'],
                        'parent_container': 'Leaves'},
            'Spines': {'configlets': ['container_configlet'],
                        'devices': ['veos01'],
                        'images': ['4.22.0F'],
                        'parent_container': 'Fabric'}}
        >>> print(tree_build_from_dict(containers=containers))
            {"Tenant": {"children": [{"Fabric": {"children": [{"Leaves": {"children": ["MLAG01", "MLAG02"]}}, "Spines"]}}]}}
    Parameters
    ----------
    containers : dict, optional
        Container topology to create on CVP, keyed by container name; each
        value provides at least 'parent_container'. By default None, which
        is treated as an empty topology

    Returns
    -------
    json
        tree topology
    """
    # None used to crash on .items(); treat it as "no containers"
    if containers is None:
        containers = {}
    # Create tree object
    tree = Tree()
    # Create root node to mimic CVP behavior
    tree.create_node("Tenant", "Tenant")
    # First level: containers directly attached under root.
    for container_name, container_info in containers.items():
        if container_info['parent_container'] in ['Tenant']:
            tree.create_node(container_name,
                             container_name,
                             parent=container_info['parent_container'])
    # The dict is unsorted, so keep sweeping it until every container is
    # placed (+1 accounts for the root). A sweep that places nothing means
    # the remaining containers have an unknown parent, so stop there instead
    # of looping forever.
    while len(tree.all_nodes()) < len(containers) + 1:
        placed_any = False
        for container_name, container_info in containers.items():
            parent = container_info['parent_container']
            if parent in ['Tenant'] or not tree.contains(parent):
                continue
            if not tree.contains(container_name):
                tree.create_node(container_name,
                                 container_name,
                                 parent=parent)
                placed_any = True
        if not placed_any:
            break
    return tree.to_json()
    def test_04_get_hierarchy_renames_duplicate_modules(self, mock_client):
        """
        Test that get_hierarchy_for_module renames the ID of the second
        instance of a dependent module, so that module can be listed
        multiple times:
        - valid_module
          - dependent_module_one
          - dependent_module_two
            - dependent_module_one (node ID dependent_module_one1)

        :param mock_client: A mocked out version of erppeek.Client
        :return:
        """
        # Mock Up
        mock_dp = DependencyGraph
        not_set = object()
        orig_mod_search = mock_dp.module_search
        orig_dep_search = mock_dp.dependency_search
        # Raw class-dict entry, so restoring it cannot change the kind of
        # attribute (plain function, staticmethod, inherited, ...)
        orig_dep_read = vars(mock_dp).get('dependency_read', not_set)
        orig_client_search = mock_client.search

        def dependency_search_side_effect(value):
            if value == 'valid_module':
                return [666]
            elif value == 'dependent_module_one':
                return [668]
            elif value == 'dependent_module_two':
                return [664]

        def dependency_read_side_effect(value):
            if value == [666]:
                return ['dependent_module_one', 'dependent_module_two']
            elif value == [664]:
                return ['dependent_module_one']
            else:
                return []

        # The teardown lives in "finally" so a failing assertion cannot leave
        # DependencyGraph patched for the tests that run afterwards
        try:
            mock_dp.module_search = MagicMock(return_value=[666])
            mock_dp.dependency_search = MagicMock()
            mock_dp.dependency_search.side_effect = dependency_search_side_effect
            mock_dp.dependency_read = MagicMock()
            mock_dp.dependency_read.side_effect = dependency_read_side_effect

            mock_dg = mock_dp('valid_module')
            test_hierarchy = Tree()
            test_hierarchy.create_node('valid_module', 'valid_module')
            test_hierarchy.create_node('dependent_module_one',
                                       'dependent_module_one',
                                       parent='valid_module')
            test_hierarchy.create_node('dependent_module_two',
                                       'dependent_module_two',
                                       parent='valid_module')
            test_hierarchy.create_node('dependent_module_one',
                                       'dependent_module_one1',
                                       parent='dependent_module_two')
            self.assertEqual(
                mock_dg.hierarchy.to_json(), test_hierarchy.to_json(),
                'get_hierarchy_for_module did not return nested dict when finding dependent modules'
            )
        finally:
            # Mock Down
            mock_client.stop()
            mock_dp.module_search.stop()
            mock_client.search.stop()
            mock_client.search = orig_client_search
            mock_dp.dependency_search.stop()
            mock_dp.module_search = orig_mod_search
            mock_dp.dependency_search = orig_dep_search
            # dependency_read was patched too and was never put back
            if orig_dep_read is not_set:
                if 'dependency_read' in vars(mock_dp):
                    delattr(mock_dp, 'dependency_read')
            else:
                mock_dp.dependency_read = orig_dep_read
Example #17
0
class StepParse:
    def __init__(self):
        pass

    def load_step(self, step_filename):
        self.nauo_lines = []
        self.prod_def_lines = []
        self.prod_def_form_lines = []
        self.prod_lines = []
        self.filename = os.path.splitext(step_filename)[0]

        line_hold = ''
        line_type = ''

        # Find all search lines
        with open(step_filename) as f:
            for line in f:
                # TH: read pointer of lines as they are read, so if the file has text wrap it will notice and add it to the following lines
                index = re.search("#(.*)=", line)
                if index:
                    # TH: if not none then it is the start of a line so read it
                    # want to hold line until it has checked next line
                    # if next line is a new indexed line then save previous line
                    if line_hold:
                        if line_type == 'nauo':
                            self.nauo_lines.append(line_hold)
                        elif line_type == 'prod_def':
                            self.prod_def_lines.append(line_hold)
                        elif line_type == 'prod_def_form':
                            self.prod_def_form_lines.append(line_hold)
                        elif line_type == 'prod':
                            self.prod_lines.append(line_hold)
                        line_hold = ''
                        line_type = ''

                    prev_index = True  # TH rememeber previous line had an index
                    if 'NEXT_ASSEMBLY_USAGE_OCCURRENCE' in line:
                        line_hold = line.rstrip()
                        line_type = 'nauo'
                    elif ('PRODUCT_DEFINITION ' in line
                          or 'PRODUCT_DEFINITION(' in line):
                        line_hold = line.rstrip()
                        line_type = 'prod_def'
                    elif 'PRODUCT_DEFINITION_FORMATION' in line:
                        line_hold = line.rstrip()
                        line_type = 'prod_def_form'
                    elif ('PRODUCT ' in line or 'PRODUCT(' in line):
                        line_hold = line.rstrip()
                        line_type = 'prod'
                else:
                    prev_index = False
                    #TH: if end of file and previous line was held
                    if 'ENDSEC;' in line:
                        if line_hold:
                            if line_type == 'nauo':
                                self.nauo_lines.append(line_hold)
                            elif line_type == 'prod_def':
                                self.prod_def_lines.append(line_hold)
                            elif line_type == 'prod_def_form':
                                self.prod_def_form_lines.append(line_hold)
                            elif line_type == 'prod':
                                self.prod_lines.append(line_hold)
                            line_hold = ''
                            line_type = ''
                    else:
                        #TH: if not end of file
                        line_hold = line_hold + line.rstrip()

        self.nauo_refs = []
        self.prod_def_refs = []
        self.prod_def_form_refs = []
        self.prod_refs = []

        # TH: added 'replace(","," ").' to replace ',' with a space to make the spilt easier if there are not spaces inbetween the words'

        # Find all (# hashed) line references and product names
        # TH: it might be worth finding a different way of extracting data we do want rather than fixes to get rid of the data we don't
        for j in range(len(self.nauo_lines)):
            self.nauo_refs.append([
                el.rstrip(',')
                for el in self.nauo_lines[j].replace(",", " ").replace(
                    "=", " ").split() if el.startswith('#')
            ])
        for j in range(len(self.prod_def_lines)):
            self.prod_def_refs.append([
                el.rstrip(',') for el in self.prod_def_lines[j].replace(
                    ",", " ").replace("=", " ").split() if el.startswith('#')
            ])
        for j in range(len(self.prod_def_form_lines)):
            self.prod_def_form_refs.append([
                el.rstrip(',') for el in self.prod_def_form_lines[j].replace(
                    ",", " ").replace("=", " ").split() if el.startswith('#')
            ])
        for j in range(len(self.prod_lines)):
            self.prod_refs.append([
                el.strip(',')
                for el in self.prod_lines[j].replace(",", " ").replace(
                    "(", " ").replace("=", " ").split() if el.startswith('#')
            ])
            self.prod_refs[j].append(self.prod_lines[j].split("'")[1])

        # Get first two items in each sublist (as third is shape ref)
        #
        # First item is 'PRODUCT_DEFINITION' ref
        # Second item is 'PRODUCT_DEFINITION_FORMATION <etc>' ref
        self.prod_all_refs = [el[:2] for el in self.prod_def_refs]

        # Match up all references down to level of product name
        for j in range(len(self.prod_all_refs)):

            # Add 'PRODUCT_DEFINITION' ref
            for i in range(len(self.prod_def_form_refs)):
                if self.prod_def_form_refs[i][0] == self.prod_all_refs[j][1]:
                    self.prod_all_refs[j].append(self.prod_def_form_refs[i][1])
                    break

            # Add names from 'PRODUCT_DEFINITION' lines
            for i in range(len(self.prod_refs)):
                if self.prod_refs[i][0] == self.prod_all_refs[j][2]:
                    self.prod_all_refs[j].append(self.prod_refs[i][2])
                    break

        # Find all parent and child relationships (3rd and 2nd item in each sublist)
        self.parent_refs = [el[1] for el in self.nauo_refs]
        self.child_refs = [el[2] for el in self.nauo_refs]

        # Find distinct parts and assemblies via set operations; returns list, so no repetition of items
        self.all_type_refs = set(self.child_refs) | set(self.parent_refs)
        self.ass_type_refs = set(self.parent_refs)
        self.part_type_refs = set(self.child_refs) - set(self.parent_refs)

        # Get first two items in each sublist (as third is shape ref)
        #
        # First item is 'PRODUCT_DEFINITION' ref
        # Second item is 'PRODUCT_DEFINITION_FORMATION <etc>' ref
        self.prod_all_refs = [el[:2] for el in self.prod_def_refs]

        # Match up all references down to level of product name
        for j in range(len(self.prod_all_refs)):

            # Add 'PRODUCT_DEFINITION' ref
            for i in range(len(self.prod_def_form_refs)):
                if self.prod_def_form_refs[i][0] == self.prod_all_refs[j][1]:
                    self.prod_all_refs[j].append(self.prod_def_form_refs[i][1])
                    break

            # Add names from 'PRODUCT_DEFINITION' lines
            for i in range(len(self.prod_refs)):
                if self.prod_refs[i][0] == self.prod_all_refs[j][2]:
                    self.prod_all_refs[j].append(self.prod_refs[i][2])
                    break

        # Find all parent and child relationships (3rd and 2nd item in each sublist)
        self.parent_refs = [el[1] for el in self.nauo_refs]
        self.child_refs = [el[2] for el in self.nauo_refs]

        # Find distinct parts and assemblies via set operations; returns list, so no repetition of items
        self.all_type_refs = set(self.child_refs) | set(self.parent_refs)
        self.ass_type_refs = set(self.parent_refs)
        self.part_type_refs = set(self.child_refs) - set(self.parent_refs)
        #TH: find root node
        self.root_type_refs = set(self.parent_refs) - set(self.child_refs)
        self.create_dict()

    def show_values(self):
        # TH: basic testing, if needed these could be spilt up
        print(self.nauo_lines)
        print(self.prod_def_lines)
        print(self.prod_def_form_lines)
        print(self.prod_lines)
        print(self.nauo_refs)
        print(self.prod_def_refs)
        print(self.prod_def_form_refs)
        print(self.prod_refs)

    def create_dict(self):
        # TH: links nauo number with a name and creates dict
        self.part_dict = {}
        for part in self.all_type_refs:
            for sublist in self.prod_def_refs:
                if sublist[0] == part:
                    prod_loc = '#' + re.findall('\d+', sublist[1])[0]
                    pass
            for sublist in self.prod_def_form_refs:
                if sublist[0] == prod_loc:
                    prod_loc = '#' + str(re.findall('\d+', sublist[1])[0])
                    pass
            for sublist in self.prod_refs:
                if sublist[0] == prod_loc:
                    part_name = sublist[2]

            self.part_dict[part] = part_name

    def create_tree(self):
        """Build ``self.tree`` from the parsed parent/child references.

        Nodes are numbered 0, 1, 2, ... in the order they are created
        (depth first) and ``self.tree_dict`` maps each node number to its
        STEP reference. When ``self.part_dict`` is empty the tree is left
        empty and nothing else happens.
        """
        self.tree = Tree()
        #TH: nothing to build a tree from, so don't bother
        if self.part_dict == {}:
            return

        #TH: find root node
        root_ref = list(self.root_type_refs)[0]
        self.tree.create_node(self.part_dict[root_ref], 0)

        #TH: each node needs a unique name, so keep a running counter and
        # remember which reference every node number stands for
        last_id = [0]
        self.tree_dict = {0: root_ref}

        def add_children(parent_id):
            """Attach every child of ``parent_id``, then recurse into it."""
            parent_ref = self.tree_dict[parent_id]
            for usage in self.nauo_refs:
                if usage[1] != parent_ref:
                    continue
                last_id[0] += 1
                child_id = last_id[0]
                self.tree_dict[child_id] = str(usage[2])
                self.tree.create_node(self.part_dict[usage[2]],
                                      child_id,
                                      parent=parent_id)
                add_children(child_id)

        add_children(0)

    def print_tree(self):
        try:
            self.tree.show()
        except:
            self.create_tree()
            self.tree.show()

    def tree_to_json(self, save_to_file=False, filename='file', path=''):
        #TH: return json format tree, can also save to file
        if self.tree.size() != 0:
            data = self.tree.to_json()
            j = json.loads(data)
            if save_to_file == True:
                if path:
                    file_path = os.path.join(path, filename)
                else:
                    file_path = filename

                with open(file_path + '.json', 'w') as outfile:
                    json.dump(j, outfile)

            return data
        else:
            print("no tree to print")
            return
Example #18
0
class DataCrawler():
    """Crawl the site menu with a browser driver and record pages in a tree.

    Every discovered menu entry / table row becomes a node of ``dataTree``.
    Node identifiers are slash-terminated paths built from lower-cased link
    texts (``"homepage/<section>/<page>/"``); the node data is the link URL.

    NOTE(review): ``browser`` stays ``None`` unless the caller assigns a
    driver object (the ``webdriver.Chrome()`` construction in ``__init__`` is
    commented out), so ``login`` / ``get_HTML_From_URL`` need one to be set.
    """

    USERNAME = "******"
    PASSWORD = "******"

    LOGIN_URL = "https://www.tgwiki.com/CookieAuth.dll?GetLogon?curl=Z2F&reason=0&formdir=9"
    URL = "https://www.tgwiki.com"

    # Substrings that mark a link as a folder that should be descended into.
    DIRECTORY = ["RootFolder"]
    # Links ending with this suffix are treated as pages to descend into.
    URL_suffix = ".aspx"

    EXCEPTION_MENU_ITEM = "Service Level Agreement"

    browser = None
    dataTree = None

    def __init__(self):
        """Create the result tree with a single ``homepage/`` root node."""
        # self.browser = webdriver.Chrome()
        self.array = []
        self.dataTree = Tree()
        self.dataTree.create_node("Homepage", "homepage/", data=self.URL)

    def login(self):
        """Log in through the login form and navigate to the department page.

        :return: HTML source of the page the browser ends up on.
        """
        self.browser.get(self.LOGIN_URL)
        username = self.browser.find_element_by_id('username')
        username.send_keys(self.USERNAME)
        password = self.browser.find_element_by_id('password')
        password.send_keys(self.PASSWORD)
        self.browser.find_element_by_id('SubmitCreds').click()
        self.browser.find_element_by_xpath('//a[@href="/department"]').click()
        self.browser.find_element_by_xpath(
            '//a[@href="/department/citd"]').click()
        return self.browser.page_source

    def get_HTML_From_URL(self, url):
        """Load ``url`` in the browser and return the resulting page source."""
        print("Accessing " + str(url))
        self.browser.get(url)
        return self.browser.page_source

    def get_menu(self, soup):
        """Add the vertical menu found in ``soup`` to ``dataTree``.

        Each ``li.static`` entry becomes a child of ``homepage/``. An entry
        without nested ``li.dynamic`` items is resolved to a link itself;
        otherwise each nested item is added below it through ``parseLink``.
        Links recognised by ``isDirectory`` are fetched and their tables are
        crawled with ``parseTable``.

        :param soup: parsed page containing the menu.
        """
        menu = soup.find(class_="menu vertical menu-vertical")
        for ele in menu.findAll("li", class_="static"):
            name = ele.find(class_="menu-item-text")
            inner_ele = ele.findAll("li", class_="dynamic")
            link_tag = None
            link = None
            parentID = "homepage/"
            if not inner_ele:
                # Leaf menu entry: the entry itself carries the link.
                link_tag, link = self.parseLink(ele)
            self.dataTree.create_node(name.get_text(),
                                      parentID + name.get_text().lower() + "/",
                                      data=link,
                                      parent=parentID)
            if self.isDirectory(link):
                _html = self.get_HTML_From_URL(link)
                page = BeautifulSoup(_html, "lxml")
                self.parseTable(page,
                                parentID + link_tag.get_text().lower() + "/")

            if name.get_text() == "Technology Update":
                # Debugging hook kept from the original implementation.
                print("")

            # Sub-menu items hang below the node created for this entry.
            parentID = "homepage/" + name.get_text().lower() + "/"
            for small_ele in inner_ele:
                link_tag, link = self.parseLink(small_ele, _parent=parentID)
                print("CHECKING IF " + str(link) + " IS DIRECTORY...")
                if self.isDirectory(link):
                    _html = self.get_HTML_From_URL(link)
                    page = BeautifulSoup(_html, "lxml")
                    self.parseTable(
                        page, parentID + link_tag.get_text().lower() + "/")

    def parseLink(self, soup_result, _parent=None):
        """Extract the first anchor of ``soup_result`` and optionally store it.

        Site-relative links (starting with ``/``) are made absolute with
        ``URL``. An anchor without an ``href`` (or with an empty one) yields
        its link value unchanged instead of raising.

        :param soup_result: element whose ``.a`` attribute is the anchor.
        :param _parent: identifier of the tree node to attach the link to;
            when ``None`` nothing is added to ``dataTree``.
        :return: ``(link_tag, link)``; both are ``None`` when there is no
            anchor.
        """
        print("praseLink")
        link_tag = soup_result.a
        if link_tag is None:
            return link_tag, None

        link = link_tag.get('href')
        # Guard against a missing/empty href before looking at its first char.
        if link and link.startswith('/'):
            link = self.URL + link

        if _parent is not None:
            print("############")
            print("Tag: " + str(link_tag))
            print("Text: " + link_tag.get_text())
            print("Link: " + str(link))
            print("Parent: " + str(_parent))
            print("############")
            try:
                self.dataTree.create_node(
                    str(link_tag.get_text()),
                    _parent + str(link_tag.get_text().lower() + "/"),
                    data=link,
                    parent=_parent)
            except treelib.tree.DuplicatedNodeIdError:
                # The same link text already exists below this parent.
                print("duplicated")

        return link_tag, link

    def parseTable(self, soup_result, _parent=None):
        """Crawl the first table with a ``summary`` attribute in a page.

        Every ``td.ms-vb-title`` cell of that table is passed to
        ``parseLink``; links recognised by ``isDirectory`` are fetched and
        crawled recursively. ``AttributeError`` raised while walking the
        page (e.g. a table without ``tbody``) is printed and ends the crawl
        of this page.

        :param soup_result: parsed page to search for tables.
        :param _parent: identifier of the tree node the rows belong to.
        """
        print("-------------------------------------------------------")
        print("parseTable")
        try:
            for table in soup_result.findAll("table"):
                if table.has_attr("summary"):
                    table_body = table.find('tbody')
                    row_list = table_body.findAll(
                        'td', attrs={"class": "ms-vb-title"})
                    for row in row_list:
                        link_tag, link = self.parseLink(row, _parent)
                        if self.isDirectory(link):
                            _html = self.get_HTML_From_URL(link)
                            soup = BeautifulSoup(_html, "lxml")
                            self.parseTable(
                                soup,
                                _parent + link_tag.get_text().lower() + "/")
                    # Only the first table carrying a summary is processed.
                    break

        except AttributeError as e:
            print(e)
        print("-------------------------------------------------------")

    def isDirectory(self, link):
        """Tell whether ``link`` should be descended into.

        :param link: URL string or ``None``.
        :return: ``True`` when the link ends with ``URL_suffix`` or ``/``, or
            contains one of the ``DIRECTORY`` markers; ``False`` otherwise
            (including for ``None``).
        """
        if link is None:
            return False
        if link.endswith(self.URL_suffix):
            return True
        if link.endswith("/"):
            print("dfasfdasfafafsdfdf")
            return True
        return any(directory in link for directory in self.DIRECTORY)

    def writeToJSONFile(self, path, fileName, data):
        """Serialise ``data`` with ``json.dump`` to ``./<path>/<fileName>.json``."""
        filePathNameWExt = './' + path + '/' + fileName + '.json'
        with open(filePathNameWExt, 'w') as fp:
            json.dump(data, fp)

    def main(self):
        """Log in, crawl the menu, print the tree and write it to disk."""
        _html = self.login()
        soup = BeautifulSoup(_html, "lxml")
        self.get_menu(soup)
        self.dataTree.show()
        # NOTE(review): treelib's to_json() appears to return a JSON *string*,
        # so the dumps() below and the dump() in writeToJSONFile re-encode it
        # again - confirm the intended file format before changing this.
        tree_in_dict = self.dataTree.to_json(with_data=True)
        tree_in_json = json.dumps(tree_in_dict,
                                  indent=4,
                                  sort_keys=True,
                                  ensure_ascii=False)
        self.writeToJSONFile('./', 'training', tree_in_json)
        self.dataTree.save2file('tree_diagiam.json')

    def test(self):
        """Load ``./ITSM_training.json`` and print a tree rebuilt from it."""
        file_directory = "./ITSM_training.json"
        # Use a context manager so the file handle is always closed.
        with open(file_directory) as json_file:
            data = json.load(json_file)
        # NOTE(review): ``node`` is not defined anywhere in the visible code;
        # verify which module is meant to provide ``Tree`` here.
        hello = node.Tree(tree=data)
        print(hello)
Example #19
0
from treelib import Node, Tree
import json
# Load the node descriptions; every value of the top-level object is iterated
# as a list of records with 'id', 'tag' and 'parent' entries.
with open('jsondata.txt') as json_file:
    data = json.load(json_file)

tree = Tree()
# Root node that the records' parent ids ultimately refer to.
tree.create_node(identifier='0', data='<html></html>')

for records in data.values():
    for record in records:
        _id = str(record['id'])
        _tag = str(record['tag'])
        # 'parent' is iterated, so it is expected to be a collection of ids.
        # NOTE(review): more than one parent for a record would make
        # create_node see the same identifier twice - confirm the data never
        # contains that.
        for parent_id in record['parent']:
            tree.create_node(identifier=_id, parent=str(parent_id), data=_tag)

tree.show()
x = tree.to_json()
print(x)
# NOTE(review): data_property is given as True here; check against the treelib
# version in use whether an attribute name (string) is expected instead.
tree.save2file('tree.txt', data_property=True)
# print(x)
# print(key)
        #                                     except:
        #                                         continue
        #                                 else:
        #                                     continue
        #                         except:
        #                             continue
        #                     else:
        #                         continue
        #             except:
        #                 continue
        #         else:
        #             continue
        # else:
        #     continue


# Script entry point: run main(), then print the tree `t` both as a diagram
# and as JSON without node data.
# NOTE(review): `main` and `t` are defined outside this excerpt - presumably
# `t` is a module-level tree that main() populates; confirm.
if __name__ == "__main__":
    main()
    # print(t.to_json(with_data=False))
    t.show()

    print(t.to_json(with_data=False))

    # for node in t.traverse("preorder"):
    # # Do some analysis on node
    #     print(node.render)

    # r = Rubrik()
    # r.add_parent("test")
    # r.printRubrik()
Example #21
0
    domain = tldextract.extract(url).domain  
    subdomain = tldextract.extract(url).subdomain
    if not (tree.contains(domain)):
        tree.create_node(domain, domain, parent="ID of root node") #Add domains to root node
    if subdomain:    
        tree.create_node(subdomain, subdomain+domain, parent=domain) #Add sub-domains to domain node



file.close()

tree.show(line_type="ascii-emv")  # show data on stdout

# Render the tree as a PNG: graphviz source -> PostScript -> PNG.
tree.to_graphviz(filename="tree_graphviz")  # dump tree as graphviz
# dot tree_graphviz -Tps -o output.ps -Grankdir=LR  (LR = build left to right)
subprocess.call(["dot", "tree_graphviz", "-Tps", "-o", "output.ps", "-Grankdir=LR"])
# convert -flatten -density 150 -geometry 100% output.ps tree_graphviz.png
subprocess.call(["convert", "-flatten", "-density", "150", "-geometry", "100%", "output.ps",
                 "tree_graphviz.png"], stderr=subprocess.DEVNULL)  # convert graphviz to png

# Remove the intermediate files with the standard library instead of spawning
# `rm`; a missing file (e.g. because `dot` failed) is simply skipped.
for intermediate in ("tree_graphviz", "output.ps"):
    if os.path.exists(intermediate):
        os.remove(intermediate)

# Start from a clean text dump - presumably because save2file appends to an
# existing file; confirm against the treelib version in use.
if os.path.exists("output.txt"):
    os.remove("output.txt")
tree.save2file('output.txt', line_type="ascii-emv")  # dump tree as text file
with open('output.json', 'w') as f:  # dump tree as json
    f.write(tree.to_json(with_data=True))