class TreePipeline(object):
    def open_spider(self, spider):
        self.tree = Tree()
        self.tree.create_node("root", "root")

    def process_item(self, item, spider):
        lst = item['text']
        lst = [x.strip() for x in [y.replace('...', '') for y in lst]]
        item['pagetitle'] = item['pagetitle'].replace('...', '')
        lst[-1] = item['pagetitle']
        for idx, elem in enumerate(lst):
            if idx == 0:
                previous = "root"
            else:
                previous = "|".join(lst[:idx])
            elem = "|".join(lst[:idx + 1])
            # elem = elem.replace('...', '')
            elem = elem.encode('utf-8').decode('utf-8')
            if not self.tree.contains(elem):
                print("Adding node %s" % elem)
                self.tree.create_node(elem, elem, parent=previous)
        # self.tree.show()
        return item

    def close_spider(self, spider):
        self.tree.show()
        with open(makepath('data/cats/tree.json'), 'w') as outfile:
            outfile.write(self.tree.to_json())
        self.tree.save2file(makepath('data/cats/tree.tree'))
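# 'makepath' is used by TreePipeline above but is not defined in the snippet.
# A minimal sketch of what it presumably does (the name and behavior are
# assumptions, not taken from the original project): create the parent
# directory if needed and return the path unchanged.
import os

def makepath(path):
    """Ensure the parent directory of 'path' exists, then return 'path'."""
    os.makedirs(os.path.dirname(path), exist_ok=True)
    return path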
def visualizeCtree(c_tree):
    # Build the tree bottom-up, level by level.
    tree = Tree()
    tree.create_node("root", "root")
    levels = sorted(c_tree.keys())
    for level in levels:
        for node_id, cluster in c_tree[level].items():
            node_id = "{}.{}".format(level, node_id)
            tree.create_node("{}".format(cluster["pattern"]), node_id, parent="root")
            if level == 0:
                # Leaf level: attach the raw log entries.
                for data in cluster["data"]:
                    tree.create_node("log.{}".format(data), "log.{}".format(data), parent=node_id)
            else:
                # Higher levels: re-parent the clusters of the level below.
                for data in cluster["data"]:
                    tree.move_node("{}.{}".format(level - 1, data), node_id)
    tree.show()
    tree.save2file("./tree")

## How to use
## 1. for log in logs: updateCTree, updatePatterns
## 2. get the c_tree
## 3. train on all the logs to get the max level and estimate the best level to keep
## 4. train the clusters at different levels
## Note: synchronization of the unmerged list still needs to be handled; in production,
## consider keeping the c_tree in Redis and flushing it to the database at the same time.
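# The 'c_tree' structure consumed by visualizeCtree() is not shown in the
# snippet. From the way it is indexed, it appears to be a dict keyed by level,
# then by cluster id, where each cluster holds a "pattern" label and a "data"
# list (log indices at level 0, child-cluster ids at higher levels). The values
# below are purely illustrative.
example_c_tree = {
    0: {
        0: {"pattern": "connect from <*>", "data": [0, 3]},
        1: {"pattern": "disconnect from <*>", "data": [1, 2]},
    },
    1: {
        0: {"pattern": "<*> from <*>", "data": [0, 1]},  # children are level-0 cluster ids
    },
}
visualizeCtree(example_c_tree)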
def print_prob_val(self, fname="OutTree.txt"):
    # Copy the tree and rewrite each tag as "tag - value - probability"
    # before saving it to a file.
    n_tree = Tree(tree=self.tree)
    for node in n_tree.nodes:
        node = n_tree.get_node(node)
        node.tag = "{tag} - {val} - {prob}".format(
            tag=node.tag, val=node.data[0], prob=node.data[1])
    n_tree.save2file(fname)
    self.tree = None
def show_tree_of_riad_group(riad_group, mopdb):
    from treelib import Node, Tree

    tree = Tree()

    ## Combine ORG RIAD codes and GH RIAD codes in one DF;
    ## the DF is used as input to get MFI data.
    riad_as_input = riad_group[["ORG_RIAD_CODE", "ORG_ORGUNIT_NAME"]].copy()
    tmp = riad_group[["GH_RIAD_CODE", "GH_ORGUNIT_NAME"]].copy()
    tmp.rename(columns={
        "GH_RIAD_CODE": "ORG_RIAD_CODE",
        "GH_ORGUNIT_NAME": "ORG_ORGUNIT_NAME"
    }, inplace=True)
    riad_as_input = riad_as_input.append(tmp)
    riad_as_input.drop_duplicates(inplace=True)
    riad_as_input.reset_index(drop=True, inplace=True)

    mfi_obj = mfis(riad_as_input, mopdb)
    mfi_data = mfi_obj.data

    # Root node: the group head.
    tree.create_node(
        riad_group["GH_ORGUNIT_NAME"][0],
        riad_group["GH_RIAD_CODE"][0],
        data=mfi(mfi_data[mfi_data['RIAD_CODE'] == riad_group["GH_RIAD_CODE"][0]]))

    i = 0
    for index, row in riad_group.iterrows():
        i = i + 1
        # if i == 500:
        #     tree.show(data_property="summary", line_type="ascii-em")
        #     break
        try:
            tree.create_node(
                row["ORG_ORGUNIT_NAME"],
                row["ORG_RIAD_CODE"],
                parent=row["DP_RIAD_CODE"],
                data=mfi(mfi_data[mfi_data['RIAD_CODE'] == row["ORG_RIAD_CODE"]]))
        except Exception:
            # The direct parent is not in the tree yet: add it first.
            missing_dp_id = row["DP_RIAD_CODE"]
            add_missing_node(tree, riad_group, missing_dp_id, mfi_data)

    # Truncate the output file, then render the tree to stdout and to the file.
    f = open('D:/tree.txt', "w+", encoding="utf8")
    f.write("")
    f.close()
    tree.show(data_property="summary", line_type="ascii-em")
    tree.save2file(filename='D:/tree.txt', data_property="summary", line_type="ascii-em")

    f = open('D:/tree.txt', "r", encoding="utf8")
    contents = f.readlines()
    f.close()
    return contents
def generatetree(lista):
    tree = Tree()  # create the tree
    # Add the dashboard templates directory as the root node.
    tree.create_node("calificador/dashboard/templates/dashboard", "raiz")
    for i in lista:
        # Add the file to the tree if it is not there yet.
        if tree.get_node(i[0]) is None:
            tree.create_node(i[0], i[0], parent="raiz")
    for i in lista:
        # Add the image to the tree, under its file, if it is not there yet.
        if tree.get_node(i[1]) is None:
            tree.create_node(i[1], i[1], parent=i[0])
    tree.save2file("imagenes.txt")
def build_tree(tree_dict):
    """ Build ASCII tree and upload it to S3. """
    try:
        os.chdir(tempfile.gettempdir())
        tree = Tree()
        for aws in tree_dict:
            aws_key = aws
            tree.create_node(aws, aws_key)
            for region in tree_dict.get(aws):
                region_key = aws_key + region
                tree.create_node(region, region_key, parent=aws_key)
                for service in tree_dict.get(aws).get(region):
                    service_key = region_key + service
                    tree.create_node(service, service_key, parent=region_key)
                    for resource_type in tree_dict.get(aws).get(region).get(service):
                        resource_type_key = service_key + resource_type
                        tree.create_node(resource_type, resource_type_key, parent=service_key)
                        for resource in tree_dict.get(aws).get(region).get(service).get(resource_type):
                            resource_key = resource_type_key + resource
                            tree.create_node(resource, resource_key, parent=resource_type_key)
        try:
            _, temp_file = tempfile.mkstemp()
            tree.save2file(temp_file)
            client = boto3.client('s3')
            bucket = os.environ['RESOURCETREEBUCKET']
            key = 'resource_tree_%s.txt' % datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
            client.upload_file(temp_file, bucket, key)
            logging.info("Resource tree has been built and uploaded to S3 's3://%s/%s'." % (bucket, key))
        finally:
            os.remove(temp_file)
    except Exception:
        logging.critical(str(sys.exc_info()))
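# build_tree() above walks a four-level nested mapping. Its exact contents are
# not shown in the snippet; the shape it expects appears to be
# account -> region -> service -> resource type -> iterable of resource names.
# The values below are made up purely to illustrate that shape.
example_tree_dict = {
    "aws-account-123": {
        "eu-west-1": {
            "ec2": {
                "instances": ["i-0123456789abcdef0"],
                "security_groups": ["sg-0123456789abcdef0"],
            },
            "s3": {
                "buckets": ["my-example-bucket"],
            },
        },
    },
}
# build_tree(example_tree_dict)  # requires AWS credentials and RESOURCETREEBUCKET to be set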
def generateTree(informacion):
    # print(informacion)
    tree = Tree()  # create the tree
    tree.create_node("calificador/dashboard/models", "raiz")  # create the root node
    for i in informacion.keys():  # walk the dictionary keys
        if tree.get_node(i) is None:
            tree.create_node(i, i, parent="raiz")  # add each key as a child of the root node
    for i in informacion.keys():
        lis = informacion[i]  # list of mentions for this key
        for j in lis:  # walk the list
            # If the file (index 0 of the tuple) is not in the tree yet, add it as a child node.
            if tree.get_node(j[0]) is None:
                tree.create_node(j[0], j[0], parent=i)
        for k in lis:  # walk the list again
            # If the mention (index 1 of the tuple) is not in the tree yet, add it with the file as its parent.
            if tree.get_node(k[1]) is None:
                tree.create_node(k[1], k[1], parent=k[0])
    tree.save2file("Modulos.txt")  # save the tree to a file
def build_tree(source, target_dir):
    id_to_value = dict()
    parent_to_child = defaultdict(set)
    child_to_parent = defaultdict(set)

    with open(source, "r", encoding="utf-8") as sf:
        content = json.loads(sf.read())
    datas = content.get("@graph")

    for idx, data in enumerate(datas):
        if (idx + 1) % 100 == 0:
            print(f"index: {idx}")
        index = data.get("@id")
        value = data.get("label").get("@value")
        id_to_value[index] = value
        parents = data.get("subClassOf")
        if isinstance(parents, str):
            parent_to_child[parents].add(index)
            child_to_parent[index].add(parents)
        elif isinstance(parents, list):
            for parent in parents:
                parent_to_child[parent].add(index)
                child_to_parent[index].add(parent)

    root_set = get_root(child_to_parent)
    root_list = sorted(list(root_set))
    for root in root_list:
        print(id_to_value.get(root))

    target_dir = Path(target_dir)
    os.makedirs(target_dir)
    for root in root_list:
        tree = Tree()
        find_set = set()
        tree.create_node(id_to_value.get(root), root)
        tree = traverse_tree(tree, root, parent_to_child, id_to_value, find_set)
        tree.show()
        target_file = target_dir / f"{id_to_value.get(root)}.txt"
        tree.save2file(target_file)
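# get_root() and traverse_tree() are called above but not included in the
# snippet. Minimal sketches consistent with the call sites (names and exact
# behavior are assumptions): a root is any parent that never appears as a
# child, and traverse_tree() adds children recursively, using 'find_set' to
# avoid revisiting nodes that are reachable through several parents.
def get_root(child_to_parent):
    all_parents = {p for parents in child_to_parent.values() for p in parents}
    return {p for p in all_parents if p not in child_to_parent}

def traverse_tree(tree, node, parent_to_child, id_to_value, find_set):
    find_set.add(node)
    for child in sorted(parent_to_child.get(node, ())):
        if child in find_set:
            continue
        tree.create_node(id_to_value.get(child), child, parent=node)
        traverse_tree(tree, child, parent_to_child, id_to_value, find_set)
    return tree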
def show_cluster_tree(root):
    """
    Display the cluster tree.

    :param root: root ClusterTreeNode.
    :return: None
    :raise None
    """
    tree = Tree()
    root_api_node = tree.create_node('[' + str(root.nid) + ']')

    def create_node(node, api_parent=None):
        """Internal helper that creates the treelib node for a cluster node."""
        tag = '[' + str(node.nid) + ']'
        if node.data != '':
            tag += ': ' + node.data
        tag += '(' + str(node.distance) + ')'
        api_node = tree.create_node(tag, parent=api_parent)
        _left = node.left
        _right = node.right
        if _left is None and _right is None:
            return
        if _left:
            create_node(_left, api_node)
        if _right:
            create_node(_right, api_node)

    left = root.left
    right = root.right
    if left:
        create_node(left, root_api_node)
    if right:
        create_node(right, root_api_node)

    tree.show()
    tree.save2file(WORKING_DIR + 'cluster-tree.txt')
class ParentChildEvaluate:
    """
    Class to perform intrinsic evaluation of embeddings using the hierarchical
    relation of parent/child domains:

    1) parse ParentChildTreeFile.txt from InterPro
    2) for each child of root:
           nn = ask embeddings model to give M nearest neighbors
           calculate_precision_atM(child.descendants, nn)
           calculate_recall_atN(child.descendants, nn)
    3) plot histogram of precision and recall

    # Credits: https://medium.com/@m_n_malaeb/recall-and-precision-at-k-for-recommender-systems-618483226c54
    """

    def __init__(self, data_path):
        """
        ParentChildEvaluate class init

        Parameters
        ----------
        data_path : str
            full data path

        Returns
        -------
        None
        """
        print("ParentChildEvaluate")
        self.data_path = data_path
        self.tree = Tree()

    def get_model_name(self):
        """
        Get embedding model name

        Returns
        -------
        str
            embedding model name
        """
        return ntpath.basename(self.model_file)

    def load_emb_model(self, model_file, is_model_binary):
        """
        Load embedding model

        Parameters
        ----------
        model_file : str
            model file name
        is_model_binary : bool
            model is saved in binary format (True), otherwise (False)

        Returns
        -------
        None
        """
        self.model_file = model_file
        self.emb_model = KeyedVectors.load_word2vec_format(model_file, binary=is_model_binary)

    def parse_parent_child_file(self, parent_child_file_name, out_path,
                                output_file_name, save_parsed_tree=False):
        """
        Parse the parent child file

        Parameters
        ----------
        parent_child_file_name : str
            parent child file name
        out_path : str
            output data path
        output_file_name : str
            output file name
        save_parsed_tree : bool
            after parsing save parsed tree (True), otherwise (False)

        Returns
        -------
        None
        """
        previous_num_minus_signs = 0
        last_interpro_id = None
        self.tree.create_node("INTERPRO", "INTERPRO")
        current_parent = "INTERPRO"

        with open(parent_child_file_name, 'r') as parent_child_file:
            for line in parent_child_file:
                line = line.strip()
                current_num_minus_signs = line[0:line.find("IPR")].count("--")
                double_colon_split = line.strip("--").split("::")
                interpro_id = double_colon_split[0]
                assert interpro_id[0:3] == "IPR", \
                    "AssertionError: {} \n interpro id should start with IPR and has length of 9.".format(interpro_id)
                if current_num_minus_signs == 0:
                    # assert child not in the tree
                    current_parent = "INTERPRO"
                    self.tree.create_node(interpro_id, interpro_id, parent=current_parent)
                else:
                    # check if you are still with the current parent or you need to create a new one
                    if current_num_minus_signs == previous_num_minus_signs:
                        # same level as last parent
                        self.tree.create_node(interpro_id, interpro_id, parent=current_parent)
                    elif current_num_minus_signs > previous_num_minus_signs:
                        # one level down from last parent -> create new parent
                        current_parent = last_interpro_id
                        self.tree.create_node(interpro_id, interpro_id, parent=current_parent)
                    else:
                        # one level up from last parent -> get parent of the current parent
                        if current_parent == "INTERPRO":
                            # if one level up is the root then your papa is the root
                            papa = "INTERPRO"
                        else:
                            # if one level up is not the root then get the parent of your parent (papa)
                            papa = self.tree[current_parent].bpointer
                        self.tree.create_node(interpro_id, interpro_id, parent=papa)
                        current_parent = papa
                previous_num_minus_signs = current_num_minus_signs
                last_interpro_id = interpro_id

        # quick test
        # for interpro_node in self.tree.children("IPR000549"):
        #     print(interpro_node.identifier)
        # self.tree.show()
        if save_parsed_tree:
            self.tree.save2file(filename=os.path.join(out_path, output_file_name))

    def get_nn_calculate_precision_recall_atN(self, N, plot_histograms, save_diagnostics):
        """
        Get the nearest domain vectors for each domain and calculate recall based on
        the ground truth (parsed tree)

        Parameters
        ----------
        N : int
            number of nearest domain vectors; if N == 100 then retrieve as many
            as the children of a domain in the parsed tree
        plot_histograms : bool
            plot histograms for performance metrics (True), otherwise (False)
        save_diagnostics : bool
            save diagnostic plots for domains with low recall

        Returns
        -------
        None
        """
        print("Get NN and calculate precision and recall at {}".format(N))
        recalls_n = []
        precisions_n = []
        interpros_recall0 = []
        interpros_num_children_recall0 = []
        if N == 100:
            retrieve_all_children = True
        else:
            retrieve_all_children = False

        for interpro_node in self.tree.children("INTERPRO"):
            recall_n = 0.0
            precision_n = 0.0
            all_children = self.tree.subtree(interpro_node.identifier).all_nodes()
            assert interpro_node in all_children, \
                "AssertionError: parent {} is not in the set of all children.".format(interpro_node.identifier)
            all_children.remove(interpro_node)
            if retrieve_all_children:
                N = len(all_children)
            if self.emb_model.__contains__(interpro_node.identifier):
                nearest_neighbor_ids = set([
                    nn[0] for nn in self.emb_model.most_similar(
                        positive=interpro_node.identifier, topn=N)
                ])
            else:
                print("Model does not contain this id.")
                continue
            true_positives = set([child.identifier for child in all_children]).intersection(nearest_neighbor_ids)
            assert len(all_children) > 0 and len(nearest_neighbor_ids) == N, \
                "AssertionError: For parent {} all children should be > 0 and nearest neighbors should be equal to N.".format(interpro_node.identifier)
            recall_n = len(true_positives) / len(all_children)
            precision_n = len(true_positives) / len(nearest_neighbor_ids)
            assert 0.0 <= recall_n <= 1.0 and 0.0 <= precision_n <= 1.0, \
                "AssertionError: For parent {} recall or precision is not at (0,1]".format(interpro_node.identifier)
            recalls_n.append(recall_n)
            precisions_n.append(precision_n)
            if recall_n == 0.0:
                interpros_recall0.append(interpro_node.identifier)
                interpros_num_children_recall0.append(len(all_children))

        if retrieve_all_children:  # for printing in title
            N = 100
        if plot_histograms:
            if retrieve_all_children:
                self.plot_histogram(recalls_n, "Recall", "Recall",
                                    "Number of Interpro domains", "recall")
            else:
                self.plot_histogram(recalls_n, "Recall@{}".format(N), "Recall",
                                    "Number of Interpro domains", "recall_{}".format(N))
            self.plot_histogram(precisions_n, "Precision@{}".format(N), "Precision",
                                "Number of Interpro domains", "precision_{}".format(N))
        if retrieve_all_children:
            avg_recall = sum(recalls_n) / len(recalls_n)
            print("Average recall at 100: {:.3f}".format(avg_recall))
        if save_diagnostics:
            self.save_diagnostics_recall0(interpros_recall0, interpros_num_children_recall0)

    def save_diagnostics_recall0(self, interpros_recall0, interpros_num_children_recall0):
        """
        Save diagnostics histogram for domains with recall of 0

        Parameters
        ----------
        interpros_recall0 : list of str
            interpro ids with recall 0
        interpros_num_children_recall0 : list of int
            number of children of each interpro id with recall 0, found from the parsed tree

        Returns
        -------
        None
        """
        print("Saving diagnostics for interpro domains with recall 0")
        with open(os.path.join(self.data_path, self.get_model_name() + "_interpros_recall0" + ".txt"),
                  "w") as interpros_recall0_file:
            # write file with names of interpro domains having recall 0
            interpros_recall0_file.write("\n".join(interpros_recall0))
        # plot histogram of number of children for interpro parents with recall 0
        self.plot_histogram(interpros_num_children_recall0, None,
                            "Number of Interpro domains", "Number of children", "hist")

    def plot_histogram(self, performance_N, title, xlabel, ylabel, out_suffix):
        """
        Plot histogram for a performance metric and also for the number of children

        Parameters
        ----------
        performance_N : list of float
            performance metric value per parent domain
        title : str
            histogram title (if not None)
        xlabel : str
            label x
        ylabel : str
            label y
        out_suffix : str
            histogram output file name suffix

        Returns
        -------
        None
        """
        # plot the histogram of lengths
        fig = plt.figure()
        plt.hist(performance_N, color='g', align='left', edgecolor='k', alpha=0.8)
        plt.xlabel(xlabel, fontsize=14)
        plt.ylabel(ylabel, fontsize=14)
        if title is not None:
            plt.title(title, fontsize=14)
        plt.xticks(np.arange(0, 1.1, 0.1))
        hist_name = self.get_model_name() + "_" + out_suffix + ".png"
        fig.savefig(os.path.join(self.data_path, hist_name), bbox_inches='tight', dpi=600)
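# A minimal usage sketch for ParentChildEvaluate, inferred from the method
# signatures above. The file names and paths are placeholders, not values from
# the original project.
evaluator = ParentChildEvaluate(data_path="data/")
evaluator.load_emb_model("domain_vectors.txt", is_model_binary=False)
evaluator.parse_parent_child_file("ParentChildTreeFile.txt", out_path="data/",
                                  output_file_name="parsed_tree.txt",
                                  save_parsed_tree=True)
evaluator.get_nn_calculate_precision_recall_atN(N=100, plot_histograms=True,
                                                save_diagnostics=True)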
from treelib import Node, Tree

roots = find_root_nodes()
nodes_with_gold = [
    bag_to_obj[bag].desc for bag in bag_to_obj if bag_to_obj[bag].contains_gold
]

# for root in roots:
#     if root.desc not in nodes_with_gold:
#         continue
#
#     tree = Tree()
#
#     tree.create_node(root.desc, "0")
#     root.create_node(tree, "0", "0", True)
#
#     # tree.show()
#     tree.save2file("trees.txt")

for root in roots:
    if root.desc not in nodes_with_gold:
        continue

    tree = Tree()
    tree.create_node(root.desc, root.desc)
    root.create_node(tree, root.desc, True)
    tree.save2file("trees.txt")
class GitTool(object):
    def __init__(self, parent_path, shells, build_tree=False, log=None):
        """Initialize the working directory and the command to run.

        :parameter parent_path: working directory
        :parameter shells: shell command to execute
        :parameter build_tree: whether to build the tree-style navigation
        :parameter log: log file
        """
        self._directory = parent_path
        self._unix_shell = shells
        self._log_file = log
        self._tree = None
        self._build_tree = build_tree

    def get_build_tree(self):
        return self._build_tree

    def set_build_tree(self, value):
        self._build_tree = value

    build_tree = property(get_build_tree, set_build_tree)

    def _print(self, info=''):
        if self._log_file:
            os.system("echo %s >> %s" % (info, self._log_file))
        else:
            print(info)

    def run_work(self):
        """Run the configured command against the configured directories."""
        # Create the log file if the given path does not exist yet.
        if self._log_file:
            dir_name = os.path.dirname(self._log_file)
            if not os.path.exists(dir_name):
                os.makedirs(dir_name)
            if not os.path.exists(self._log_file):
                os.mknod(self._log_file)

        def build_tree(target_path):
            """Create the root tree node.

            :param target_path: target directory
            """
            if not self._build_tree:
                return
            self._tree = Tree()
            parent_name = os.path.basename(target_path)
            self._tree.create_node(parent_name, parent_name)

        def exist_node(sub_name):
            """Return a node id that is not in the tree yet, renaming if needed.

            :param sub_name: candidate node id.
            """
            if not self._build_tree:
                return sub_name
            nid = 0
            while self._tree.contains(sub_name):
                sub_name = '_'.join((sub_name, str(nid)))
                nid += 1
            return sub_name

        def report_tree(target_path, out_file=True):
            """Output the file tree.

            :param target_path: target directory.
            :param out_file: write to a file instead of stdout.
            """
            if not self._build_tree:
                return
            if out_file:
                report_file = os.path.basename(target_path.strip(os.path.sep))
                self._tree.save2file('%s.txt' % report_file)
            else:
                self._tree.show()

        def process_target_path(target_path, target_tag=None):
            """Run the command against the given directory.

            :param target_path: target directory
            :param target_tag: tag for the tree node of this directory
            """
            # Check that the path exists.
            if not os.path.exists(target_path):
                self._print("Directory does not exist!")
                return
            parent_name = os.path.basename(target_path) if not target_tag else target_tag
            # Walk the Git repositories under this directory.
            for i in os.listdir(target_path):
                sub_path = os.path.join(target_path, i)
                sub_name = os.path.basename(sub_path)
                # sub_path is treated as a Git repository when it is a directory
                # that contains a .git directory.
                git_path = os.path.join(sub_path, ".git")
                if os.path.isdir(sub_path):
                    sub_name = exist_node(sub_name)
                    if self._build_tree:
                        self._tree.create_node(sub_name, sub_name, parent=parent_name)
                    if os.path.exists(git_path) and os.path.isdir(git_path):
                        start_info = "Starting: %(sub_dir)s %(ph)s" % {
                            'sub_dir': i,
                            'ph': "." * (80 - len(i) - 1)
                        }
                        self._print(start_info)
                        os.system(self._unix_shell % sub_path)
                        self._print()
                    else:
                        process_target_path(sub_path, sub_name)

        if isinstance(self._directory, six.string_types):
            build_tree(self._directory)
            process_target_path(self._directory)
            report_tree(self._directory)
        elif isinstance(self._directory, (tuple, list)):
            for path in self._directory:
                build_tree(path)
                process_target_path(path)
                report_tree(path)
        else:
            pass

        self._print("Ok,All work is done!\r")

    def __call__(self):
        if self._log_file:
            now_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            self._print("%s %s %s" % ("=" * 35, now_time, "=" * 35))
        self.run_work()
tree.create_node("Curlie Site", "curlie") #Create the root node, with "curlie" as ID for category in categories: #Loop through all the main categories tree.create_node(category.text, category.text, parent="curlie") #Create a node with as a name the name of the category, and as ID the name, and as parent the root node urls.append( (category.get_attribute("href"), category.text)) #Fill a list of couples of categorie's link and name for url in urls: #Loop through all the links saved previously driver.get( url[0] ) #Get the link of the category, and navigate through it using the webdriver parentId = url[1] #Get the name of the category categories = driver.find_elements_by_xpath( '//section[@class="children"]/div/div[@class="cat-item"]/a') #Get all the children categories of the current category for category in categories: #Loop through all the categories tree.create_node(category.text, parentId + category.text, parent=parentId) #Create a node with as a name the name of the category, and as ID the name of the father node appended with the category name (I explain why in the attached document), and as parent the root node urls.append((category.get_attribute("href"), parentId + category.text)) #Add to the same list the new category link and name, so the program will pass through it too tree.show() #Display the final tree tree.save2file('tree.txt') #Save the structure into a text file
class DataCrawler():
    USERNAME = "******"
    PASSWORD = "******"
    LOGIN_URL = "https://www.tgwiki.com/CookieAuth.dll?GetLogon?curl=Z2F&reason=0&formdir=9"
    URL = "https://www.tgwiki.com"
    DIRECTORY = ["RootFolder"]
    URL_suffix = ".aspx"
    EXCEPTION_MENU_ITEM = "Service Level Agreement"
    browser = None
    dataTree = None

    def __init__(self):
        # self.browser = webdriver.Chrome()
        self.array = []
        self.dataTree = Tree()
        self.dataTree.create_node("Homepage", "homepage/", data=self.URL)

    def login(self):
        self.browser.get(self.LOGIN_URL)
        username = self.browser.find_element_by_id('username')
        username.send_keys(self.USERNAME)
        password = self.browser.find_element_by_id('password')
        password.send_keys(self.PASSWORD)
        self.browser.find_element_by_id('SubmitCreds').click()
        self.browser.find_element_by_xpath('//a[@href="/department"]').click()
        self.browser.find_element_by_xpath('//a[@href="/department/citd"]').click()
        html = self.browser.page_source
        return html

    def get_HTML_From_URL(self, url):
        print("Accessing " + str(url))
        self.browser.get(url)
        html = self.browser.page_source
        return html

    def get_menu(self, soup):
        result = soup.find(class_="menu vertical menu-vertical")
        result_in_static = result.findAll("li", class_="static")
        for ele in result_in_static:
            # print("-------------------------------------------------------")
            name = ele.find(class_="menu-item-text")
            inner_ele = ele.findAll("li", class_="dynamic")
            link = None
            parentID = "homepage/"
            if (inner_ele == []):
                # print(name.get_text())  # FOR DEBUGGING
                link_tag, link = self.parseLink(ele)
                self.dataTree.create_node(name.get_text(),
                                          parentID + name.get_text().lower() + "/",
                                          data=link,
                                          parent=parentID)
                if (self.isDirectory(link)):
                    _html = self.get_HTML_From_URL(link)
                    soup = BeautifulSoup(_html, "lxml")
                    self.parseTable(soup, parentID + link_tag.get_text().lower() + "/")
            # else:  # FOR DEBUGGING
            #     print(name.get_text())  # FOR DEBUGGING
            if (name.get_text() == "Technology Update"):
                print("")
                # self.dataTree.show(idhidden=False)
            for small_ele in inner_ele:
                parentID = "homepage/" + name.get_text().lower() + "/"
                link_tag, link = self.parseLink(small_ele, _parent=parentID)
                print("CHECKING IF " + str(link) + " IS DIRECTORY...")
                # self.dataTree.show()
                # self.dataTree.create_node(small_ele.get_text(), small_ele.get_text().lower(), data=link, parent=name.get_text().lower())
                if (self.isDirectory(link)):
                    _html = self.get_HTML_From_URL(link)
                    soup = BeautifulSoup(_html, "lxml")
                    self.parseTable(soup, parentID + link_tag.get_text().lower() + "/")
            # print("-------------------------------------------------------")

    def parseLink(self, soup_result, _parent=None):
        print("parseLink")
        link_tag = soup_result.a
        link = None
        if (link_tag != None):
            link = link_tag.get('href')
            # print(link_tag.get_text())
            # print(link)
            if (link[0] == '/'):
                link = self.URL + link
            if (_parent != None):
                print("############")
                print("Tag: " + str(link_tag))
                print("Text: " + link_tag.get_text())
                print("Link: " + str(link))
                print("Parent: " + str(_parent))
                print("############")
                # self.dataTree.show(idhidden=False)
                # if(link_tag.get_text() == self.EXCEPTION_MENU_ITEM):
                #     if(self.dataTree.contains(self.EXCEPTION_MENU_ITEM.lower())):
                #         print("dfgdfgdgdfgdfgd")
                #         return link_tag, link
                try:
                    self.dataTree.create_node(str(link_tag.get_text()),
                                              _parent + str(link_tag.get_text().lower() + "/"),
                                              data=link,
                                              parent=_parent)
                except treelib.tree.DuplicatedNodeIdError:
                    print("duplicated")
                    return link_tag, link
                # if(self.isDirectory(temp)):
                #     _html = self.get_HTML_From_URL(temp)
                #     soup = BeautifulSoup(_html, "lxml")
                #     self.parseTable(soup, link.get_text().lower())
        return link_tag, link

    def parseTable(self, soup_result, _parent=None):
        print("-------------------------------------------------------")
        print("parseTable")
        try:
            table_list = soup_result.findAll("table")
            for table in table_list:
                if (table.has_attr("summary")):
                    table_body = table.find('tbody')
                    row_list = table_body.findAll('td', attrs={"class": "ms-vb-title"})
                    for x in range(0, len(row_list)):
                        link_tag, link = self.parseLink(row_list[x], _parent)
                        if (self.isDirectory(link)):
                            _html = self.get_HTML_From_URL(link)
                            soup = BeautifulSoup(_html, "lxml")
                            self.parseTable(soup, _parent + link_tag.get_text().lower() + "/")
                    break
        except AttributeError as e:
            print(e)
        print("-------------------------------------------------------")

    def isDirectory(self, link):
        if (link == None):
            return False
        isDirectory = False
        if (self.URL_suffix == link[-5:]):
            return True
        elif (link[-1:] == "/"):
            print("dfasfdasfafafsdfdf")
            return True
        for directory in self.DIRECTORY:
            if (directory in link):
                isDirectory = True
        return isDirectory

    def writeToJSONFile(self, path, fileName, data):
        filePathNameWExt = './' + path + '/' + fileName + '.json'
        with open(filePathNameWExt, 'w') as fp:
            json.dump(data, fp)

    def main(self):
        _html = self.login()
        soup = BeautifulSoup(_html, "lxml")
        self.get_menu(soup)
        self.dataTree.show()
        tree_in_dict = self.dataTree.to_json(with_data=True)
        tree_in_json = json.dumps(tree_in_dict, indent=4, sort_keys=True, ensure_ascii=False)
        self.writeToJSONFile('./', 'training', tree_in_json)
        self.dataTree.save2file('tree_diagram.json')

    # def process_node(self, node):
    #     if(node.)

    def test(self):
        file_directory = "./ITSM_training.json"
        json_data = open(file_directory).read()
        data = json.loads(json_data)
        hello = node.Tree(tree=data)
        print(hello)
class FacultyPagesFilteredSpider(scrapy.Spider):
    name = 'faculty_pages_filtered'
    allowed_domains = [
        'cmu.edu', 'cornell.edu', 'washington.edu', 'gatech.edu',
        'princeton.edu', 'utexas.edu', 'illinois.edu', 'berkeley.edu',
        'mit.edu', 'stanford.edu'
    ]
    count = 0
    record = {}
    start_urls = [
        'https://www.cmu.edu/', 'https://www.cornell.edu/',
        'https://www.washington.edu/', 'https://www.gatech.edu/',
        'https://www.princeton.edu/', 'https://www.utexas.edu/',
        'https://illinois.edu/', 'https://www.berkeley.edu/',
        'https://www.mit.edu/', 'https://www.stanford.edu/'
    ]
    exclude_words = [
        'news', 'events', 'publications', 'pub', 'gallery', 'category',
        'courses', 'students', 'references', 'reference', 'software',
        'softwares', 'tags', 'tutorials', 'workshop', 'festival', 'admissions',
        'exhibitions', 'alumni', 'lectures', 'undergraduate', 'about',
        'history', 'awards', 'ranking', 'enrollment', 'graduate', 'archive',
        'stories', 'post', 'pages', 'magazine', 'curriculum', '404', 'faqs',
        'engage', 'campaign', 'career', 'resources', 'services', 'network',
        'security', 'donate', 'giving', 'finance', 'forms', 'policies',
        'policy', 'alphabetical', 'summer', 'winter', 'spring', 'autumn',
        'fall', 'health', 'facilities', 'facility', 'wp', 'information',
        'general', 'catalog', 'guides', 'library', 'publish', 'blog',
        'collection', 'share', 'search', 'periodicals', 'bookstore', 'store',
        'product', 'organisation', 'webstore', 'funding', 'pdf'
    ]
    rules = [Rule(LinkExtractor(unique=True), callback='parse', follow=True)]
    # count_limits = {"page_count": 200, "item_count": 200}

    def __init__(self):
        self.tree = Tree()
        self.tree.create_node("root", "root")
        self.tree.create_node("unknown", "unknown", parent="root")
        self.bio_identifier = BioIdentifier(model="bio-model")
        for dom in self.allowed_domains:
            domain = dom.split('.')[0]
            if not os.path.exists('Crawled_Data'):
                os.makedirs('Crawled_Data')
            folder_name = 'Crawled_Data/' + domain.capitalize() + '_University_Files'
            self.record[domain] = 0
            if not os.path.exists(folder_name):
                os.makedirs(folder_name)

    def parse(self, response):
        matched_domain = [x for x in self.allowed_domains if x in response.url]
        if len(matched_domain) > 0:
            domain = matched_domain[0].split('.')[0]
            folder_name = 'Crawled_Data/' + domain.capitalize() + '_University_Files'
            self.record[domain] = self.record.get(domain, 0) + 1
            if self.record[domain] % 50 == 0:
                print('\n Crawled {} Bio-pages of {} University ...'.format(
                    self.record[domain], domain.capitalize()))
                self.tree.save2file(folder_name + "/00__" + str(self.record[domain]) + "_tree.txt")

            isBio = self.bio_identifier.is_bio_html_content(response.xpath('//*').get())
            if isBio:
                text = BeautifulSoup(response.xpath('//*').get(),
                                     features="html.parser").get_text()
                tokens = nltk.word_tokenize(text)
                normalized_text = ' '.join([word for word in tokens if word.isalnum()])
                normalized_text += '\n' + response.url
                hash_text = hashlib.md5(response.url.encode())
                file_name = hash_text.hexdigest()
                with open(folder_name + "/" + file_name + ".txt", "w",
                          encoding="utf-8") as file:
                    file.write(normalized_text)

            AllLinks = LinkExtractor(allow_domains=domain + '.edu',
                                     unique=True).extract_links(response)
            for n, link in enumerate(AllLinks):
                if not any([x in link.url for x in self.exclude_words]):
                    if self.tree.get_node(link.url) == None:
                        referer = response.request.headers.get('Referer', None)
                        if referer == None:
                            self.tree.create_node(link.url, link.url, parent='root')
                        else:
                            referer = referer.decode("utf-8")
                            if self.tree.contains(referer):
                                self.tree.create_node(link.url, link.url, parent=referer)
                            else:
                                self.tree.create_node(link.url, link.url, parent='unknown')
                        yield scrapy.Request(url=link.url, callback=self.parse)
# -*- coding: utf-8 -*-
from treelib import Tree

tree = Tree()
tree.create_node("Harry", "harry")  # root node
tree.create_node("Jane", "jane", parent="harry")
tree.create_node("Bill", "bill", parent="harry")
tree.create_node("Diane", "diane", parent="jane")
tree.create_node("Mary", "mary", parent="diane")
tree.create_node("Mark", "mark", parent="jane")
tree.save2file('tree.txt')
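# For reference, save2file() writes the same ASCII rendering that show() prints.
# With default settings, tree.txt for the tree above should look roughly like
# this (child ordering can differ between treelib versions):
#
# Harry
# ├── Bill
# └── Jane
#     ├── Diane
#     │   └── Mary
#     └── Mark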
## Create the family tree
tree = Tree()
tree.create_node("Harry", "harry")  # root node
tree.create_node("Jane", "jane", parent="harry")
tree.create_node("Bill", "bill", parent="harry")
tree.create_node("Diane", "diane", parent="jane")
tree.create_node("George", "george", parent="diane")
tree.create_node("Mary", "mary", parent="diane")
tree.create_node("Jill", "jill", parent="george")
tree.create_node("Mark", "mark", parent="jane")

print("#" * 4 + "Breakdown of our family")
tree.show(cmp=lambda x, y: cmp(x.tag, y.tag), key=None, reverse=True)
# tree.show(key=lambda x: x.tag, reverse=False)
tree.save2file("/home/chenxm/Desktop/tree.txt", idhidden=False)
print('\n')

print("#" * 4 + "All family members in DEPTH mode")
for node in tree.expand_tree(mode=Tree.DEPTH):
    print(tree[node].tag)
print('\n')

print("#" * 4 + "All family members without Diane sub-family")
tree.show(idhidden=False, filter=lambda x: x.identifier != 'diane')
# for node in tree.expand_tree(filter=lambda x: x.identifier != 'diane', mode=Tree.DEPTH):
#     print(tree[node].tag)
print('\n')
tax_tree = Tree()
tax_tree.create_node('root', 'root')

filea = open(sys.argv[1], 'r')
head = filea.readline()
for line in filea.readlines():
    line = line.strip()
    arr = line.split('\t')
    upper = 'root'
    for i in range(1, len(arr)):
        if arr[i] == "NA":
            continue
        if arr[i] not in tax:
            tax.append(arr[i])
            tax_tree.create_node(arr[i], arr[i], parent=upper, data=arr[i])
        upper = arr[i]

tax_tree.save2file(sys.argv[2])
# print(tax_tree.to_dict())

# def convert_dict_to_stdjson(tree_dict):
#     js = '"name":root,"children":'
#     for key in tree_dict:


def to_dict(tree, nid=None, key=None, sort=True, reverse=False, with_data=False):
    """Transform the whole tree into a dict."""
domain = tldextract.extract(url).domain
subdomain = tldextract.extract(url).subdomain
if not (tree.contains(domain)):
    tree.create_node(domain, domain, parent="ID of root node")  # add domains to the root node
if subdomain:
    tree.create_node(subdomain, subdomain + domain, parent=domain)  # add sub-domains to the domain node

file.close()
tree.show(line_type="ascii-emv")  # show the tree on stdout
tree.to_graphviz(filename="tree_graphviz")  # dump the tree as Graphviz

# dot xxx -Tps -o test.ps -Grankdir=LR
# The -Grankdir=LR option lays the tree out from left to right.
subprocess.call(["dot", "tree_graphviz", "-Tps", "-o", "output.ps", "-Grankdir=LR"])

# convert -flatten -density 150 -geometry 100% test.ps test.png
subprocess.call(["convert", "-flatten", "-density", "150", "-geometry", "100%",
                 "output.ps", "tree_graphviz.png"],
                stderr=subprocess.DEVNULL)  # convert the Graphviz output to PNG

# rm -rf tree_graphviz output.ps
subprocess.call(["rm", "-rf", "tree_graphviz", "output.ps"])  # clean up intermediate files

if os.path.exists("output.txt"):  # dump the tree as a text file
    subprocess.call(["rm", "-rf", "output.txt"])
tree.save2file('output.txt', line_type="ascii-emv")

with open('output.json', 'w') as f:  # dump the tree as JSON
    f.write(tree.to_json(with_data=True))
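# The snippet above assumes 'tree' already exists and that its root node has the
# literal identifier "ID of root node" (apparently a placeholder left by the
# author). A minimal setup consistent with that usage (the root tag is an
# assumption) would be:
tree = Tree()
tree.create_node("root", "ID of root node")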
## Create the family tree
tree = Tree()
tree.create_node("Harry", "harry")  # root node
tree.create_node("Jane", "jane", parent="harry")
tree.create_node("Bill", "bill", parent="harry")
tree.create_node("Diane", "diane", parent="jane")
tree.create_node("George", "george", parent="diane")
tree.create_node("Mary", "mary", parent="diane")
tree.create_node("Jill", "jill", parent="george")
tree.create_node("Mark", "mark", parent="jane")

print("#" * 4 + "Breakdown of our family")
tree.show(cmp=lambda x, y: cmp(x.tag, y.tag), key=None, reverse=True)
# tree.show(key=lambda x: x.tag, reverse=False)
tree.save2file("/home/chenxm/Desktop/tree.txt", idhidden=False)
print("\n")

print("#" * 4 + "All family members in ZIGZAG mode")
for node in tree.expand_tree(mode=Tree.ZIGZAG):
    print(tree[node].tag)
print("\n")

print("#" * 4 + "All family members without Diane sub-family")
tree.show(idhidden=False, filter=lambda x: x.identifier != "diane")
# for node in tree.expand_tree(filter=lambda x: x.identifier != 'diane', mode=Tree.DEPTH):
#     print(tree[node].tag)
print("\n")
def construct_tree(self, file_path, child_node_index, json_file_location):
    node_list = []
    try:
        directory = os.path.join(file_path)
        for root, dirs, files in os.walk(directory):
            for file in files:
                if str(file).endswith(".csv"):
                    f = open(directory + file, 'r')
                    csv_reader = csv.reader(f, delimiter=',')
                    row_index = 0
                    filename = os.path.basename(f.name)
                    rows = []
                    dict = {"Root": "root", filename: filename.lower()}
                    esg_tree = Tree()
                    # Root node.
                    esg_tree.create_node(
                        "Root", "root",
                        data=jsonpickle.encode(NodeParam('source', 'attr', 'desc', 'root'),
                                               unpicklable=False))
                    node = Node('root', 'Root', '', '',
                                jsonpickle.encode(NodeParam('source', 'attr', 'desc', 'root'),
                                                  unpicklable=False))
                    node_list.append(node)
                    # One node per CSV file, directly under the root.
                    esg_tree.create_node(
                        filename, filename.lower(), parent='root',
                        data=jsonpickle.encode(NodeParam('source', 'attr', 'desc', str(uuid.uuid1())),
                                               unpicklable=False))
                    node = Node(filename.lower(), filename, 'root', '',
                                jsonpickle.encode(NodeParam('source', 'attr', 'desc', str(uuid.uuid1())),
                                                  unpicklable=False))
                    node_list.append(node)

                    for row in csv_reader:
                        rows.append(row)

                    for row in rows:
                        if row_index != 0:
                            column_index = 0
                            for curr_column in row:
                                if str(curr_column) + str(row[0]) not in dict:
                                    if column_index > child_node_index:
                                        if "\n" in curr_column:
                                            # Multi-line cells: one node per line.
                                            for rowData in curr_column.splitlines():
                                                node_id_key = str(rowData) + str(row[0])
                                                dict[node_id_key] = uuid.uuid1()
                                                esg_tree.create_node(
                                                    rowData, str(dict.get(node_id_key)),
                                                    parent=str(dict.get(str(row[3]) + str(row[0]))),
                                                    data=jsonpickle.encode(
                                                        NodeParam((rows[0])[column_index], 'attr',
                                                                  str(rowData).lower(),
                                                                  str(dict.get(node_id_key))),
                                                        unpicklable=False))
                                                node = Node(
                                                    str(dict.get(node_id_key)), rowData,
                                                    str(dict.get(str(row[3]) + str(row[0]))), '',
                                                    jsonpickle.encode(
                                                        NodeParam((rows[0])[column_index], 'attr',
                                                                  str(rowData).lower(),
                                                                  str(dict.get(node_id_key))),
                                                        unpicklable=False))
                                                node_list.append(node)
                                        elif curr_column != '':
                                            node_id_key = str(curr_column) + str(row[0])
                                            dict[node_id_key] = uuid.uuid1()
                                            esg_tree.create_node(
                                                curr_column, str(dict.get(node_id_key)),
                                                parent=str(dict.get(str(row[3]) + str(row[0]))),
                                                data=jsonpickle.encode(
                                                    NodeParam((rows[0])[column_index], 'attr',
                                                              str(curr_column).lower(),
                                                              str(dict.get(node_id_key))),
                                                    unpicklable=False))
                                            node = Node(
                                                str(dict.get(node_id_key)), curr_column,
                                                str(dict.get(str(row[3]) + str(row[0]))), '',
                                                jsonpickle.encode(
                                                    NodeParam((rows[0])[column_index], 'attr',
                                                              str(curr_column).lower(),
                                                              str(dict.get(node_id_key))),
                                                    unpicklable=False))
                                            node_list.append(node)
                                    else:
                                        node_id_key = str(curr_column) + str(row[0])
                                        dict[node_id_key] = uuid.uuid1()
                                        if column_index == 0:
                                            esg_tree.create_node(
                                                curr_column, str(dict.get(node_id_key)),
                                                parent=str(dict.get(filename)),
                                                data=jsonpickle.encode(
                                                    NodeParam((rows[0])[column_index], 'attr',
                                                              str(curr_column).lower(),
                                                              str(dict.get(node_id_key))),
                                                    unpicklable=False))
                                            node = Node(
                                                str(dict.get(node_id_key)), curr_column,
                                                str(dict.get(filename)), '',
                                                jsonpickle.encode(
                                                    NodeParam((rows[0])[column_index], 'attr',
                                                              str(curr_column).lower(),
                                                              str(dict.get(node_id_key))),
                                                    unpicklable=False))
                                            node_list.append(node)
                                        else:
                                            esg_tree.create_node(
                                                curr_column, str(dict.get(node_id_key)),
                                                parent=str(dict.get(str(row[column_index - 1]) + str(row[0]))),
                                                data=jsonpickle.encode(
                                                    NodeParam((rows[0])[column_index], 'attr',
                                                              str(curr_column).lower(),
                                                              str(dict.get(node_id_key))),
                                                    unpicklable=False))
                                            node = Node(
                                                str(dict.get(node_id_key)), curr_column,
                                                str(dict.get(str(row[column_index - 1]) + str(row[0]))), '',
                                                jsonpickle.encode(
                                                    NodeParam((rows[0])[column_index], 'attr',
                                                              str(curr_column).lower(),
                                                              str(dict.get(node_id_key))),
                                                    unpicklable=False))
                                            node_list.append(node)
                                column_index += 1
                        row_index += 1

                    f.close()
                    filename = filename.replace(".csv", '')
                    with open(json_file_location + filename + ".txt", "wb") as outfile:
                        esg_tree.save2file(json_file_location + filename + ".json")
                        pickle.dump(esg_tree, outfile)
                    esgDatabase().add_data(node_list)
                    print(esg_tree.to_json(with_data=True))
        return 'success'
    except OSError:
        print("Path not found exception")
        return 'failed'
    except IOError:
        print('An error occurred trying to read the file.')
        f.close()
        return 'failed'
    except Exception as e:
        print("An error occurred while creating a tree")
        print(e)
        return 'failed'
from treelib import Node, Tree
import json

with open('jsondata.txt') as json_file:
    data = json.load(json_file)

tree = Tree()
tree.create_node(identifier='0', data='<html></html>')
# print(len(data["tag"]))

key = {"sd"}
key.clear()

for k, v in data.items():
    for i in v:
        _id = str(i['id'])
        _tag = str(i['tag'])
        parent = i['parent']
        for x in parent:
            tree.create_node(identifier=_id, parent=str(x), data=_tag)

tree.show()
# tree.show()
x = tree.to_json()
print(x)
# Note: treelib's data_property expects the name of an attribute on node.data
# (a string), not a boolean.
tree.save2file('tree.txt', data_property=True)
# print(x)
# print(key)
while node_dict:
    for key, value in node_dict.items():
        if value['parent'] in added:
            tree.create_node(key, key, parent=value['parent'])
            added.add(key)
            node_dict.pop(key)
            break
        elif value['parent'] == None:
            tree.create_node(key, key)
            added.add(key)
            node_dict.pop(key)
            break

tree_list.append(tree)

for tree in tree_list:
    tree.save2file("Processed_Skeleton_Trees.txt")

#######################################################################################################################

# Identify end nodes (leaves):
leaf_list = []
for i in range(len(tree_list)):
    tree = tree_list[i]
    leaves = tree.leaves(nid=None)
    for leaf in leaves:
        leaf = leaf.identifier
        leaf_list.append(leaf)

# Identify paths to leaves:
paths_list = []
for i in range(len(tree_list)):
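    # The original snippet is cut off at this loop. A plausible continuation
    # using treelib's paths_to_leaves() (an assumption, not the original code):
    tree = tree_list[i]
    for path in tree.paths_to_leaves():
        paths_list.append(path)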
def build_tree(self, resource_tree):
    """ Build ASCII tree and upload it to S3. """
    try:
        os.chdir(tempfile.gettempdir())
        tree = Tree()
        for aws in resource_tree:
            aws_key = aws
            tree.create_node(aws, aws_key)
            for region in resource_tree.get(aws):
                region_key = aws_key + region
                tree.create_node(region, region_key, parent=aws_key)
                for service in resource_tree.get(aws).get(region):
                    service_key = region_key + service
                    tree.create_node(service, service_key, parent=region_key)
                    for resource_type in resource_tree.get(aws).get(region).get(service):
                        resource_type_key = service_key + resource_type
                        tree.create_node(resource_type, resource_type_key, parent=service_key)
                        for resource in resource_tree.get(aws).get(region).get(service).get(resource_type):
                            resource_key = resource_type_key + resource
                            tree.create_node(resource, resource_key, parent=resource_type_key)
        try:
            _, temp_file = tempfile.mkstemp()
            try:
                tree.save2file(temp_file)
            except Exception:
                self.logging.error("Could not generate resource tree.")
                return False

            client = boto3.client("s3")
            bucket = os.environ["RESOURCETREEBUCKET"]
            key = "resource_tree_%s.txt" % datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S")

            try:
                client.upload_file(temp_file, bucket, key)
            except Exception:
                self.logging.error(
                    f"Could not upload resource tree to S3 's3://{bucket}/{key}'.")
                return False

            self.logging.info(
                f"Resource tree has been built and uploaded to S3 's3://{bucket}/{key}'.")
        finally:
            os.remove(temp_file)
        return True
    except Exception:
        self.logging.error("Could not generate resource tree.")
        self.logging.error(sys.exc_info()[1])
        return False