Ejemplo n.º 1
0
    def find_element(self,
                     name: str,
                     maxlevel: int = 3) -> Optional[XmlElement]:
        """Searches the XML document for the first element called ``name``.

        Args:
            name (str): Element name compared against each node's ``name``.
            maxlevel (int, optional): Depth limit handed to the tree search.
                Defaults to 3.

        Returns:
            Optional[XmlElement]: The search result, cast to ``XmlElement``
            for the type checker only (no runtime type check is performed).
        """
        def has_requested_name(candidate) -> bool:
            return candidate.name == name

        match = find(self.xml_document,
                     filter_=has_requested_name,
                     maxlevel=maxlevel)
        return cast(XmlElement, match)
Ejemplo n.º 2
0
    def _build_conditional_option_branch(
        self, conditional: XmlElement, parent: InputNode, option_value: Optional[str] = None
    ) -> None:
        """Adds the branch of a conditional selected by 'option_value' to the input tree.

        Nothing is built when the NAME attribute of the conditional or
        'option_value' is falsy.

        Args:
            conditional (XmlElement): The <conditional> XML element.
            parent (InputNode): The input node that will contain this branch.
            option_value (Optional[str]): The value of the option selected in this conditional branch.
        """
        name = conditional.get_attribute(NAME)
        if not name or not option_value:
            return

        # The branch node is created even if no matching <when> is found below.
        conditional_node = ConditionalInputNode(name, option_value, element=conditional, parent=parent)

        def is_selected_when(el) -> bool:
            return el.name == WHEN and el.get_attribute(VALUE) == option_value

        when = cast(XmlElement, find(conditional, filter_=is_selected_when, maxlevel=2))
        if when:
            self._build_input_tree(when, conditional_node)
Ejemplo n.º 3
0
 def __init__(self, graph, s):
     """Build the search tree from the cliques of ``graph`` and keep a deep copy.

     Cliques with at least ``s - 1`` and fewer than ``s + 1`` members become
     tree nodes named by the clique itself. Each one is attached to the node
     whose name equals the clique without its last member, or to the root
     when no such node exists. Enumeration stops at the first clique with
     ``s + 1`` or more members.
     """
     self.search_troot = Node("root", parent=None)
     root = self.search_troot
     too_big, big_enough = s + 1, s - 1

     for clique in nx.enumerate_all_cliques(graph):
         size = len(clique)
         if size >= too_big:
             break
         if size < big_enough:
             continue

         # Drop the last member to obtain the name of the would-be parent.
         prefix = list(clique)
         del prefix[-1]
         owner = find(root, lambda node: node.name == prefix)
         Node(clique, parent=root if owner is None else owner)

     # Independent snapshot of the freshly built tree.
     self.solidroot = copy.deepcopy(self.search_troot)
Ejemplo n.º 4
0
def buildTree(dataset):
    """Build a prefix tree with per-node support counts from ``dataset``.

    Every entry of ``dataset`` is an iterable of items. Its items are walked
    from the root downwards: when the current node already has a direct child
    named like the item, that child's ``support`` is incremented; otherwise a
    new child with ``support=1`` is created. Either way the walk continues
    from that child.

    Only direct children are considered when looking for an existing node.
    The previous implementation used a depth-limited tree search that also
    tested the current node itself, so an item equal to the current node's
    name (for example a consecutive duplicate, or a first item called
    "root") was counted on the parent instead of on a child.

    Args:
        dataset: Iterable of item sequences.

    Returns:
        The root node (named "root", ``support=0``) of the populated tree.
    """
    root = Node("root", support=0)

    for data in dataset:
        parent = root
        for item in data:
            child = next((c for c in parent.children if c.name == item), None)
            if child is None:
                child = Node(item, parent=parent, support=1)
            else:
                child.support += 1
            parent = child

    return root
Ejemplo n.º 5
0
 def _addOperationsToFixVariantToQueue(self, variantToFix, k, tree, violatingCases, pastCost, changedCases, caseToSequenceDict):
     """Evaluate candidate target sequences for ``variantToFix`` and queue the selected operation(s).

     For every potential target sequence that differs from the variant's own
     sequence and has a node in ``tree``, the operation cost is computed as
     (sequence distance * variant counter) + ``pastCost``. Targets whose node
     holds at least ``k`` cases update the best "compliant" operation; the
     others are handed to ``_addOperationWithViolatingTargetToQueue``.

     In greedy mode, candidates whose cost is not below the best projected
     cost seen so far are skipped before the projected cost is computed, and
     the heuristic queueing helper is invoked after the loop. Finally, if a
     compliant operation was found, it is queued.

     Args:
         variantToFix: Dict-like variant; read through the keys stored in
             ``self.__variantDictName`` and ``self.__variantDictCounterName``.
         k: Minimum number of cases a target node needs to count as compliant.
         tree: Root of the tree searched for target nodes (by ``sequence``).
         violatingCases: Passed on to obtain the violating variants.
         pastCost: Cost already accumulated before this operation.
         changedCases: Passed through to the queueing helpers.
         caseToSequenceDict: Passed through to the helper methods.
     """
     bestOperationCompliant, bestOperationViolating, minCostOfCurrentBestOption = self._initializeVariablesForaddOpertionsToFixVariantToQueue()
     violatingVariants = self.__getViolatingVariants(caseToSequenceDict, violatingCases)
     potentialTargetSequences = self._getPotentialTargetSequences(tree, violatingVariants, variantToFix, k)

     for targetSequence in potentialTargetSequences:
         # Mapping a variant onto its own sequence is not an operation.
         if self.__areSequencesTheSame(targetSequence, variantToFix[self.__variantDictName]):
             continue

         targetNode = find(tree, lambda node: node.sequence == targetSequence)
         if targetNode is None:
             continue

         fixedCases = self._getCasesFixedByOperation(variantToFix, targetNode, k)
         costOfOperartion = self._getDistanceSequences(variantToFix[self.__variantDictName], targetSequence) * variantToFix[self.__variantDictCounterName]
         occuredCost = costOfOperartion + pastCost

         # Greedy pruning: if the cost without the distance metric is already
         # not better than the current best option, skip the projection.
         if self.__greedy and not (occuredCost < minCostOfCurrentBestOption):
             continue

         projectedCost = self._getProjectedCost(violatingVariants, caseToSequenceDict, fixedCases, tree, occuredCost, k)
         # Block operations that would create new violations -> otherwise the problem is not feasible
         if len(targetNode.cases) >= k:
             bestOperationCompliant = self._getNewBestOperationDict(bestOperationCompliant, occuredCost, projectedCost, targetSequence)
         else:
             bestOperationViolating = self._addOperationWithViolatingTargetToQueue(bestOperationViolating, changedCases, occuredCost, projectedCost, tree, targetSequence, variantToFix, caseToSequenceDict)
         minCostOfCurrentBestOption = min(bestOperationViolating["projectedCost"], bestOperationCompliant["projectedCost"])

     if self.__greedy:
         self._addOperationsToQueueInHeuristicPRETSA(variantToFix, bestOperationCompliant, bestOperationViolating, tree, changedCases, caseToSequenceDict)
     if bestOperationCompliant.get("targetSequence", None) is not None:
         self.__addOperationToQueue(bestOperationCompliant["projectedCost"], variantToFix, tree, bestOperationCompliant["targetSequence"], bestOperationCompliant["occuredCost"], changedCases, caseToSequenceDict, False)
Ejemplo n.º 6
0
 def search_in_Solid_tree(self, nodename):
     """Return the result of searching ``self.solidroot`` for a node named ``nodename``."""
     def name_matches(candidate):
         return candidate.name == nodename

     return find(self.solidroot, name_matches)
Ejemplo n.º 7
0
 def add_element(self, element_S):
     """Re-parent the node named like ``element_S`` directly under the search root.

     The node is looked up by name starting at ``self.search_troot``. The
     search result is used without a check, so an unsuccessful search fails
     on the attribute assignment below.
     """
     def same_name(candidate):
         return candidate.name == element_S.name

     match = find(self.search_troot, same_name)
     match.parent = self.search_troot
Ejemplo n.º 8
0
def pgfs_result_to_newick(root):
    """Serialise a PGFS result tree (nodes carrying lifting attributes) as newick text.

    ``root.H`` is split into "offshoots" (names whose first matching node in
    the tree is a leaf) and "head subjects" (the rest). The tree is then
    written depth-first, children before their parent, with an NHX
    annotation per node and a terminating ';'.
    """
    chunks = []
    emit = chunks.append

    def first_match_is_leaf(label):
        hit = find(root, filter_=lambda x: x.name == label)
        return bool(hit and hit.is_leaf)

    candidates = root.H
    offshoots = {label for label in candidates if first_match_is_leaf(label)}
    head_subjects = candidates - offshoots

    # Fixed annotation tails for the synthetic "gaps" / "nodes" entries; the
    # braces are emitted literally.
    gap_tail = ':H={}:u=0.0:V=0.0:G={}:L={}:Hd=0:Of=0:Gap=1:Sq=1:ForceLabel=1]'
    nodes_tail = ':H={}:u=0.0:V=0.0:G={}:L={}:Hd=0:Of=0:Gap=0:Sq=1:ForceLabel=1]'

    def visit(node, colon=True):
        kids = node.children
        last = len(kids)
        if last > 0:
            emit('(')
            for position, kid in enumerate(kids, 1):
                # Every child except the last one is followed by a separator.
                visit(kid, position != last)
            emit(') ')

        # Label shown in the output: commas and parenthesised parts removed.
        label = re.sub(r'\(.*\)', '', node.name.replace(',', ''))

        if ' gaps' in node.name:
            emit(f'{label} [&&NHX:p=0.0:e={node.depth}' + gap_tail)
        elif ' nodes' in node.name:
            emit(f'{label} [&&NHX:p=0.0:e={node.depth}' + nodes_tail)
        elif len(node.name) > 0:
            annotated = (
                f'{label} [&&NHX:'
                f'p={node.p:.3f}:'
                f'e={node.depth}:'
                f'u={node.u:.3f}:'
                f'V={node.V}:'
                f'Hd={int(node.name in head_subjects)}:'
                f'Of={int(node.name in offshoots)}:'
                f'Gap=0:'
                f'ForceLabel=0:'
                f'Sq=0]')
            emit(annotated.replace(' -- ', '|'))

        if colon:
            emit(', ')
        emit('\n')

    visit(root, colon=False)
    emit(';')

    return ''.join(chunks)
Ejemplo n.º 9
0
def question4(filename,
              new_names,
              prefix_length_max,
              prefix_length_min,
              prefix_length_step,
              rows,
              use_saved_model=False):
    """Aggregate the subnetworks responsible for the top 10, top 1 and top 0.1 percent of traffic by volume.

    An N-ary tree (anytree ``AnyNode``) is built bottom-up: source addresses
    are grouped under their ``prefix_length_max`` subnet, and every further
    (shorter) prefix length regroups the previous level under a new root.
    The ``ip`` of every node whose share of the total traffic reaches 10 %,
    1 % and 0.1 % is written to 'top10_prefix.txt', 'top1_prefix.txt' and
    'top01_prefix.txt' respectively.

    When ``use_saved_model`` is false the CSV is read and grouped, and the
    intermediate results are saved to 'total_traf.npy' and
    'df_groupby_q4.pkl'; otherwise those two files are loaded instead.

    Parameters:
    filename (String): The name of the file containing the data
    new_names: The new names of the columns of the data (passed to
        ``pd.read_csv(names=...)``; must contain 'src_addr' and 'in_bytes')
    prefix_length_max (int): Max prefix length to consider (first level built)
    prefix_length_min (int): Min prefix length to consider (exclusive bound of the range)
    prefix_length_step (int): Steps from one prefix length to another, positive
    rows (int): Number of rows in the dataframe to consider
    use_saved_model (boolean): Whether we wish to use the grouped dataframe saved to disk or not

    Return:
    Nothing; results are written to the text files listed above.

    """
    # Use saved pickle of grouped dataframe if wished so
    if (not use_saved_model):
        df = pd.read_csv(filename,
                         header=0,
                         delimiter=',',
                         names=new_names,
                         usecols=['src_addr', 'in_bytes'],
                         nrows=rows)

        total_traffic = df['in_bytes'].sum()
        np.save('total_traf.npy', total_traffic)

        # NOTE(review): the return value is discarded and the arguments are
        # hard-coded; presumably called for a side effect (printing?) - confirm.
        countSubnets(df, "77.102.101.0/16", 24)

        # Count the IP addresses and sum their traffic
        df = df.groupby('src_addr', sort=False).agg({
            'src_addr': 'count',
            'in_bytes': 'sum'
        })
        df = df.rename_axis(None).reset_index()
        df.columns = ['src_addr', 'src_addr_frequency', 'sum_in_bytes']

        df.to_pickle("df_groupby_q4.pkl")
    else:
        # NOTE: unpickling executes arbitrary code; only load a file this
        # function wrote itself.
        df = pd.read_pickle("df_groupby_q4.pkl")
        total_traffic = np.load('total_traf.npy')

    # Node pointing at upper_level subnets
    curr_root = AnyNode(ip="root", frequency=0, traffic=0)

    # Create a tree to aggregate subnets
    for cnt, prefix_length in enumerate(
            range(prefix_length_max, prefix_length_min, -prefix_length_step)):
        if (cnt == 0):
            # First (longest) prefix length: process every IP address
            for _, row in df.iterrows():
                subnet_of_ip = extractPrefix(row['src_addr'], prefix_length,
                                             True)

                # Find in the tree if the subnet exists already
                # (maxlevel=2 limits the search to the root and its children)
                existing_subnet = find(
                    node=curr_root,
                    filter_=lambda node: node.ip == subnet_of_ip,
                    maxlevel=2)

                # If not, create a new one
                subnet = None
                if (existing_subnet is None):
                    subnet = AnyNode(parent=curr_root,
                                     ip=subnet_of_ip,
                                     frequency=0,
                                     traffic=0)
                else:
                    subnet = existing_subnet

                # Add the child (the ip address) and update the parent (subnet)
                AnyNode(parent=subnet,
                        ip=row['src_addr'],
                        frequency=row['src_addr_frequency'],
                        traffic=row['sum_in_bytes'])
                subnet.traffic += row['sum_in_bytes']
                subnet.frequency += row['src_addr_frequency']

        else:
            new_root = AnyNode(
                ip="new_root", frequency=0,
                traffic=0)  # Node pointing at upper_level subnets

            # The current root contains the subnets of the previous level;
            # each of them is moved under its shorter-prefix parent below.
            for subnet in curr_root.children:
                # Strip a '/<length>' suffix, if any, before re-extracting
                subnet_ip = str(subnet.ip).split('/')[0]
                subnet_of_subnet = extractPrefix(subnet_ip, prefix_length,
                                                 True)

                # Find in the tree if the subnet exists already
                existing_subnet = find(
                    node=new_root,
                    filter_=lambda node: node.ip == subnet_of_subnet,
                    maxlevel=2)

                # If not, create a new one and add the current subnet as its child
                if (existing_subnet is None):
                    AnyNode(parent=new_root,
                            children=[subnet],
                            ip=subnet_of_subnet,
                            frequency=subnet.frequency,
                            traffic=subnet.traffic)
                else:
                    # Otherwise re-parent it and accumulate its totals
                    subnet.parent = existing_subnet
                    existing_subnet.traffic += subnet.traffic
                    existing_subnet.frequency += subnet.frequency
            # Update the current root
            curr_root = new_root
            curr_root.ip = "root"

    # Update root node with its children's attributes
    curr_root.traffic = sum(x.traffic for x in curr_root.children)
    curr_root.frequency = sum(x.frequency for x in curr_root.children)

    # # Save the tree as a graph
    # createTree(curr_root)

    # Count the number of prefixes above a certain traffic threshold.
    # max_level is the depth limit handed to the searches below.
    max_level = int(
        (prefix_length_max - prefix_length_min) / prefix_length_step) + 1

    top10_prefix_nodes = findall(
        node=curr_root,
        filter_=lambda node: node.traffic / total_traffic >= 0.10,
        maxlevel=max_level)
    top1_prefix_nodes = findall(
        node=curr_root,
        filter_=lambda node: node.traffic / total_traffic >= 0.01,
        maxlevel=max_level)
    top01_prefix_nodes = findall(
        node=curr_root,
        filter_=lambda node: node.traffic / total_traffic >= 0.001,
        maxlevel=max_level)
    top10_prefix = [x.ip for x in top10_prefix_nodes]
    top1_prefix = [x.ip for x in top1_prefix_nodes]
    top01_prefix = [x.ip for x in top01_prefix_nodes]

    # One prefix per line in each output file
    with open('top10_prefix.txt', 'w') as f:
        for item in top10_prefix:
            f.write("{}\n".format(item))

    with open('top1_prefix.txt', 'w') as f:
        for item in top1_prefix:
            f.write("{}\n".format(item))

    with open('top01_prefix.txt', 'w') as f:
        for item in top01_prefix:
            f.write("{}\n".format(item))
Ejemplo n.º 10
0
 def find_node(self, product_id):
     """Return the result of searching ``self.tree`` for a node named ``product_id``."""
     def is_requested_product(candidate):
         return candidate.name == product_id

     return find(self.tree, is_requested_product)
Ejemplo n.º 11
0
 def get_node_by_storage_obj(self, storage_obj) -> DiskSnapshotStorageNode:
     """Search the tree under ``self.root_node`` for the node wrapping ``storage_obj``.

     Nodes are matched by comparing ``disk_snapshot_storage_ident`` of their
     ``storage_obj`` with that of the argument. ``self.root_node`` must be set.
     """
     assert self.root_node is not None

     def shares_ident(candidate):
         return (candidate.storage_obj.disk_snapshot_storage_ident ==
                 storage_obj.disk_snapshot_storage_ident)

     return find(self.root_node, shares_ident)
Ejemplo n.º 12
0
    def generate_process_tree(self):
        proc_tree_copy = copy.deepcopy(self.PROCESS_TREE)
        proc_tree_untouched = copy.deepcopy(proc_tree_copy)

        ROOT = Node(name="root", proc=None)
        EXISTINGS_PIDS = {int(pid.split("_")[0]) for pid in proc_tree_copy.keys()}

        r = Resolver('name')
        while len(proc_tree_copy) > 0:
            for proc_item in list(proc_tree_copy.items()):
                key = proc_item[0]
                proc = proc_item[1]
                parent_proc = None
                try:
                    parent_proc = proc_tree_untouched["{}_0".format(proc.ppid)]
                except Exception:
                    pass

                #INTERESTING?
                interesting = False
                interesting_parent = False
                if self.is_proc_interesting(proc):
                    interesting = True

                if not parent_proc is None and self.is_proc_interesting(parent_proc):
                    interesting_parent = True

                pid = proc.pid
                ppid = proc.ppid

                try:
                    if interesting:
                        if interesting_parent:
                            #EXISTS
                            parent_node = find(ROOT, filter_=lambda node: node.name == str(ppid), maxlevel=1000)
                            if parent_node is None:
                                continue
                            else:
                                Node(name=str(pid), parent=parent_node, proc=proc)
                                del proc_tree_copy[key]
                        else:
                            Node(name=str(pid), parent=ROOT, proc=proc)
                            del proc_tree_copy[key]
                    else:
                        del proc_tree_copy[key]

                except ResolverError:
                    pass
                except ChildResolverError:
                    pass
                except IndexError:
                    pass
                #sys.exit(0)

        prev_space_count = 0
        for pre, fill, node in RenderTree(ROOT):
            proc = node.proc
            if proc is None:
                self.f.append(self.paragraph("<ROOT>\n".format(pre.replace(" ", "&nbsp;")), 'dynamicindent0'))
            else:
                self.f.append(self.paragraph("{}{} ({})".format(pre.replace(" ", "&nbsp;"), proc.image, proc.pid), 'dynamicindent0'))
                self.f.append(self.paragraph("Command line:{}".format(proc.commandline), "commandline"))



        #print(RenderTree(ROOT))
        '''#print("{} -> {}".format(pid, ppid))
Ejemplo n.º 13
0
 def __get_node_by_concept_name(self, concept_name):
     """Return the result of searching ``self.root`` for a node whose ``get_concept_name()`` equals ``concept_name``."""
     def has_concept_name(candidate):
         return candidate.get_concept_name() == concept_name

     return at.find(self.root, filter_=has_concept_name)
Ejemplo n.º 14
0
def ListState(request):
    """Build a frequency-ordered prefix tree of all transactions and render the list page.

    Steps:
      1. Split every transaction's ``items_bought`` on ',' into an item list.
      2. Count item frequencies and rank the items by ascending frequency
         (rank 1 = least frequent).
      3. Re-order every transaction by descending rank.
      4. Insert the ordered transactions into a tree rooted at 'root',
         incrementing ``count`` on nodes that already exist on the path.
      5. Collect, per item, all tree nodes carrying that name.
      6. Export the tree picture and render the template.

    Args:
        request: The incoming HTTP request, forwarded to ``render``.

    Returns:
        The response produced by ``render`` for
        'lang_entropy/list_state.html' with the frequency dict and the
        ordered transactions in its context.
    """
    transaction_details = [
        record.items_bought.split(',') for record in Transactions.objects.all()
    ]
    print(transaction_details)

    # Item frequencies, keyed in first-seen order.
    item_frequency = {}
    for items in transaction_details:
        for name in items:
            item_frequency[name] = item_frequency.get(name, 0) + 1

    # Ascending by frequency; ties keep first-seen order (sorted is stable).
    sorted_dict = dict(sorted(item_frequency.items(), key=lambda pair: pair[1]))
    priority_dict = {name: rank for rank, name in enumerate(sorted_dict, 1)}

    ordered_transaction_list = [
        sorted(((name, priority_dict[name]) for name in items),
               key=lambda pair: pair[1],
               reverse=True)
        for items in transaction_details
    ]

    root = Node('root', count=1)
    for item_list in ordered_transaction_list:
        current_node = root
        for name, _priority in item_list:
            # Only direct children can continue the current path, so a single
            # scan of them replaces the former membership test plus subtree
            # search.
            next_node = next(
                (child for child in current_node.children if child.name == name),
                None)
            if next_node is None:
                next_node = Node(name, parent=current_node, count=1)
            else:
                next_node.count += 1
            current_node = next_node

    print(RenderTree(root, style=AsciiStyle()).by_attr('name'))

    # key=key binds the current item name to each filter.
    conditional_pattern_base_dict = {
        key: findall(root, filter_=lambda node, key=key: node.name == key)
        for key in sorted_dict
    }

    # Debug output for the sample item 'I5'. Guarded so that data without
    # that item no longer aborts the view with a KeyError/IndexError.
    sample_nodes = conditional_pattern_base_dict.get('I5')
    if sample_nodes:
        print(sample_nodes[-1])

    RenderTreeGraph(root).to_picture(
        "lang_entropy/static/lang_entropy/images/tree.png")

    context = {
        'dict_transactions': sorted_dict,
        'ordered_list': ordered_transaction_list,
    }

    return render(request, 'lang_entropy/list_state.html', context)
Ejemplo n.º 15
0
def export_to_html():
    """Export the current user's resources as a Netscape bookmark HTML file.

    The user's resources are read from the database, arranged into a folder
    tree (one folder per tag, nested in tag order), rendered to the bookmark
    HTML format, indented, and returned as a file download named
    '3RStore_export.html'.

    Returns:
        The response produced by ``send_file`` for the generated document.
    """
    def base_html():
        """
        This function creates the following HTML structure:

            <!DOCTYPE NETSCAPE-Bookmark-file-1>
            <META CONTENT="text/html; charset=UTF-8" HTTP-EQUIV="Content-Type"></META>
            <TITLE>3RStore Resources</TITLE>
            <H1>3RStore</H1>
            <DL>
            <P TYPE="Main"></P>
            </DL><P></P>

        After the tag cleanup later on, what remains is the appropriate
        parsable form

            <!DOCTYPE NETSCAPE-Bookmark-file-1>
            <META CONTENT="text/html; charset=UTF-8" HTTP-EQUIV="Content-Type">
            <TITLE>3RStore Resources</TITLE>
            <H1>3RStore</H1>
            <DL><P TYPE="Main">
                {folder contents}
            </DL><P>
        """

        soup = BeautifulSoup("<!DOCTYPE NETSCAPE-Bookmark-file-1>",
                             "html.parser")
        meta_tag = soup.new_tag("META")
        meta_tag["HTTP-EQUIV"] = "Content-Type"
        meta_tag["CONTENT"] = "text/html; charset=UTF-8"
        soup.append(meta_tag)

        title_tag = soup.new_tag("TITLE")
        title_tag.string = "3RStore Resources"
        soup.append(title_tag)

        header_tag = soup.new_tag("H1")
        header_tag.string = "3RStore"
        soup.append(header_tag)

        dl_tag = soup.new_tag("DL")
        p_tag = soup.new_tag("P")
        p_tag["TYPE"] = "Main"
        dl_tag.append(p_tag)
        soup.append(dl_tag)

        soup.append(soup.new_tag("P"))  # <P> closing the final </DL>

        return soup

    def new_bkmrk_folder(main_tag, folder_name, depth):
        """
        This function creates an HTML structure like so:


            <DT><H3> {folder name} </H3></DT>
            <DL><P></P>
            </DL><P></P>

        After the tag cleanup later on, what remains is the appropriate
        parsable form

            <DT><H3> {folder name} </H3>
            <DL><P>
            </DL><P>

        Returns the inner <P> tag, into which the folder's contents go.
        """

        # The temporary "ident" attribute carries the indentation depth; it
        # is turned into tabs and removed again during post-processing.
        dt_tag = soup.new_tag("DT", ident=depth)
        h3_tag = soup.new_tag("H3", ident=depth)
        dl_tag = soup.new_tag("DL", ident=depth)
        p_tag = soup.new_tag("P", ident=depth)

        h3_tag.string = folder_name

        dt_tag.append(h3_tag)
        dl_tag.append(p_tag)

        main_tag.append(dt_tag)
        main_tag.append(dl_tag)
        main_tag.append(soup.new_tag(
            "P", ident=depth))  # To close each folder we created

        return p_tag

    def new_bkmrk_link(main_tag, title, link, depth):
        """
        This function creates an HTML structure like so:

            <DT><A {href = link}> {title} </A></DT>

        After the tag cleanup later on, what remains is the appropriate
        parsable form

            <DT><A {href = link}> {title} </A>

        """

        dt_tag = soup.new_tag("DT", ident=depth)

        a_tag = soup.new_tag("A", ident=depth)
        a_tag["HREF"] = link
        a_tag.string = title

        dt_tag.append(a_tag)
        main_tag.append(dt_tag)

    # Get all of the user's resources
    user_id = session["user_id"]
    cur = conn.cursor()
    try:
        cur.execute(
            ("""SELECT title, link, tags FROM resources WHERE user_id = %s ORDER BY tags"""
             ),
            (user_id, ),
        )
        user_resources = cur.fetchall()
    finally:
        # Release the cursor even when the query fails.
        cur.close()

    # Build relevant structure
    def_folder = Node(name="def", parent=None)  # Tree Root

    for res in user_resources:
        # Build a new node for each resource
        cur_res = cc.MixinResource(res[0], res[1], res[2], res[0], 0,
                                   0)  # Set name same as title
        tags = res[2]

        # If a resource has no tags, put it in the root folder
        if not tags:
            cur_res.parent = def_folder
            continue

        # Build every subfolder of the resource,
        # which means creating a new node for every tag.
        # NOTE(review): the lookup is by name over the whole tree, so it can
        # also match the root ("def") or a resource whose title equals a tag.
        prev_folder = def_folder
        for tag in tags:

            # Check if folder/node already exists
            potential_folder = find(def_folder,
                                    lambda node: node.name == tag)
            if not potential_folder:
                new_folder = Node(name=tag)
                # In the first iteration this will be def_folder
                new_folder.parent = prev_folder
                prev_folder = new_folder
            else:
                # So that despite not creating a new node the prev_folder
                # still holds the previous node/subfolder correctly
                prev_folder = potential_folder

        # Add resource to the last Node/Folder
        cur_res.parent = find(def_folder,
                              lambda node: node.name == tags[-1])

    # Handle the actual exporting
    soup = base_html()

    main_tag = soup.find("P", {"TYPE": "Main"})
    prev_folder = main_tag
    prev_was_res = False

    # Build the HTML string/file
    for node in PreOrderIter(def_folder):

        if isinstance(node, cc.MixinResource):
            new_bkmrk_link(prev_folder, node.title, node.link,
                           (node.depth + 1))
            prev_was_res = True
        else:
            # NOTE(review): prev_was_res is never reset, so once any resource
            # has been written every later folder is attached to main_tag.
            # Kept as-is; confirm whether that is intended.
            if prev_was_res:
                prev_folder = main_tag

            prev_folder = new_bkmrk_folder(prev_folder, node.name,
                                           (node.depth + 1))

    # Remove unnecessary tags and add newlines (order of replacements matters)
    final_text = (
        str(soup)
        .replace("</META>", "\n")
        .replace("</TITLE>", "</TITLE>\n")
        .replace("</H1>", "</H1>\n")
        .replace("<DT", "\n<DT")
        .replace("<DL", "\n<DL")
        .replace("</P>", "")
        .replace("</DT>", "")
        .replace("</DL><P", "\n</DL><P")
    )

    # Add proper indentation
    split_text = final_text.splitlines()
    for idx, line in enumerate(split_text):
        res = r.search(r"(?<=.ident=\")\d+", line)

        if res:
            tabs = int(res.group(0))
            split_text[idx] = "\t" * tabs + line

            # Remove the custom "ident" property
            split_text[idx] = r.sub(r" ident=\"\d+\"", "", split_text[idx])

    final_text = "\n".join(split_text)

    # Save text to byte object
    strIO = BytesIO()
    strIO.write(final_text.encode())
    strIO.seek(0)

    # Send html file to client
    # NOTE(review): newer Flask releases renamed attachment_filename to
    # download_name - confirm against the installed version.
    return send_file(strIO,
                     attachment_filename="3RStore_export.html",
                     as_attachment=True)