def load_from_file(path):
    """Load a previously exported tree from a JSON file on disk.

    Args:
        path: Path to the JSON file; ``~`` is expanded.

    Returns:
        A ``RemoteTree`` wrapping the imported root node, or ``None`` when
        the parent directory does not exist.
    """
    path = osp.expanduser(path)
    if osp.exists(osp.dirname(path)):
        # Rebuild nodes as DPNode instances; object_hook restores custom values.
        dict_imp = DictImporter(nodecls=DPNode)
        imp = JsonImporter(dictimporter=dict_imp,
                           object_hook=tools.object_hook)
        # NOTE(review): only the directory is checked — open() will still
        # raise FileNotFoundError if the file itself is missing.
        with open(path, "r") as f:
            res = imp.read(f)
        return RemoteTree(res)
    # BUG FIX: the original message said "Error saving to disk" although
    # this function loads from disk.
    print("Error loading from disk. Dir {} not existing.".format(
        osp.dirname(path)))
    return None
# Example #2
# 0
import sys
from anytree.importer import JsonImporter
from anytree import *

if __name__ == '__main__':
    # Read an anytree JSON export named on the command line and render it.
    path = sys.argv[1]
    json_importer = JsonImporter()
    with open(path) as handle:
        root = json_importer.read(handle)

    print(path + ' Tree', end='\n\n')
    print(RenderTree(root))
import json  # Base library
from xml.dom import minidom

from anytree import RenderTree  # pip3 install anytree
from anytree.importer import DictImporter
from anytree.importer import JsonImporter
from sbomify.sbomlibrary import SbomLibrary

tree = {}

# Import the SBOM tree from out.json, materializing nodes as SbomLibrary.
with open("out.json", "r") as f:
    baseimporter = DictImporter(nodecls=SbomLibrary)
    importer = JsonImporter(dictimporter=baseimporter)
    tree = importer.read(f)

# BUG FIX: removed a bare `tree` expression statement here — it evaluated
# the name and discarded the result (a no-op outside a REPL).

# print(RenderTree(tree))
# Serialize the tree to a SWID XML string and print it.
thestr = tree.toSWID()
print(thestr)
# reparsed = minidom.parseString(thestr)
# print(reparsed.toprettyxml(indent="  "))
# Example #4
# 0
    m = main_xml.find('processTree')
    for e in content_to_copy.find('processTree'):
        m.append(e)


def changing_id(xml, needle, replace_by):
    """Rewrite matching id references under the processTree element.

    Every 'id', 'sourceId' or 'targetId' attribute whose value equals
    *needle* is replaced with *replace_by*, in place.
    """
    for element in xml.find('processTree'):
        for attr in ('id', 'sourceId', 'targetId'):
            if attr in element.attrib and element.attrib[attr] == needle:
                element.attrib[attr] = replace_by


# Re-assemble the split process trees: for each child of the render tree,
# splice its .ptml content back into the main document.
json_importer = JsonImporter()
with open('1_split/rendertree.json', 'r') as handle:
    tree_root = json_importer.read(handle)

main_doc = ET.parse('2_gather/{}.ptml'.format(tree_root.name))

print(RenderTree(tree_root))
for child in tree_root.children:
    sub_doc = ET.parse('2_gather/{}.ptml'.format(child.name))
    # Id of the placeholder node in the main doc and of the sub-tree's root.
    main_id = get_by_name(main_doc, child.name).attrib['id']
    sub_id = sub_doc.find('processTree').attrib['root']
    delete_node(main_doc, main_id)

    # Rewire the sub-tree so its root takes over the placeholder's id.
    changing_id(sub_doc, sub_id, main_id)
    add_content(main_doc, sub_doc)

main_doc.write('2_gather/pt.ptml')
# Example #5
# 0
def _deserialize_ontology(filename):
  """Deserializes an ontology from a JSON file and returns its root node.

  Args:
    filename: Path to a JSON file produced by anytree's JsonExporter.

  Returns:
    The root node of the imported tree.
  """
  importer = JsonImporter()
  # BUG FIX: JsonImporter.read() expects an open file handle, not a path
  # string; the original passed the bare filename.
  with open(filename) as file_handle:
    return importer.read(file_handle)
def main():
    """Build an isomiR reference.

    Loads 5'/3' modification-template trees (anytree JSON), reads hairpin
    and mature miRNA FASTA files, maps end modifications onto each mature
    sequence, then writes a new reference FASTA plus a multiread report.
    """
    parser = get_args()
    # No CLI arguments at all: show usage and exit.
    if not sys.argv[1:]:
        sys.exit(parser.print_help())
    # Load args
    args = parser.parse_args()
    n = 0  # NOTE(review): unused — kept to preserve behavior
    importer = JsonImporter()
    # Template trees describing possible modifications at each end.
    with open(args.three_prime_template_file) as json_data:
        three_prime_template = importer.read(json_data)
    with open(args.five_prime_template_file) as json_data:
        five_prime_template = importer.read(json_data)
    hairpins_list=set()
    reference = defaultdict()
    with open(args.hairpin_path, 'rt') as hairpins, open(args.mature_path, 'rt') as mature:
        # Load hairpin reference: split FASTA on '>' and keep sequences only.
        all_lines = hairpins.read()
        hairpins = all_lines.split('>')  # NOTE: shadows the file handle
        del hairpins[0]  # drop the empty chunk before the first '>'
        for line in hairpins:
            line = line.split('\n',1)
            # line[1] is the (possibly multi-line) sequence; flatten it.
            hairpins_list.add(line[1].replace('\n',''))
        # Load mature RNA: FASTA pairs — `line` is the ">name" header and
        # `line2` the sequence (next() consumes two lines per iteration).
        for line in mature:
            line2 = next(mature).strip()
            name = line.strip()
            if line2 in reference:
                # Duplicate sequence: merge names with '=' separator.
                reference[line2]['name'] = '{}={}'.format(reference[line2]['name'],name.lstrip('>'))
            else:
                reference[line2]=defaultdict()
                reference[line2]['name'] = name
            # Deep copies so each sequence gets independent template trees.
            reference[line2]['5_prime_tree'] = copy.deepcopy(five_prime_template)
            reference[line2]['3_prime_tree'] = copy.deepcopy(three_prime_template)
        num_seqs = len(reference.keys())
        widgets = [progressbar.Percentage(), progressbar.Bar()]
        print('Mapping isomir modifications to reference.')
        count=0
        bar = progressbar.ProgressBar(widgets=widgets, max_value=num_seqs).start()
        # Annotate both ends of every sequence against the hairpin set.
        for sequence in reference.keys():
            match_one_end(sequence=sequence,reference_dict=reference[sequence],end=5,hairpin_reference=hairpins_list)
            match_one_end(sequence=sequence,reference_dict=reference[sequence],end=3,hairpin_reference=hairpins_list)
            count+=1
            bar.update(count)
        print()
        print('Generating new reference.')
        count = 0
        new_reference = defaultdict()
        bar = progressbar.ProgressBar(widgets=widgets, max_value=num_seqs).start()
        for sequence in reference.keys():
            new_reference = generate_string_general(sequence, reference, new_reference)
            count+=1
            bar.update(count)
        print()
        print('Checking for multireads.')
        # Sequences mapping to >1 name are flagged "_multi" and logged
        # separately; unique ones are written with a "_unique" suffix.
        with open(args.out_path,'w') as out_file:
            with open(args.multireads_path,'w') as multireads_out:
                for sequence in new_reference:
                    if(len(new_reference[sequence])>1):
                        ordered_names = order_multireads(names=new_reference[sequence])
                        multireads_out.write('{}\n{}\n\n'.format('\n'.join(ordered_names), sequence))
                        out_file.write('{}_multi\n{}\n'.format(ordered_names[0],sequence))
                    else:
                        out_file.write('{}_unique\n{}\n'.format(next(iter(new_reference[sequence])),sequence))
        print('Isomir reference created.')
# Example #7
# 0
    def analyze(self):
        """Analyze self.url: load or build its script tree, compare image
        sizes, and rebuild the per-script checkbox UI."""
        def reset_display():
            # Reset all per-page GUI state before analyzing a new URL.
            self.suffix = "?JSTool=none"
            self.script_buttons.clear()
            self.choice_boxes.clear()
            self.number_of_buttons = 0
            # self.diff_btn.Show()
            self.apply_btn.Show()
            self.save_btn.Show()
            self.content_panel.Show()
            self.content_text.SetValue("Script code")
            # Hide and detach every row previously added to the script sizer;
            # always operate on index 0 because Remove shifts the children.
            while self.script_sizer.GetChildren():
                self.script_sizer.Hide(0)
                self.script_sizer.Remove(0)
            self.images.clear()

        def get_index_html():
            # Fetch the page's index.html through the remote proxy.
            return get_resource(self.url)

        def parse_html(html: str):
            """Extract each <script> tag in *html* into self.script_tree.

            Every script becomes an AnyNode (id "scriptN"); external scripts
            are re-parented under the node matching their src URL when one
            exists. Each tag is replaced in the HTML by an id comment.
            """
            cnt = 1
            if not html:
                return
            while "<script" in html:
                src = ""
                script_name = "script" + str(cnt)
                start_index = html.find("<script")
                end_index = html.find("</script>")
                # +9 keeps the closing "</script>" inside the slice.
                text = html[start_index:end_index + 9]
                new_node = AnyNode(id=script_name,
                                   parent=self.script_tree,
                                   content=text,
                                   vector=extract_features(text),
                                   count=1)
                if ' src="' in text:  # BeautifulSoup turns all single quotes into double quotes
                    src = text.split(' src="')[1].split('"')[0]
                    src = self.format_src(src)
                    # BUG FIX: ensure `node` is bound even when the search
                    # below raises CountError — the original referenced an
                    # undefined name (NameError) in that case.
                    node = None
                    try:
                        node = anytree.cachedsearch.find(
                            self.script_tree, lambda node: node.id == src)
                    except anytree.search.CountError:
                        logging.warning(
                            'multiple possible parents: more than one node with id = %s',
                            src)
                    if node:
                        node.parent = new_node
                html = html.replace(text, "\n<!--" + script_name + "-->\n")
                cnt += 1

        def create_buttons():
            """Create one checkbox per script node and lay out the panel."""
            # Master "Check all" checkbox occupies slot 0.
            self.add_button('Check all', 0, 1, None)

            index = 1
            # One checkbox per non-root node, in pre-order (parents first).
            for node in PreOrderIter(self.script_tree):
                if node.is_root:
                    continue
                node.button = index
                # vector = extract_features(node.content)
                self.add_button(node.id, index, node.depth,
                                get_attribute(node, 'vector'))  # node.count
                checkbox = self.script_buttons[index]
                if (get_attribute(checkbox, 'confidence') is not None
                        and get_attribute(
                            checkbox, 'confidence') < CONFIDENCE_THRESHOLD):
                    # Low-confidence classification: fall back to clustering.
                    checkbox.category = CLUSTER.predict(script=str(
                        node.content),
                                                        preprocess=True)
                    label = get_attribute(checkbox, 'label')
                    if label:
                        label.SetLabel(checkbox.category)
                        label.SetBackgroundColour(
                            tuple(CATEGORIES[checkbox.category]['color']))
                        label.SetToolTip(
                            CATEGORIES[checkbox.category]['description'])
                if get_attribute(checkbox,
                                 'category') not in BLOCKED_CATEGORIES:
                    # Not an ad/marketing script: unblock its URL and tick it.
                    try:
                        # Inline scripts ("scriptN") were never in blocked_urls.
                        if node.id[:6] != "script":
                            self.blocked_urls.remove(node.id)
                    except ValueError:
                        logging.debug("Could not remove %s from blocked urls",
                                      node.id)
                    self.check_boxes(True, node)
                index += 1
            self.scripts_panel.SetSizer(self.script_sizer)
            self.frame.frame_sizer.Layout()

        def functional_dependency():
            """Report scripts whose criticality label disagrees with the most
            critical label found in their dependency group (diagnostic only)."""
            try:
                tmp_dep = perf.get_dependency(self.url)
                # tmp_dep is a list of dependency groups, each a list of
                # script URLs, e.g. [[urlA, urlB], [urlC, urlD], ...].

                print("\n\n-------- DEPENDENCY LABELS CHANGED --------")
                mapping = {'non-critical': 0, 'translatable': 1, 'critical': 2}
                mapping2 = {
                    0: 'non-critical',
                    1: 'translatable',
                    2: 'critical'
                }
                for a in tmp_dep:
                    tmp_label = 0

                    # First pass: find the most critical label in the group.
                    for i in a:
                        if i not in self.yasir or self.yasir[
                                i].category not in mapping:
                            continue

                        if mapping[self.yasir[i].category] > tmp_label:
                            tmp_label = mapping[self.yasir[i].category]

                    # Second pass: print members that disagree with it.
                    for i in a:
                        if i not in self.yasir or self.yasir[
                                i].category not in mapping:
                            continue

                        if self.yasir[i].category != mapping2[tmp_label]:
                            print("****", i, mapping2[tmp_label],
                                  self.yasir[i].category)

                print("\n\n")
            except RuntimeError:
                # Best-effort: dependency data may be unavailable.
                pass

        def display_loading_message():
            # Show a loading notice; note the GUI may not repaint before the
            # blocking page load starts (known limitation).
            self.err_msg.SetForegroundColour((0, 0, 0))
            self.err_msg.SetLabel("Loading page... please wait")
            self.Update()

        def similarity():
            """Print script pairs whose Jaccard similarity exceeds
            SIMILARITY_THRESHOLD."""
            ids = []
            bodies = []
            for current in PreOrderIter(self.script_tree):
                if current.is_root:
                    continue
                ids.append(current.id)
                bodies.append(str(current.content))
            matches = similarity_comparison(bodies, SIMILARITY_THRESHOLD)
            if matches:
                print("---" * 20)
                print('scripts with similarity > %.2f' % SIMILARITY_THRESHOLD)
            for match in matches:
                print('%s %s %.2f' % (ids[match[0]], ids[match[1]], match[2]))

        def compare_image_sizes(images):
            """Record original vs rendered dimensions for each image in *images*
            and log URLs where both pairs were recovered."""
            for url in images:
                if url[:4] == 'data':
                    # data: URI — the payload is embedded, not fetched.
                    url = url.partition(';')[-1]
                    body = url.partition(',')[-1]
                    if url[:6] == 'base64':
                        body = base64.b64decode(body)
                else:
                    body = get_resource(url)
                try:
                    stream = BytesIO(body)
                except TypeError:
                    # body is str (e.g. non-base64 data URI); encode first.
                    logging.warning("body in %s, not in bytes", type(body))
                    stream = BytesIO(body.encode(ENCODING))
                try:
                    # 'ow'/'oh' = original width/height from the image bytes.
                    width, height = get_image_size_from_bytesio(
                        stream, DEFAULT_BUFFER_SIZE)
                    self.images[url] = {}
                    self.images[url]['ow'] = width
                    self.images[url]['oh'] = height
                except UnknownImageFormat as error:
                    logging.exception(str(error))
                except struct.error as error:
                    logging.error(str(error))

            # 'rw'/'rh' = rendered size reported by the live DOM via Selenium.
            for img in self.driver.find_elements_by_tag_name('img'):
                url = img.get_attribute('src')
                if url not in self.images.keys():
                    self.images[url] = {}
                self.images[url]['rw'] = img.size['width']
                self.images[url]['rh'] = img.size['height']

            logging.info("---" * 20)
            logging.info("potential image improvements:")
            for url, dimensions in self.images.items():
                if len(dimensions.keys()) == 4:
                    # Successfully parsed original and rendered dimensions
                    logging.info(url)
                    logging.info("original: %d x %d", dimensions['ow'],
                                 dimensions['oh'])
                    logging.info("rendered: %d x %d", dimensions['rw'],
                                 dimensions['rh'])

        display_loading_message()

        # Reset values
        self.url = self.url_input.GetValue()
        if self.url[-1] != "/":
            self.url = self.url + "/"
        if not self.url:
            return
        reset_display()
        self.script_tree = AnyNode(id=self.url)

        try:
            file_path = PATH + "/reports/" + self.url.split("/")[2]
            if not os.path.exists(file_path):
                os.mkdir(file_path)
            with open(file_path + "/script_tree.txt", 'r') as f:
                logging.debug('importing script tree...')
                importer = JsonImporter()
                self.script_tree = importer.read(f)
            with open(file_path + "/images.json", 'r') as f:
                images = json.load(f)

        except FileNotFoundError:
            logging.debug('script tree does not yet exist, building now')
            # Get original page and parse external scripts
            self.driver.execute_cdp_cmd('Network.setBlockedURLs', {'urls': []})
            epoch_in_milliseconds = time.time() * 1000
            try:
                self.driver.get(self.url)
                self.err_msg.SetLabel("")
            except InvalidArgumentException as exception:
                self.err_msg.SetForegroundColour((255, 0, 0))  # make text red
                self.err_msg.SetLabel(str(exception))
                return
            self.wait_for_load()
            self.script_tree = AnyNode(id=self.url)
            scripts, images = self.parse_log(epoch_in_milliseconds)
            for script in scripts:
                # pylint: disable=undefined-loop-variable
                # pylint: disable=cell-var-from-loop
                parent = anytree.cachedsearch.find(
                    self.script_tree,
                    lambda node: node.id == self.format_src(script['parent']))
                # Check if this node already exists
                node = anytree.cachedsearch.find(
                    self.script_tree,
                    lambda node: node.id == self.format_src(script['url']))
                if node and node.parent == parent:
                    logging.warning('duplicate script! %s',
                                    self.format_src(script['url']))
                    node.count += 1
                else:
                    AnyNode(id=self.format_src(script['url']),
                            parent=parent,
                            content=script['content'],
                            vector=extract_features(script['content']),
                            count=1)

            # Check image differences
            compare_image_sizes(images)

            # Parse inline scripts
            html = get_index_html()
            parse_html(html)
            # self.print_scripts()

            # Export script tree
            logging.debug('exporting script tree...')
            exporter = JsonExporter()
            with open(
                    PATH + "/reports/" + self.url.split("/")[2] +
                    "/script_tree.json", "w") as f:
                exporter.write(self.script_tree, f)
            logging.debug('done')

            # Export images
            with open(
                    PATH + "/reports/" + self.url.split("/")[2] +
                    "/images.json", "w") as f:
                json.dump(images, f)

        # Check similarity
        # similarity()

        # Create buttons
        self.block_all_scripts()
        create_buttons()

        # Print functional dependencies
        # functional_dependency()

        # Get page with all scripts removed
        self.on_apply_press()

        try:
            self.original.get(self.url)
        except InvalidArgumentException as e:
            logging.error(e.what())