import os.path as osp

from anytree.importer import DictImporter, JsonImporter

# DPNode, tools and RemoteTree are provided by the surrounding project.


def load_from_file(path):
    """Load a tree previously saved to disk as JSON and wrap it in a RemoteTree."""
    path = osp.expanduser(path)
    if osp.exists(path):
        dict_imp = DictImporter(nodecls=DPNode)
        imp = JsonImporter(dictimporter=dict_imp, object_hook=tools.object_hook)
        with open(path, "r") as f:
            res = imp.read(f)
        return RemoteTree(res)
    else:
        print("Error loading from disk. File {} does not exist.".format(path))
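# A minimal, self-contained sketch of the same DictImporter/JsonImporter pattern.
# DemoNode is a hypothetical stand-in for the project's DPNode; DictImporter
# instantiates the node class with the JSON keys passed as keyword arguments.
from anytree import NodeMixin, RenderTree
from anytree.importer import DictImporter, JsonImporter


class DemoNode(NodeMixin):
    def __init__(self, name, parent=None, **kwargs):
        self.name = name
        self.parent = parent
        self.__dict__.update(kwargs)  # keep extra JSON attributes (e.g. "size")


importer = JsonImporter(dictimporter=DictImporter(nodecls=DemoNode))
root = importer.import_('{"name": "root", "children": [{"name": "leaf", "size": 3}]}')
for pre, _, node in RenderTree(root):
    print(f"{pre}{node.name}")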
import sys

from anytree import RenderTree
from anytree.importer import JsonImporter

if __name__ == '__main__':
    filename = sys.argv[1]
    importer = JsonImporter()
    with open(filename) as filehandle:
        tree = importer.read(filehandle)
    print(filename + ' Tree', end='\n\n')
    print(RenderTree(tree))
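# For reference, a file this script can render can be produced with anytree's
# JsonExporter; the file and script names below are only illustrative.
from anytree import Node
from anytree.exporter import JsonExporter

root = Node("root")
Node("child_a", parent=root)
Node("child_b", parent=root)
with open("example_tree.json", "w") as f:
    JsonExporter(indent=2).write(root, f)
# Then: python render_tree.py example_tree.json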
import json  # Base library
from xml.dom import minidom

from anytree import RenderTree  # pip3 install anytree
from anytree.importer import DictImporter
from anytree.importer import JsonImporter

from sbomify.sbomlibrary import SbomLibrary

tree = {}
with open("out.json", "r") as f:
    baseimporter = DictImporter(nodecls=SbomLibrary)
    importer = JsonImporter(dictimporter=baseimporter)
    tree = importer.read(f)

# print(RenderTree(tree))
thestr = tree.toSWID()
print(thestr)
# reparsed = minidom.parseString(thestr)
# print(reparsed.toprettyxml(indent=" "))
import xml.etree.ElementTree as ET

from anytree import RenderTree
from anytree.importer import JsonImporter


def add_content(main_xml, content_to_copy):
    # Append every processTree child of content_to_copy to the main document.
    m = main_xml.find('processTree')
    for e in content_to_copy.find('processTree'):
        m.append(e)


def changing_id(xml, needle, replace_by):
    # Rewrite matching id / sourceId / targetId attributes throughout the process tree.
    for e in xml.find('processTree'):
        for a in ['id', 'sourceId', 'targetId']:
            if a in e.attrib.keys():
                if e.attrib[a] == needle:
                    e.attrib[a] = replace_by


importer = JsonImporter()
with open('1_split/rendertree.json', 'r') as f:
    root = importer.read(f)

main = ET.parse('2_gather/{}.ptml'.format(root.name))
print(RenderTree(root))

# get_by_name and delete_node are project helpers not shown here (see sketch below).
for c in root.children:
    sub = ET.parse('2_gather/{}.ptml'.format(c.name))
    id_in_main = get_by_name(main, c.name).attrib['id']
    id_in_sub = sub.find('processTree').attrib['root']
    delete_node(main, id_in_main)
    changing_id(sub, id_in_sub, id_in_main)
    add_content(main, sub)

main.write('2_gather/pt.ptml')
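# The merge above calls get_by_name and delete_node without defining them.
# A plausible sketch of those helpers, assuming processTree children carry
# 'name' and 'id' attributes; the real project definitions may differ.
def get_by_name(xml, name):
    # Hypothetical helper: first processTree child whose 'name' attribute matches.
    for e in xml.find('processTree'):
        if e.attrib.get('name') == name:
            return e
    return None


def delete_node(xml, node_id):
    # Hypothetical helper: drop the processTree child whose 'id' attribute matches.
    tree = xml.find('processTree')
    for e in list(tree):
        if e.attrib.get('id') == node_id:
            tree.remove(e)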
def _deserialize_ontology(filename):
    """Deserializes an ontology from a JSON file and returns its root."""
    importer = JsonImporter()
    # JsonImporter.read() expects a file handle, not a path, so open the file first.
    with open(filename) as f:
        return importer.read(f)
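# Round-trip usage sketch. The ontology.json path is hypothetical; the file is
# written with anytree's JsonExporter and read back through _deserialize_ontology.
from anytree import Node, RenderTree
from anytree.exporter import JsonExporter
from anytree.importer import JsonImporter

root = Node("entity")
Node("animal", parent=root)
Node("plant", parent=root)
with open("ontology.json", "w") as f:
    JsonExporter(indent=2).write(root, f)

ontology_root = _deserialize_ontology("ontology.json")
print(RenderTree(ontology_root))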
def main():
    parser = get_args()
    if not sys.argv[1:]:
        sys.exit(parser.print_help())
    # Load args
    args = parser.parse_args()
    importer = JsonImporter()
    with open(args.three_prime_template_file) as json_data:
        three_prime_template = importer.read(json_data)
    with open(args.five_prime_template_file) as json_data:
        five_prime_template = importer.read(json_data)
    hairpins_list = set()
    reference = defaultdict()
    with open(args.hairpin_path, 'rt') as hairpins, open(args.mature_path, 'rt') as mature:
        # Load hairpins ref
        all_lines = hairpins.read()
        hairpins = all_lines.split('>')
        del hairpins[0]
        for line in hairpins:
            line = line.split('\n', 1)
            hairpins_list.add(line[1].replace('\n', ''))
        # Load mature RNA
        for line in mature:
            line2 = next(mature).strip()
            name = line.strip()
            if line2 in reference:
                reference[line2]['name'] = '{}={}'.format(reference[line2]['name'], name.lstrip('>'))
            else:
                reference[line2] = defaultdict()
                reference[line2]['name'] = name
                reference[line2]['5_prime_tree'] = copy.deepcopy(five_prime_template)
                reference[line2]['3_prime_tree'] = copy.deepcopy(three_prime_template)
    num_seqs = len(reference.keys())
    widgets = [progressbar.Percentage(), progressbar.Bar()]
    print('Mapping isomir modifications to reference.')
    count = 0
    bar = progressbar.ProgressBar(widgets=widgets, max_value=num_seqs).start()
    for sequence in reference.keys():
        match_one_end(sequence=sequence, reference_dict=reference[sequence], end=5,
                      hairpin_reference=hairpins_list)
        match_one_end(sequence=sequence, reference_dict=reference[sequence], end=3,
                      hairpin_reference=hairpins_list)
        count += 1
        bar.update(count)
    print()
    print('Generating new reference.')
    count = 0
    new_reference = defaultdict()
    bar = progressbar.ProgressBar(widgets=widgets, max_value=num_seqs).start()
    for sequence in reference.keys():
        new_reference = generate_string_general(sequence, reference, new_reference)
        count += 1
        bar.update(count)
    print()
    print('Checking for multireads.')
    with open(args.out_path, 'w') as out_file:
        with open(args.multireads_path, 'w') as multireads_out:
            for sequence in new_reference:
                if len(new_reference[sequence]) > 1:
                    ordered_names = order_multireads(names=new_reference[sequence])
                    multireads_out.write('{}\n{}\n\n'.format('\n'.join(ordered_names), sequence))
                    out_file.write('{}_multi\n{}\n'.format(ordered_names[0], sequence))
                else:
                    out_file.write('{}_unique\n{}\n'.format(next(iter(new_reference[sequence])), sequence))
    print('Isomir reference created.')
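# The mature-RNA loop above relies on a paired-line read pattern: each pass of
# the for-loop consumes a '>' header line and next() consumes its sequence line,
# so the input must hold exactly one sequence line per header. A minimal
# illustration with a hypothetical file name:
with open("mature.fa") as fa:
    for header in fa:
        seq = next(fa).strip()
        print(header.strip().lstrip(">"), seq)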
def analyze(self):
    """Do everything."""

    def reset_display():
        # Reset display
        self.suffix = "?JSTool=none"
        self.script_buttons.clear()
        self.choice_boxes.clear()
        self.number_of_buttons = 0
        # self.diff_btn.Show()
        self.apply_btn.Show()
        self.save_btn.Show()
        self.content_panel.Show()
        self.content_text.SetValue("Script code")
        while self.script_sizer.GetChildren():
            self.script_sizer.Hide(0)
            self.script_sizer.Remove(0)
        self.images.clear()

    def get_index_html():
        # Get index.html from remote proxy
        return get_resource(self.url)

    def parse_html(html: str):
        # Add index.html scripts to self.script_tree
        cnt = 1
        if not html:
            return
        while "<script" in html:
            src = ""
            script_name = "script" + str(cnt)
            start_index = html.find("<script")
            end_index = html.find("</script>")
            text = html[start_index:end_index + 9]
            new_node = AnyNode(id=script_name,
                               parent=self.script_tree,
                               content=text,
                               vector=extract_features(text),
                               count=1)
            if ' src="' in text:
                # BeautifulSoup turns all single quotes into double quotes
                src = text.split(' src="')[1].split('"')[0]
                src = self.format_src(src)
                node = None  # stays None if the lookup raises
                try:
                    node = anytree.cachedsearch.find(
                        self.script_tree, lambda node: node.id == src)
                except anytree.search.CountError:
                    logging.warning(
                        'multiple possible parents: more than one node with id = %s',
                        src)
                if node:
                    node.parent = new_node
            html = html.replace(text, "\n<!--" + script_name + "-->\n")
            cnt += 1

    def create_buttons():
        # Add checkboxes to display
        # Check all
        self.add_button('Check all', 0, 1, None)
        index = 1
        # All other script checkboxes
        for node in PreOrderIter(self.script_tree):
            if node.is_root:
                continue
            node.button = index
            # vector = extract_features(node.content)
            self.add_button(node.id, index, node.depth,
                            get_attribute(node, 'vector'))  # node.count
            checkbox = self.script_buttons[index]
            if (get_attribute(checkbox, 'confidence') is not None
                    and get_attribute(checkbox, 'confidence') < CONFIDENCE_THRESHOLD):
                # run clustering if confidence less than threshold
                checkbox.category = CLUSTER.predict(script=str(node.content),
                                                    preprocess=True)
                label = get_attribute(checkbox, 'label')
                if label:
                    label.SetLabel(checkbox.category)
                    label.SetBackgroundColour(
                        tuple(CATEGORIES[checkbox.category]['color']))
                    label.SetToolTip(
                        CATEGORIES[checkbox.category]['description'])
            if get_attribute(checkbox, 'category') not in BLOCKED_CATEGORIES:
                # ads / marketing scripts disabled by default
                try:
                    if node.id[:6] != "script":
                        self.blocked_urls.remove(node.id)
                except ValueError:
                    logging.debug("Could not remove %s from blocked urls", node.id)
                self.check_boxes(True, node)
            index += 1
        self.scripts_panel.SetSizer(self.script_sizer)
        self.frame.frame_sizer.Layout()

    def functional_dependency():
        # functional dependencies?
        try:
            tmp_dep = perf.get_dependency(self.url)
            # tmp_dep = [['https://ws.sharethis.com/button/async-buttons.js',
            #             'https://www.google-analytics.com/analytics.js',
            #             'https://ws.sharethis.com/button/buttons.js'],
            #            ['https://www.googletagmanager.com/gtm.js?id=GTM-WBDQQ5',
            #             'https://www.googleadservices.com/pagead/conversion_async.js'],
            #            ['https://www.unicef.org/sites/default/files/js/js_B7pS3ddmNLFYOJi3j28odiodelMu-EhaOeKlHZ8E6y0.js',
            #             'https://www.unicef.org/themes/custom/unicef/assets/src/js/init-blazy.js?v=1.x',
            #             'https://www.unicef.org/sites/default/files/js/js_dWWS6YNlsZWmXLboSy3PIiSD_Yg3sRxwjbMb52mdNyw.js',
            #             'https://www.unicef.org/sites/default/files/js/js_cLlwgRdoiVfjtFxLqlXX-aVbv3xxfX_uMCsn7iJqNpA.js']]
            print("\n\n-------- DEPENDENCY LABELS CHANGED --------")
            mapping = {'non-critical': 0, 'translatable': 1, 'critical': 2}
            mapping2 = {0: 'non-critical', 1: 'translatable', 2: 'critical'}
            for a in tmp_dep:
                tmp_label = 0
                for i in a:
                    if i not in self.yasir or self.yasir[i].category not in mapping:
                        continue
                    if mapping[self.yasir[i].category] > tmp_label:
                        tmp_label = mapping[self.yasir[i].category]
                for i in a:
                    if i not in self.yasir or self.yasir[i].category not in mapping:
                        continue
                    if self.yasir[i].category != mapping2[tmp_label]:
                        print("****", i, mapping2[tmp_label], self.yasir[i].category)
            print("\n\n")
        except RuntimeError:
            pass

    def display_loading_message():
        # Never managed to get this part to display before spinning wheel of death
        self.err_msg.SetForegroundColour((0, 0, 0))
        self.err_msg.SetLabel("Loading page... please wait")
        self.Update()

    def similarity():
        # Print script pairs in self.script_tree with Jaccard similarity > SIMILARITY_THRESHOLD
        names = []
        scripts = []
        for node in PreOrderIter(self.script_tree):
            if node.is_root:
                continue
            names.append(node.id)
            scripts.append(str(node.content))
        results = similarity_comparison(scripts, SIMILARITY_THRESHOLD)
        if results:
            print("---" * 20)
            print('scripts with similarity > %.2f' % SIMILARITY_THRESHOLD)
            for tup in results:
                print('%s %s %.2f' % (names[tup[0]], names[tup[1]], tup[2]))

    def compare_image_sizes(images):
        # Print difference in original and rendered image sizes for image URLs in images
        for url in images:
            if url[:4] == 'data':
                # URI rather than URL
                url = url.partition(';')[-1]
                body = url.partition(',')[-1]
                if url[:6] == 'base64':
                    body = base64.b64decode(body)
            else:
                body = get_resource(url)
            try:
                stream = BytesIO(body)
            except TypeError:
                logging.warning("body in %s, not in bytes", type(body))
                stream = BytesIO(body.encode(ENCODING))
            try:
                width, height = get_image_size_from_bytesio(stream, DEFAULT_BUFFER_SIZE)
                self.images[url] = {}
                self.images[url]['ow'] = width
                self.images[url]['oh'] = height
            except UnknownImageFormat as error:
                logging.exception(str(error))
            except struct.error as error:
                logging.error(str(error))
        for img in self.driver.find_elements_by_tag_name('img'):
            url = img.get_attribute('src')
            if url not in self.images.keys():
                self.images[url] = {}
            self.images[url]['rw'] = img.size['width']
            self.images[url]['rh'] = img.size['height']
        logging.info("---" * 20)
        logging.info("potential image improvements:")
        for url, dimensions in self.images.items():
            if len(dimensions.keys()) == 4:
                # Successfully parsed original and rendered dimensions
                logging.info(url)
                logging.info("original: %d x %d", dimensions['ow'], dimensions['oh'])
                logging.info("rendered: %d x %d", dimensions['rw'], dimensions['rh'])

    display_loading_message()

    # Reset values
    self.url = self.url_input.GetValue()
    if not self.url:
        return
    if self.url[-1] != "/":
        self.url = self.url + "/"
    reset_display()
    self.script_tree = AnyNode(id=self.url)
    try:
        file_path = PATH + "/reports/" + self.url.split("/")[2]
        if not os.path.exists(file_path):
            os.mkdir(file_path)
        # Use the same file name the exporter writes below.
        with open(file_path + "/script_tree.json", 'r') as f:
            logging.debug('importing script tree...')
            importer = JsonImporter()
            self.script_tree = importer.read(f)
        with open(file_path + "/images.json", 'r') as f:
            images = json.load(f)
    except FileNotFoundError:
        logging.debug('script tree does not yet exist, building now')
        # Get original page and parse external scripts
        self.driver.execute_cdp_cmd('Network.setBlockedURLs', {'urls': []})
        epoch_in_milliseconds = time.time() * 1000
        try:
            self.driver.get(self.url)
            self.err_msg.SetLabel("")
        except InvalidArgumentException as exception:
            self.err_msg.SetForegroundColour((255, 0, 0))  # make text red
            self.err_msg.SetLabel(str(exception))
            return
        self.wait_for_load()
        self.script_tree = AnyNode(id=self.url)
        scripts, images = self.parse_log(epoch_in_milliseconds)
        for script in scripts:
            # pylint: disable=undefined-loop-variable
            # pylint: disable=cell-var-from-loop
            parent = anytree.cachedsearch.find(
                self.script_tree,
                lambda node: node.id == self.format_src(script['parent']))
            # Check if this node already exists
            node = anytree.cachedsearch.find(
                self.script_tree,
                lambda node: node.id == self.format_src(script['url']))
            if node and node.parent == parent:
                logging.warning('duplicate script! %s', self.format_src(script['url']))
                node.count += 1
            else:
                AnyNode(id=self.format_src(script['url']),
                        parent=parent,
                        content=script['content'],
                        vector=extract_features(script['content']),
                        count=1)
        # Check image differences
        compare_image_sizes(images)
        # Parse inline scripts
        html = get_index_html()
        parse_html(html)
        # self.print_scripts()
        # Export script tree
        logging.debug('exporting script tree...')
        exporter = JsonExporter()
        with open(PATH + "/reports/" + self.url.split("/")[2] + "/script_tree.json",
                  "w") as f:
            exporter.write(self.script_tree, f)
        logging.debug('done')
        # Export images
        with open(PATH + "/reports/" + self.url.split("/")[2] + "/images.json",
                  "w") as f:
            json.dump(images, f)

    # Check similarity
    # similarity()

    # Create buttons
    self.block_all_scripts()
    create_buttons()

    # Print functional dependencies
    # functional_dependency()

    # Get page with all scripts removed
    self.on_apply_press()
    try:
        self.original.get(self.url)
    except InvalidArgumentException as e:
        logging.error(str(e))