def _save_json(self, node):
    '''Export the catalog to JSON.'''
    exp = JsonExporter(indent=2, sort_keys=True)
    with open(self.path, 'w') as f:
        exp.write(node, f)
    self._debug('Catalog saved to json "{}"'.format(self.path))
    return True
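# A minimal counterpart sketch (not part of the original class): the catalog
# written by _save_json above can be reloaded with anytree's JsonImporter.
# `path` is assumed to be the same self.path the exporter wrote to.
from anytree.importer import JsonImporter

def _load_json(path):
    importer = JsonImporter()
    with open(path) as f:
        return importer.read(f)  # returns the root node of the restored tree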
def create(tweetsFile):
    # A node should have a nodeNr (starting at 0), an idStr (tweet id), and a parent.
    propTree = PropTree()  # an instance of a tree
    nodeNr = 0  # to be ordered by time
    unknownNodeNr = 0
    requestCounter = 1
    posts = []
    print(tweetsFile)
    for line in open(tweetsFile, 'r'):
        posts.append(json.loads(line))  # make a list of json objects
    print("This file contains " + str(len(posts)) + " posts.")
    for post in posts:
        # Adds a new key: the post's id when in the tree.
        # (TODO: is this used? Should the full post JSON live on the AnyNode?)
        post['tweet_nr'] = nodeNr
        idStr = post['id_str']
        idUser = post['user']['id_str']
        if 'retweeted_status' in post:
            parentIdStr = post['retweeted_status']['id_str']
            parentIdUser = post['retweeted_status']['user']['id_str']
        elif 'quoted_status' in post:
            parentIdStr = post['quoted_status']['id_str']
            parentIdUser = post['quoted_status']['user']['id_str']
        if 'retweeted_status' in post or 'quoted_status' in post:
            # make retweet or quote node
            parentNode = getFriendInTree(propTree, idUser, parentIdStr, requestCounter, len(posts))
            requestCounter += 1
            if parentNode is None:
                # If this node has no parent we want to artificially create one;
                # artificial parents can be distinguished by an "ex" prefix in their id.
                parentNodeNr = "ex" + str(unknownNodeNr)
                parentNode = AnyNode(nodeNr=parentNodeNr, idStr=parentIdStr, idUser=parentIdUser)
                propTree.addRoot(parentNode)
                unknownNodeNr += 1
            AnyNode(nodeNr=nodeNr, idStr=idStr, idUser=idUser, parent=parentNode)
        else:
            # this is original content
            reference = AnyNode(nodeNr=nodeNr, idStr=idStr, idUser=idUser)
            propTree.addRoot(reference)
        nodeNr += 1
    propTree.updatePosts(posts)
    exporter = JsonExporter(indent=2, sort_keys=True)
    # Note: the original code called open(...).close without parentheses (a no-op)
    # and then reopened in 'r+' mode; opening once in 'w' mode truncates correctly.
    savedFile = open('./data/tree/trees/' + tweetsFile[30:-4] + '.txt', 'w')
    for root in propTree.roots:
        exporter.write(root, savedFile)
        savedFile.write("&\n")
    savedFile.close()
    propTree.makeNodeTree()
    writeToFile(propTree)
    return propTree
def main(ioc_file, output_dir):
    with open(ioc_file) as csvfile:
        iocreader = csv.reader(csvfile, delimiter=',')
        for row in iocreader:
            root = AnyNode(id=row[1], type=row[0])
            logger.info('=========Start to explore IOC: %s', root.id)
            ioc_list = build_ioc_relation(root)
            timestamp = datetime.now().strftime('%Y%m%d%H%M')
            query_depth = config.get('general', 'depth')

            txtfile = output_dir + root.id + '_depth_' + query_depth + '_' + timestamp + '.txt'
            with open(txtfile, "w") as file:
                file.write(str(RenderTree(root)))
            logger.info('Export IOCs to TXT file: %s', txtfile)

            jsonfile = output_dir + root.id + '_depth_' + query_depth + '_' + timestamp + '.json'
            with open(jsonfile, "w") as file:
                exporter = JsonExporter(indent=2, sort_keys=False)
                exporter.write(root, file)
            logger.info('Export IOCs to JSON file: %s', jsonfile)
            logger.info('=========Done exploration for IOC: %s', root.id)
    return
def test_json_exporter():
    """Json Exporter."""
    root = AnyNode(id="root")
    s0 = AnyNode(id="sub0", parent=root)
    AnyNode(id="sub0B", parent=s0)
    AnyNode(id="sub0A", parent=s0)
    s1 = AnyNode(id="sub1", parent=root)
    AnyNode(id="sub1A", parent=s1)
    AnyNode(id="sub1B", parent=s1)
    s1c = AnyNode(id="sub1C", parent=s1)
    AnyNode(id="sub1Ca", parent=s1c)

    lines = [
        '{',
        '  "children": [',
        '    {',
        '      "children": [',
        '        {',
        '          "id": "sub0B"',
        '        },',
        '        {',
        '          "id": "sub0A"',
        '        }',
        '      ],',
        '      "id": "sub0"',
        '    },',
        '    {',
        '      "children": [',
        '        {',
        '          "id": "sub1A"',
        '        },',
        '        {',
        '          "id": "sub1B"',
        '        },',
        '        {',
        '          "children": [',
        '            {',
        '              "id": "sub1Ca"',
        '            }',
        '          ],',
        '          "id": "sub1C"',
        '        }',
        '      ],',
        '      "id": "sub1"',
        '    }',
        '  ],',
        '  "id": "root"',
        '}'
    ]
    exporter = JsonExporter(indent=2, sort_keys=True)
    exported = exporter.export(root).split("\n")
    exported = [e.rstrip() for e in exported]  # just a fix for a strange py2x behavior
    eq_(exported, lines)

    with NamedTemporaryFile(mode="w+") as ref:
        with NamedTemporaryFile(mode="w+") as gen:
            ref.write("\n".join(lines))
            exporter.write(root, gen)
            assert filecmp.cmp(ref.name, gen.name)
def _serialize_ontology(root, filename=None):
    """Serializes an ontology given by its root to a JSON file.

    If no output filename is given, return the serialized tree as a string.
    """
    exporter = JsonExporter(indent=2, sort_keys=True)
    if filename:
        # JsonExporter.write expects a file handle, not a filename string,
        # so the file has to be opened here first.
        with open(filename, 'w') as f:
            exporter.write(root, f)
    else:
        return exporter.export(root)
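# A hedged usage sketch for _serialize_ontology (the surrounding module is
# not shown; the tiny example ontology built here is assumed, not original):
from anytree import AnyNode

example_root = AnyNode(id="entity")
AnyNode(id="animal", parent=example_root)

_serialize_ontology(example_root, filename="ontology.json")  # write to disk
json_text = _serialize_ontology(example_root)                # or get a string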
def export_tree(self, filename):
    exporter_dict = DictExporter(dictcls=OrderedDict, attriter=sorted)
    self.export_cst_dict = exporter_dict.export(self.cst)
    exporter_json = JsonExporter(indent=2, sort_keys=True)
    with open(filename, 'w') as filehandle:
        exporter_json.write(self.cst, filehandle)
    print('CST tree export to JSON successful!')
    return
def main():
    """Make a full tree from the default targets, and export it in graphviz and JSON form."""
    tree = make_tree()
    DotExporter(tree).to_dotfile('full_tree.dot')
    with open('full_tree.json', 'w') as f:
        exporter = JsonExporter(indent=4, sort_keys=True)
        exporter.write(tree, f)
    print(f'node count: {len(tree.descendants)}')
def save_to_file(self, path, start_node=None):
    path = osp.expanduser(path)
    if osp.exists(osp.dirname(path)):
        exp = JsonExporter(indent=2, sort_keys=True, default=tools.default)
        with open(path, "w") as f:
            if start_node is None:
                exp.write(self._tree, f)
            else:
                exp.write(start_node, f)
    else:
        print("Error saving to disk: directory {} does not exist.".format(
            osp.dirname(path)))
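# JsonExporter forwards extra keyword arguments to json.dumps, so the
# `default=tools.default` above is the standard json fallback for values that
# are not JSON-serializable. tools.default itself is not shown in this
# snippet; the callback below is an assumed stand-in illustrating the idea.
from datetime import datetime

def _json_default(obj):
    # e.g. render datetimes as ISO strings instead of raising TypeError
    if isinstance(obj, datetime):
        return obj.isoformat()
    return '<not serializable>'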
def test_json_exporter():
    """Json Exporter."""
    root = AnyNode(id="root")
    s0 = AnyNode(id="sub0", parent=root)
    AnyNode(id="sub0B", parent=s0)
    AnyNode(id="sub0A", parent=s0)
    s1 = AnyNode(id="sub1", parent=root)
    AnyNode(id="sub1A", parent=s1)
    AnyNode(id="sub1B", parent=s1)
    s1c = AnyNode(id="sub1C", parent=s1)
    AnyNode(id="sub1Ca", parent=s1c)

    exporter = JsonExporter(indent=2, sort_keys=True)
    exported = exporter.export(root).split("\n")
    exported = [e.rstrip() for e in exported]  # just a fix for a strange py2x behavior
    lines = [
        '{',
        '  "children": [',
        '    {',
        '      "children": [',
        '        {',
        '          "id": "sub0B"',
        '        },',
        '        {',
        '          "id": "sub0A"',
        '        }',
        '      ],',
        '      "id": "sub0"',
        '    },',
        '    {',
        '      "children": [',
        '        {',
        '          "id": "sub1A"',
        '        },',
        '        {',
        '          "id": "sub1B"',
        '        },',
        '        {',
        '          "children": [',
        '            {',
        '              "id": "sub1Ca"',
        '            }',
        '          ],',
        '          "id": "sub1C"',
        '        }',
        '      ],',
        '      "id": "sub1"',
        '    }',
        '  ],',
        '  "id": "root"',
        '}'
    ]
    eq_(exported, lines)

    exporter = JsonExporter(indent=2, sort_keys=True, maxlevel=2)
    exported = exporter.export(root).split("\n")
    exported = [e.rstrip() for e in exported]  # just a fix for a strange py2x behavior
    limitedlines = [
        '{',
        '  "children": [',
        '    {',
        '      "id": "sub0"',
        '    },',
        '    {',
        '      "id": "sub1"',
        '    }',
        '  ],',
        '  "id": "root"',
        '}'
    ]
    eq_(exported, limitedlines)

    try:
        with NamedTemporaryFile(mode="w+", delete=False) as ref:
            with NamedTemporaryFile(mode="w+", delete=False) as gen:
                # The exporter in scope here has maxlevel=2, so the reference
                # file must hold the depth-limited output, not the full tree.
                ref.write("\n".join(limitedlines))
                exporter.write(root, gen)
        # on Windows, the files must be closed before comparison
        assert filecmp.cmp(ref.name, gen.name)
    finally:
        os.remove(ref.name)
        os.remove(gen.name)
def to_json(self, sink=None, **kwargs):
    """Write region tree info to JSON.

    Arguments:
        sink (str or None): file to save to. If None, return the JSON string.
        kwargs: additional arguments to pass to
            anytree.exporter.jsonexporter.JsonExporter and json.dumps.
    """
    exporter = JsonExporter(indent=2, **kwargs)
    if sink:
        with open(sink, 'w') as outfile:
            exporter.write(self.tree_root, outfile)
        return sink
    else:
        data = exporter.export(self.tree_root)
        return data
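# Assumed usage sketch (the owning class is not shown here); extra kwargs
# such as sort_keys flow through to json.dumps:
#
#   path = regions.to_json('regions.json', sort_keys=True)  # write file, returns path
#   text = regions.to_json(sort_keys=True)                  # or get the JSON string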
def createJson():
    os.chdir(os.path.dirname(__file__))
    os.chdir("..")
    os.chdir("data")
    philFile = open('PhilpapersTaxonomy.txt', 'r')
    nodes = {"1": Node("root")}

    # For every line in our extracted PhilPapers taxonomy, make a new node for our tree.
    for line in reader(philFile):
        # Keep just the digits from the ID (drop spaces, commas, etc.), but keep it a string.
        currID = ''.join(filter(str.isdigit, line[1]))
        # Create N nodes. The key for each node is the ID from philFile, so we can
        # access any category using its ID.
        nodes[currID] = Node("temp")

    # Reset our position in philFile so we can restart from the front.
    philFile.seek(0)

    # The file is organized as follows: ["name", "ID", "parent IDs", "primary parent ID"].
    for line in reader(philFile):
        # Take the ID of the category and find the corresponding node in our dictionary.
        # Again keep only the digits, as strings, so they play nicely with the dictionary keys.
        currID = ''.join(filter(str.isdigit, line[1]))
        currParentID = ''.join(filter(str.isdigit, line[len(line) - 1]))
        currName = str(line[0])
        # Set the current node's name to the corresponding name.
        nodes[currID].name = currName
        # Then attach the node to the node for its primary parent ID.
        nodes[currID].parent = nodes[currParentID]
    philFile.close()

    # Export our tree to JSON.
    exporter = JsonExporter(indent=2, sort_keys=True)
    with open('data.json', 'w') as f:
        exporter.write(nodes["1"], f)
    data = pd.read_json("data.json")
    df = pd.DataFrame(data['children'])
def saving_tree_test():
    # For now the user should start by creating a root node.
    root_node = Node("root")
    # Maybe the user wants to create more nodes to add to the tree.
    a_node = Node("_a")
    b_node = Node("_b")
    # Then the user should create a tree and initialize it with a root node.
    tree_to_save = TTree("root", root_node)
    # Then add nodes to the tree.
    tree_to_save.add_node(root_node, a_node)
    tree_to_save.add_node(root_node, b_node)
    """ Tree in this example looks like this...
    * root (0)
    * ├── _a (1)
    * └── _b (2)
    """
    print('\n')
    print("Confirm that tree matches example code:")
    tree_to_save.print_tree(True)
    print('\n')

    from anytree.exporter import JsonExporter
    # The default lambda tells json what value to emit when an object's
    # attribute cannot be serialized.
    js_exporter = JsonExporter(
        indent=2, sort_keys=True, default=lambda o: '<not serializable>')
    with open("./ts_modeling/saved_trees/tree_to_save.json", 'w') as js_file:
        js_exporter.write(tree_to_save.root, js_file)
    print("Here is the json formatting:")
    print(js_exporter.export(tree_to_save.root))
    print('\n')
sentences = list(map(lambda x: x.lower(), sentences))
for index in range(len(sentences)):
    extracted_sentence = sentences[index]
    correct_tree = None
    all_trees = parser(extracted_sentence, grammar_path)
    tree = best_tree(all_trees)
    semantic = str(tree.label()['SEM'])
    k = 0
    for i in range(len(correct_regex)):
        if re.match(correct_regex[i], semantic):
            k = i
            break
    print("Match with RegExpr {}.\n{}\n".format(str(k + 1), semantic))
    root = None
    if k == 0:
        root = sentence_1(tree)
    elif k == 1:
        root = sentence_2(tree)
    elif k == 2:
        root = sentence_3(tree)
    exporter = JsonExporter(indent=2, sort_keys=True)
    with open('../output/' + 'sentence_plan_' + str(index) + '.json', 'w') as file:
        exporter.write(root, file)
def create(tweetsFile, generalFileName):
    # A node should have a nodeNr (starting at 0), an idStr (tweet id), and a parent.
    propTree = PropTree()  # an instance of a tree
    nodeNr = 0  # to be ordered by time
    unknownNodeNr = 0
    requestCounter = 1
    posts = []
    for line in open(tweetsFile, 'r'):
        posts.append(json.loads(line))  # make a list of json objects
    print(len(posts))

    quotesAndRetweets = 0
    repostedUsers = {}
    for post in posts:
        userID = None
        if 'retweeted_status' in post:
            userID = post['retweeted_status']['user']['id_str']
        elif 'quoted_status' in post:
            userID = post['quoted_status']['user']['id_str']
        if userID is not None:
            quotesAndRetweets += 1
            if userID in repostedUsers:
                repostedUsers[userID] += 1
            else:
                repostedUsers[userID] = 1

    for post in posts:
        # Adds a new key: the post's id when in the tree.
        # (TODO: is this used? Should the full post JSON live on the AnyNode?)
        post['tweet_nr'] = nodeNr
        idStr = post['id_str']
        idUser = post['user']['id_str']
        timeStamp = post['created_at']
        followerCount = post['user']['followers_count']
        if 'retweeted_status' in post:
            parentIdStr = post['retweeted_status']['id_str']
            parentIdUser = post['retweeted_status']['user']['id_str']
            parentTimeStamp = post['retweeted_status']['created_at']
            parentFollowerCount = post['retweeted_status']['user']['followers_count']
        elif 'quoted_status' in post:
            parentIdStr = post['quoted_status']['id_str']
            parentIdUser = post['quoted_status']['user']['id_str']
            parentTimeStamp = post['quoted_status']['created_at']
            parentFollowerCount = post['quoted_status']['user']['followers_count']
        if 'retweeted_status' in post or 'quoted_status' in post:
            # make retweet or quote node
            parentNode = getFriendInTree(propTree, idUser, parentIdStr, parentIdUser,
                                         requestCounter, len(posts))
            requestCounter += 1
            if parentNode is None:
                # If this node has no parent we want to artificially create one;
                # artificial parents can be distinguished by an "x" prefix in their id.
                parentNodeNr = "x" + str(unknownNodeNr)
                parentNode = AnyNode(nodeNr=parentNodeNr, idStr=parentIdStr, idUser=parentIdUser,
                                     time=parentTimeStamp, followerCount=parentFollowerCount)
                propTree.addRoot(parentNode)
                if str(parentIdUser) in repostedUsers:
                    if int(repostedUsers[parentIdUser]) > int(parentFollowerCount) / 5000:
                        if parentIdUser not in propTree.rootFollowers:
                            propTree.addRootFollowers(
                                parentIdUser,
                                getFollowers(parentIdUser, requestCounter, len(posts)))
                            requestCounter += 1
                unknownNodeNr += 1
            AnyNode(nodeNr=nodeNr, idStr=idStr, idUser=idUser, parent=parentNode,
                    time=timeStamp, followerCount=followerCount)
        else:
            # this is original content
            reference = AnyNode(nodeNr=nodeNr, idStr=idStr, idUser=idUser,
                                time=timeStamp, followerCount=followerCount)
            propTree.addRoot(reference)
            if str(idUser) in repostedUsers:
                if int(repostedUsers[idUser]) > int(followerCount) / 5000:
                    if idUser not in propTree.rootFollowers:
                        propTree.addRootFollowers(
                            idUser, getFollowers(idUser, requestCounter, len(posts)))
                        requestCounter += 1
        nodeNr += 1

    propTree.updatePosts(posts)
    exporter = JsonExporter(indent=2, sort_keys=True)
    saveFileName = propTree.getFileName()
    # Note: the original code called open(...).close without parentheses (a no-op);
    # opening once in 'w' mode truncates correctly.
    savedFile = open('./data/tree/trees/other/' + saveFileName + '.txt', 'w')
    for root in propTree.roots:
        exporter.write(root, savedFile)
        savedFile.write("&\n")
    savedFile.close()
    writeToFile(propTree, generalFileName)
    return propTree
                hasDependencies=dep["node"]["hasDependencies"],
                parent=library,
            )
            if dep["node"]["repository"]["licenseInfo"] is not None:
                tnode.licenseString = dep["node"]["repository"]["licenseInfo"]["spdxId"]
        else:
            print(dep)
            tnode = SbomLibrary(
                dep["node"]["packageName"],
                version=dep["node"]["requirements"],
                packageManager=dep["node"]["packageManager"],
                hasDependencies=False,
                incompleteReason="Unable to determine repository host",
                parent=library,
            )


foom = SbomLibrary(
    "snipe-it",
    packageRepositoryURL="https://github.com/snipe/snipe-it",
    hasDependencies=True,
)
retrieveDependencies(foom)
# for child in foom.children:
#     retrieveDependencies(child)

with open("out.json", "w") as f:
    exporter = JsonExporter(indent=2, sort_keys=True)
    exporter.write(foom, f)
json_structures.remove(structure)

index = 0
while len(json_structures) > 0:
    structure = json_structures[index]
    found = find_by_attr(root, name="id", value=structure["parentId"])
    if found:
        content = json_structures.pop(index)["documents"][0]["content"]
        content = REGEX_WRAP.findall(content.strip())[0]
        links = REGEX_LINK.finditer(content)
        for link in links:
            content = content.replace(link.group(0), "@JournalEntry[" + link.group(1) + "]")
        special = REGEX_SPEC.finditer(content)
        for spec in special:
            content = content.replace(spec.group(0), "")
        node = AnyNode(id=structure["id"], parent=found, name=structure["name"], data=content)
        index = 0
    else:
        index = index + 1
        # >= rather than > : with index == len(json_structures) the next
        # json_structures[index] lookup would raise an IndexError.
        if index >= len(json_structures):
            index = 0

exporter = JsonExporter()
with open(args.output, "w") as f:
    exporter.write(root, f)
def export_tree_in_json(tree, path):
    exporter = JsonExporter(indent=4)
    # Use a context manager so the output file is actually flushed and closed.
    with open(os.path.join(path, 'data-flare.json'), 'w') as f:
        exporter.write(tree, f)
def analyze(self):
    """Do everything."""

    def reset_display():
        # Reset display
        self.suffix = "?JSTool=none"
        self.script_buttons.clear()
        self.choice_boxes.clear()
        self.number_of_buttons = 0
        # self.diff_btn.Show()
        self.apply_btn.Show()
        self.save_btn.Show()
        self.content_panel.Show()
        self.content_text.SetValue("Script code")
        while self.script_sizer.GetChildren():
            self.script_sizer.Hide(0)
            self.script_sizer.Remove(0)
        self.images.clear()

    def get_index_html():
        # Get index.html from remote proxy
        return get_resource(self.url)

    def parse_html(html: str):
        # Add index.html scripts to self.script_tree
        cnt = 1
        if not html:
            return
        while "<script" in html:
            src = ""
            script_name = "script" + str(cnt)
            start_index = html.find("<script")
            end_index = html.find("</script>")
            text = html[start_index:end_index + 9]
            new_node = AnyNode(id=script_name,
                               parent=self.script_tree,
                               content=text,
                               vector=extract_features(text),
                               count=1)
            if ' src="' in text:
                # BeautifulSoup turns all single quotes into double quotes
                src = text.split(' src="')[1].split('"')[0]
                src = self.format_src(src)
                node = None  # avoid NameError if the search below raises
                try:
                    node = anytree.cachedsearch.find(
                        self.script_tree, lambda node: node.id == src)
                except anytree.search.CountError:
                    logging.warning(
                        'multiple possible parents: more than one node with id = %s', src)
                if node:
                    node.parent = new_node
            html = html.replace(text, "\n<!--" + script_name + "-->\n")
            cnt += 1

    def create_buttons():
        # Add checkboxes to display
        # Check all
        self.add_button('Check all', 0, 1, None)
        index = 1
        # All other script checkboxes
        for node in PreOrderIter(self.script_tree):
            if node.is_root:
                continue
            node.button = index
            # vector = extract_features(node.content)
            self.add_button(node.id, index, node.depth,
                            get_attribute(node, 'vector'))  # node.count
            checkbox = self.script_buttons[index]
            if (get_attribute(checkbox, 'confidence') is not None
                    and get_attribute(checkbox, 'confidence') < CONFIDENCE_THRESHOLD):
                # run clustering if confidence less than threshold
                checkbox.category = CLUSTER.predict(script=str(node.content),
                                                    preprocess=True)
                label = get_attribute(checkbox, 'label')
                if label:
                    label.SetLabel(checkbox.category)
                    label.SetBackgroundColour(
                        tuple(CATEGORIES[checkbox.category]['color']))
                    label.SetToolTip(
                        CATEGORIES[checkbox.category]['description'])
            if get_attribute(checkbox, 'category') not in BLOCKED_CATEGORIES:
                # ads / marketing scripts disabled by default
                try:
                    if node.id[:6] != "script":
                        self.blocked_urls.remove(node.id)
                except ValueError:
                    logging.debug("Could not remove %s from blocked urls", node.id)
                self.check_boxes(True, node)
            index += 1
        self.scripts_panel.SetSizer(self.script_sizer)
        self.frame.frame_sizer.Layout()

    def functional_dependency():
        # Functional dependencies?
        try:
            tmp_dep = perf.get_dependency(self.url)
            # tmp_dep = [['https://ws.sharethis.com/button/async-buttons.js',
            #             'https://www.google-analytics.com/analytics.js',
            #             'https://ws.sharethis.com/button/buttons.js'],
            #            ['https://www.googletagmanager.com/gtm.js?id=GTM-WBDQQ5',
            #             'https://www.googleadservices.com/pagead/conversion_async.js'],
            #            ['https://www.unicef.org/sites/default/files/js/js_B7pS3ddmNLFYOJi3j28odiodelMu-EhaOeKlHZ8E6y0.js',
            #             'https://www.unicef.org/themes/custom/unicef/assets/src/js/init-blazy.js?v=1.x',
            #             'https://www.unicef.org/sites/default/files/js/js_dWWS6YNlsZWmXLboSy3PIiSD_Yg3sRxwjbMb52mdNyw.js',
            #             'https://www.unicef.org/sites/default/files/js/js_cLlwgRdoiVfjtFxLqlXX-aVbv3xxfX_uMCsn7iJqNpA.js']]
            print("\n\n-------- DEPENDENCY LABELS CHANGED --------")
            mapping = {'non-critical': 0, 'translatable': 1, 'critical': 2}
            mapping2 = {0: 'non-critical', 1: 'translatable', 2: 'critical'}
            for a in tmp_dep:
                tmp_label = 0
                for i in a:
                    if i not in self.yasir or self.yasir[i].category not in mapping:
                        continue
                    if mapping[self.yasir[i].category] > tmp_label:
                        tmp_label = mapping[self.yasir[i].category]
                for i in a:
                    if i not in self.yasir or self.yasir[i].category not in mapping:
                        continue
                    if self.yasir[i].category != mapping2[tmp_label]:
                        print("****", i, mapping2[tmp_label], self.yasir[i].category)
            print("\n\n")
        except RuntimeError:
            pass

    def display_loading_message():
        # Never managed to get this part to display before the spinning wheel of death
        self.err_msg.SetForegroundColour((0, 0, 0))
        self.err_msg.SetLabel("Loading page... please wait")
        self.Update()

    def similarity():
        # Print script pairs in self.script_tree with Jaccard similarity > SIMILARITY_THRESHOLD
        names = []
        scripts = []
        for node in PreOrderIter(self.script_tree):
            if node.is_root:
                continue
            names.append(node.id)
            scripts.append(str(node.content))
        results = similarity_comparison(scripts, SIMILARITY_THRESHOLD)
        if results:
            print("---" * 20)
            print('scripts with similarity > %.2f' % SIMILARITY_THRESHOLD)
            for tup in results:
                print('%s %s %.2f' % (names[tup[0]], names[tup[1]], tup[2]))

    def compare_image_sizes(images):
        # Print difference between original and rendered image sizes for image URLs in images
        for url in images:
            if url[:4] == 'data':
                # URI rather than URL
                url = url.partition(';')[-1]
                body = url.partition(',')[-1]
                if url[:6] == 'base64':
                    body = base64.b64decode(body)
            else:
                body = get_resource(url)
            try:
                stream = BytesIO(body)
            except TypeError:
                logging.warning("body in %s, not in bytes", type(body))
                stream = BytesIO(body.encode(ENCODING))
            try:
                width, height = get_image_size_from_bytesio(stream, DEFAULT_BUFFER_SIZE)
                self.images[url] = {}
                self.images[url]['ow'] = width
                self.images[url]['oh'] = height
            except UnknownImageFormat as error:
                logging.exception(str(error))
            except struct.error as error:
                logging.error(str(error))
        for img in self.driver.find_elements_by_tag_name('img'):
            url = img.get_attribute('src')
            if url not in self.images.keys():
                self.images[url] = {}
            self.images[url]['rw'] = img.size['width']
            self.images[url]['rh'] = img.size['height']
        logging.info("---" * 20)
        logging.info("potential image improvements:")
        for url, dimensions in self.images.items():
            if len(dimensions.keys()) == 4:
                # Successfully parsed original and rendered dimensions
                logging.info(url)
                logging.info("original: %d x %d", dimensions['ow'], dimensions['oh'])
                logging.info("rendered: %d x %d", dimensions['rw'], dimensions['rh'])

    display_loading_message()
    # Reset values. Check for an empty URL before indexing its last character.
    self.url = self.url_input.GetValue()
    if not self.url:
        return
    if self.url[-1] != "/":
        self.url = self.url + "/"
    reset_display()
    self.script_tree = AnyNode(id=self.url)
    try:
        file_path = PATH + "/reports/" + self.url.split("/")[2]
        if not os.path.exists(file_path):
            os.mkdir(file_path)
        # Read the same file name the exporter writes below (the original
        # read "script_tree.txt", so the cached tree was never found).
        with open(file_path + "/script_tree.json", 'r') as f:
            logging.debug('importing script tree...')
            importer = JsonImporter()
            self.script_tree = importer.read(f)
        with open(file_path + "/images.json", 'r') as f:
            images = json.load(f)
    except FileNotFoundError:
        logging.debug('script tree does not yet exist, building now')
        # Get original page and parse external scripts
        self.driver.execute_cdp_cmd('Network.setBlockedURLs', {'urls': []})
        epoch_in_milliseconds = time.time() * 1000
        try:
            self.driver.get(self.url)
            self.err_msg.SetLabel("")
        except InvalidArgumentException as exception:
            self.err_msg.SetForegroundColour((255, 0, 0))  # make text red
            self.err_msg.SetLabel(str(exception))
            return
        self.wait_for_load()
        self.script_tree = AnyNode(id=self.url)
        scripts, images = self.parse_log(epoch_in_milliseconds)
        for script in scripts:
            # pylint: disable=undefined-loop-variable
            # pylint: disable=cell-var-from-loop
            parent = anytree.cachedsearch.find(
                self.script_tree,
                lambda node: node.id == self.format_src(script['parent']))
            # Check if this node already exists
            node = anytree.cachedsearch.find(
                self.script_tree,
                lambda node: node.id == self.format_src(script['url']))
            if node and node.parent == parent:
                logging.warning('duplicate script! %s', self.format_src(script['url']))
                node.count += 1
            else:
                AnyNode(id=self.format_src(script['url']),
                        parent=parent,
                        content=script['content'],
                        vector=extract_features(script['content']),
                        count=1)
        # Check image differences
        compare_image_sizes(images)
        # Parse inline scripts
        html = get_index_html()
        parse_html(html)
        # self.print_scripts()
        # Export script tree
        logging.debug('exporting script tree...')
        exporter = JsonExporter()
        with open(PATH + "/reports/" + self.url.split("/")[2] + "/script_tree.json", "w") as f:
            exporter.write(self.script_tree, f)
        logging.debug('done')
        # Export images
        with open(PATH + "/reports/" + self.url.split("/")[2] + "/images.json", "w") as f:
            json.dump(images, f)
    # Check similarity
    # similarity()
    # Create buttons
    self.block_all_scripts()
    create_buttons()
    # Print functional dependencies
    # functional_dependency()
    # Get page with all scripts removed
    self.on_apply_press()
    try:
        self.original.get(self.url)
    except InvalidArgumentException as e:
        # Python exceptions have no .what(); log the exception text instead.
        logging.error(str(e))
# Returns: nothing, but `node` will be a full tree once it returns.
def get_label_tree(label_list_to_do, node):
    if len(label_list_to_do) == 0:
        return
    for label in label_list_to_do:
        try:
            time.sleep(1)
            # label_names.append(str(label.id) + " " + label.name)
            # print(str(label.id) + " " + label.name)
            curr_node = Node(label.name, parent=node)
            get_label_tree(label.sublabels, curr_node)
        except Exception:
            return
    return


# Discogs client initialization
d = discogs_client.Client('getalluniversallabels/0.1',
                          user_token='NMEOClFdbylQxiIwvtLKvwIJioGlKIdzQFxDlVzQ')
boss_label = d.label(38404)  # 38404 is the Discogs ID for UMG
root = Node("Universal Music Group")
get_label_tree(boss_label.sublabels, root)

# Export to the tree.json file
exporter = JsonExporter(indent=2, sort_keys=False)
with open("tree.json", "w") as fh:
    exporter.write(root, fh)
def to_json(self, filename='minmax.json'):
    from anytree.exporter import JsonExporter
    exporter = JsonExporter(indent=2)
    with open(filename, 'w') as file:
        exporter.write(self._root, file)
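# A round-trip sketch (assumed, not part of the original class): the file
# written by to_json can be restored with anytree's JsonImporter.
from anytree.importer import JsonImporter

def from_json(filename='minmax.json'):
    importer = JsonImporter()
    with open(filename) as file:
        return importer.read(file)  # root node of the reconstructed tree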
import anytree as at
import commons as com
import pandas as pd
from anytree.exporter import JsonExporter

symptom_df = pd.read_csv(com.BASEDIR + "csvs/symptom_tree_MEDCIN/symptom_tree_MEDCIN.csv")


def add_children(parent_node, parent_aui):
    print("add children of: ", parent_node)
    for index in symptom_df.index[symptom_df['PAUI'] == parent_aui].tolist():
        child_aui = symptom_df.iloc[index, 0]   # AUI is the first column
        child_cui = symptom_df.iloc[index, 2]   # CUI is the third column
        child_name = symptom_df.iloc[index, 3]  # the name of the child
        child_node = at.Node(child_cui, parent=parent_node, concept_name=child_name)
        add_children(child_node, child_aui)


root_cui = "C1457887"
root_aui = "A21010092"
root_node = at.Node(root_cui, concept_name="symptoms")
add_children(root_node, root_aui)

exporter = JsonExporter(indent=2, sort_keys=True)
with open("/home/niksart/...", "w") as fh:
    exporter.write(root_node, fh)
class DependencyBuilder(Processor):
    """SeaCOW processor class for concordance writing."""

    def __init__(self):
        self.column_index = None
        self.column_head = None
        self.column_relation = None
        self.column_token = None
        self.fileprefix = None
        self.savejson = False
        self.saveimage = None  # others: 'png' or 'dot'
        self.printtrees = False
        self.imagemetaid1 = None
        self.imagemetaid2 = None

    def prepare(self, query):
        if self.saveimage and not self.imagemetaid1:
            raise ProcessorError('You cannot save to image files without setting at least imagemetaid1.')
        # The original check built a tuple, which is always truthy; test each
        # column index for None instead (0 is a valid column index).
        if any(c is None for c in (self.column_token, self.column_index,
                                   self.column_head, self.column_relation)):
            raise ProcessorError('You have to set the column indices for the dependency information.')
        self.has_attributes = True if len(query.attributes) > 1 else False
        self.rex = re.compile('^<.+>$')
        if self.savejson:
            self.exporter = JsonExporter(indent=2, sort_keys=False)
            self.writer = open(self.fileprefix + '.json', 'w')

    def finalise(self, query):
        # Close the writer before returning (the original returned first,
        # leaving the close as dead code).
        if self.savejson:
            self.writer.close()
        return True

    def filtre(self, tree, line):
        return True

    def process(self, query, region, meta, match_offset, match_length):
        # Turn Manatee stuff into a usable structure.
        line = cow_region_to_conc(region, self.has_attributes)

        # Find true tokens via indices (not structs) for separating match from context.
        # Turn everything into nodes already, to be linked into a tree in the next step.
        indices = [i for i, s in enumerate(line) if not self.rex.match(s[0])]
        nodes = [Node("0", token="TOP", relation="", head="", linear=0,
                      meta=dict(zip(query.references, meta)))] + \
                [Node(make_token_safe(line[x][self.column_index]),
                      token=line[x][self.column_token],
                      relation=line[x][self.column_relation],
                      head=line[x][self.column_head],
                      linear=int(line[x][self.column_index]),
                      **dict(zip([query.attributes[a] for a in self.attribs],
                                 [line[x][a] for a in self.attribs])))
                 for x in indices]

        # Build tree from top.
        for n in nodes[1:]:
            n.parent = next((x for x in nodes if x.name == n.head), None)

        # If a descendant implements the filter, certain structures can be discarded.
        if not self.filtre(nodes, line):
            return

        # Export as desired. Three independent formats.
        if self.printtrees:
            for pre, _, node in RenderTree(nodes[0]):
                print("%s%s (%s)" % (pre, node.token, node.name))

        if self.savejson:
            self.exporter.write(nodes[0], self.writer)

        if self.saveimage:
            fnam = self.fileprefix + '_' + meta[self.imagemetaid1]
            if self.imagemetaid2:
                fnam = fnam + '_' + meta[self.imagemetaid2]
            # Compare strings with ==, not identity (`is`).
            if self.saveimage == 'dot':
                DotExporter(nodes[0]).to_dotfile(fnam + '.dot')
            elif self.saveimage == 'png':
                DotExporter(nodes[0], edgeattrfunc=edgeattrfunc,
                            nodenamefunc=nodenamefunc).to_picture(fnam + '.png')
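# A minimal configuration sketch grounded in the attributes defined in
# __init__ above; the column numbers and fileprefix are assumed values, and
# the SeaCOW query/run machinery that would invoke prepare()/process() is
# not shown here.
builder = DependencyBuilder()
builder.column_token = 0     # column holding the token itself
builder.column_index = 1     # column holding the token's linear index
builder.column_head = 2      # column holding the head's index
builder.column_relation = 3  # column holding the dependency relation
builder.fileprefix = 'deps'
builder.savejson = True      # prepare() will then open deps.json for writing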
def toJson(self, root):
    exporter = JsonExporter(indent=2, sort_keys=False)
    print(exporter.export(root))
    with open('trie_disk.json', 'w') as f:
        exporter.write(root, f)
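# Assumed counterpart sketch (not in the original class): reload the trie
# written by toJson using anytree's JsonImporter.
from anytree.importer import JsonImporter

def fromJson(path='trie_disk.json'):
    importer = JsonImporter()
    with open(path) as f:
        return importer.read(f)  # root AnyNode of the restored trie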