def importFranchises(franchiseTags, datFiles, currYear):
    ##### STEP 1: Determine which teams are available to play #####
    tree = xml_parse(datFiles['Teams'])
    collection = tree.documentElement
    teams = []
    elements = collection.getElementsByTagName('team')
    for team in elements:
        teams = teams + [team.getAttribute('title')]

    ##### STEP 2: Populate Franchise Details for teams from step 1 #####
    tree = xml_parse(datFiles['Franchise'])
    collection = tree.documentElement
    final = {}
    elements = collection.getElementsByTagName('franchise')
    for franch in elements:
        currFranchise = franch.getAttribute('title')
        eras = franch.getElementsByTagName('era')
        for era in eras:
            currEra = era.getAttribute('title')
            fromYear = int(currEra[0:4])
            toYear = int(currEra[5:len(currEra)])
            if toYear > currYear:
                toYear = currYear
            for x in range(fromYear, toYear + 1):
                currTeam = str(x) + ' ' + currFranchise
                currValues = Franchise(franchiseTags)
                # only want to execute the following if we have a match
                if currTeam in teams:
                    currValues.setvalue('selected', False)
                    for tag in franchiseTags:
                        if tag == 'year':
                            currValues.setvalue('year', x)
                        elif tag == 'name':
                            currValues.setvalue('name', currFranchise)
                        else:
                            currTest = era.getElementsByTagName(tag)
                            if len(currTest) > 0:
                                currValues.setvalue(tag, currTest[0].childNodes[0].data)
                            else:
                                currValues.setvalue(tag, '')
                    final.update({currTeam: currValues})
    return final
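# Sketch of the two .dat XML layouts importFranchises appears to expect.
# The <team>, <franchise> and <era> elements and their 'title' attributes come
# from the code above; the root element names, titles, years and child tags are
# invented examples, not taken from the source:
#
#   datFiles['Teams']      -> <teams>
#                               <team title="1975 Cincinnati Reds"/>
#                             </teams>
#
#   datFiles['Franchise']  -> <franchises>
#                               <franchise title="Cincinnati Reds">
#                                 <era title="1970-1976">   <!-- "fromYear-toYear" -->
#                                   <ballpark>Riverfront Stadium</ballpark>
#                                 </era>
#                               </franchise>
#                             </franchises>
#
# Each extra tag listed in franchiseTags (e.g. 'ballpark') is looked up as a
# child element of the matching <era>.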
def load_xml(filename, verbose):
    """Loads the XML network description and returns the configured MLP machine."""

    dom = xml_parse(filename)

    shape = []
    inputs = dom.getElementsByTagName('input')
    hidden = dom.getElementsByTagName('hidden')
    outputs = dom.getElementsByTagName('output')

    mach = bob.machine.MLP((len(inputs), len(hidden), len(outputs)))

    inputs = set_input(mach, inputs, verbose)
    hidden, activation = set_hidden(mach, hidden, verbose)
    outputs, activation = set_output(mach, outputs, activation, verbose)

    # read bias configuration
    bias = None
    c = dom.getElementsByTagName('bias')
    if c:
        bias = int(c[0].getAttribute('id'))
        if verbose:
            print "BIAS: present, neuron %d" % bias
    else:
        mach.biases = 0.0

    # load the synapses information
    synapses = load_synapses(dom.getElementsByTagName('synapse'), verbose)
    organize_synapses(mach, inputs, hidden, outputs, bias, synapses)

    return mach
def swath_table(path: str, roi: Polygon) -> 'gpd.pd.DataFrame':
    """
    create DataFrame with Index as burst_id:
        IW1      burst's Polygon
        IW1_fit  burst/roi intersection % (0...1)
    """
    _gl = os.path.join(path, 'annotation', '*.xml')
    # print(_gl)
    xml_pathl = glob(_gl)
    if not roi.area:
        raise ValueError("Zero area ROI is not allowed")
    processed = []
    _df_joined = gpd.GeoDataFrame(crs={'init': 'epsg:4326'})  # type: gpd.GeoDataFrame
    _df_joined.reset_index()
    for p in xml_pathl:
        dom = xml_parse(p)
        _ra = roi.area
        iw = xmlGetByPath(dom, '/product/adsHeader/swath').childNodes[0].nodeValue
        if iw not in processed:
            # print(f"# ---- {iw} {p} -----")
            df = get_geoloc(dom)
            _df_joined[iw] = df['geometry']
            _df_joined[iw + '_fit'] = df['geometry'].convex_hull.intersection(roi).area / roi.area
            # df[1:].plot(edgecolor='red', facecolor='green', ax=ax, legend=True)
            processed.append(iw)
    return _df_joined
def db_from_xml(source):
    """Loads the whole of the Database data from an XML file."""

    def load_features(c):
        """Loads all features of a class, returns a list."""
        l = []
        properties = []
        for f in c.getElementsByTagName('entry'):
            properties.append(dict(f.attributes.items()))
            feats = f.getElementsByTagName('feature')[0]
            l.append(tuple([float(k) for k in feats.childNodes[0].wholeText.split()]))
        lengths = [len(k) for k in l]
        if max(lengths) != min(lengths):
            raise RuntimeError("Feature lengths should be the same in class %s (min: %d; max: %d)" %
                               (c.getAttribute('name'), min(lengths), max(lengths)))
        return (lengths[0], l, properties)

    dom = xml_parse(source)
    features = {}
    properties = {}
    pattern_length = None
    for c in dom.getElementsByTagName('class'):
        length, features[c.getAttribute('name')], properties[c.getAttribute('name')] = load_features(c)
        features[c.getAttribute('name')] = tuple(features[c.getAttribute('name')])
        if not pattern_length:
            pattern_length = length
        elif length != pattern_length:
            raise RuntimeError("Features of class %s do not seem to follow the standard for the database (normal: %d; %s: %d)" %
                               (c.getAttribute('name'), pattern_length, c.getAttribute('name'), length))
    return features, properties
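# Sketch of the XML layout db_from_xml expects. The 'class', 'entry' and
# 'feature' element names and the 'name' attribute come from the code above;
# the root element name and the concrete values are invented:
#
#   <database>
#     <class name="classA">
#       <entry id="1" label="x">          <!-- entry attributes become its properties -->
#         <feature>0.1 0.2 0.3</feature>  <!-- whitespace-separated floats -->
#       </entry>
#       <entry id="2" label="y">
#         <feature>0.4 0.5 0.6</feature>  <!-- must have the same length within a class -->
#       </entry>
#     </class>
#   </database>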
def __init__(self, chapters_in_order):
    DocLayoutWidgetActions.__init__(self)
    self.chapters_in_order = chapters_in_order
    for idx, chapter in enumerate(self.chapters_in_order):
        dom = xml_parse(chapter)
        title = dom.getElementsByTagName("chapter")[0].getAttribute("title")
        self.chapters[idx + 1] = {'dom': dom, 'title': title, 'file': chapter}
    self.build_index()
    self.build_toc()
def _mangle_svg(self, buf):
    try:
        dom = xml_parse(buf)
    except ExpatError:
        raise RuntimeError("Cannot parse SVG shield.")

    for svg in dom.getElementsByTagName("svg"):
        sym_ele = svg.getElementsByTagName("symbol")
        use_ele = svg.getElementsByTagName("use")

        if sym_ele.length == 0 or use_ele.length == 0:
            continue

        # collect the <symbol> definitions by id and detach them from the tree
        symbols = {}
        for e in sym_ele:
            symbols['#' + e.getAttribute('id')] = e.cloneNode(True)
            e.parentNode.removeChild(e)

        # replace each <use> with an inlined <g> copy of the referenced symbol
        for e in use_ele:
            ref = e.getAttribute('xlink:href')
            x = float(e.getAttribute('x'))
            y = float(e.getAttribute('y'))

            group = dom.createElement('g')
            for ce in symbols[ref].childNodes:
                node = ce.cloneNode(True)
                if node.nodeName == 'path':
                    path = node.getAttribute('d')
                    newpath = ''
                    is_x = True
                    for p in path.split():
                        if not p:
                            continue
                        if p[0].isupper():
                            # absolute command: shift the following coordinates
                            dx = x
                            dy = y
                            newpath += p + ' '
                        elif p[0].islower():
                            # relative command: no shift needed
                            dx = 0
                            dy = 0
                            newpath += p + ' '
                        else:
                            # numeric coordinate token, alternating x/y
                            val = float(p) + (dx if is_x else dy)
                            is_x = not is_x
                            newpath += "%f " % val
                    node.setAttribute('d', newpath)
                group.appendChild(node)

            e.parentNode.replaceChild(group, e)

    return dom.toxml()
def get_list(self):
    xml = urllib2.urlopen("http://github.com/api/v1/xml/%s" % self.username)
    repos = xml_parse(xml).getElementsByTagName("repository")
    self.clone_urls = []
    for repo in repos:
        name = repo.getElementsByTagName("name")[0].childNodes[0].data
        wiki = repo.getElementsByTagName("has-wiki")[0].childNodes[0].data == "true"
        self.clone_urls.append(self.github_url(name))
        if self.wiki and wiki:
            self.clone_urls.append((self.github_url(name + ".wiki"), name + "/" + self.wiki))
def get(self):
    lists = List.all()
    for sync_list in lists:
        rss_url = sync_list.rss_url
        result = urlfetch.fetch(rss_url)
        if result.status_code == 200:
            list_dom = xml_parse(result.content)
            posts = list_dom.getElementsByTagName("item")
            self.process_posts(sync_list, posts)
def dbpedia_query(resource, desired, what='property'):
    original_query = u'''select ?{} where {{ <{}> <http://dbpedia.org/{}/{}> ?{} . }}'''
    query = original_query.format(desired, resource.replace(' ', '_'), what, desired, desired)
    response = requests.get('http://dbpedia.org/sparql', params={'query': query})
    xml = xml_parse(response.content)
    results = xml.getElementsByTagName('binding')
    return [result.childNodes[0].childNodes[0].toxml() for result in results]
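# Self-contained sketch of the <binding> traversal used by dbpedia_query above:
# each <binding> wraps one value element whose first child is the text node we
# want. The sample document below is an invented, trimmed-down stand-in for a
# real SPARQL XML response, not actual DBpedia output.
from xml.dom.minidom import parseString as _parse_string

_sparql_sample = """<?xml version="1.0"?>
<sparql xmlns="http://www.w3.org/2005/sparql-results#">
  <results>
    <result>
      <binding name="birthPlace"><uri>http://dbpedia.org/resource/London</uri></binding>
    </result>
  </results>
</sparql>"""

_doc = _parse_string(_sparql_sample)
_values = [b.childNodes[0].childNodes[0].toxml()
           for b in _doc.getElementsByTagName('binding')]
assert _values == ['http://dbpedia.org/resource/London']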
def parse(self):
    # TBD: Might as well use BeautifulSoup here, instead of xml_parse
    dom = xml_parse(self.filename)
    element = dom.getElementsByTagName('default')[0]
    self.default = self._attr_to_dict(element.attributes)
    for remote in dom.getElementsByTagName('remote'):
        remote_dict = self._attr_to_dict(remote.attributes)
        self.remotes.update({remote_dict['name']: remote_dict['fetch']})
    for project in dom.getElementsByTagName('project'):
        attrs = self._attr_to_dict(project.attributes)
        attrs.setdefault('remote', self.default['remote'])
        self.projects.append(attrs)
def dbpedia_query(resource, desired, what='property'):
    original_query = u'''select ?{} where {{ <{}> <http://dbpedia.org/{}/{}> ?{} . }}'''
    query = original_query.format(desired, resource.replace(' ', '_'), what, desired, desired)
    #print query
    response = requests.get('http://dbpedia.org/sparql', params={'query': query})
    #print response
    xml = xml_parse(response.content)
    results = xml.getElementsByTagName('binding')
    return [result.childNodes[0].childNodes[0].toxml() for result in results]
def __init__(self, path, who, hter):
    self._xml = xml_parse(path)
    self._who = who
    self._units = []
    xml_units = self._xml.getElementsByTagName('unit')
    # compatibility with older versions
    if not xml_units:
        xml_units = self._xml.getElementsByTagName('task')
    for unit in xml_units:
        self._units.append(Unit.parse(unit, hter))
def load(self):
    file_path = _resolve_path(self.filename)
    with open(file_path, 'r') as svg_in:
        svg_node = get_svg_root(xml_parse(svg_in.read()))
    groups = get_groups(svg_node)
    self._parse_layers(groups)
    if 'default' not in self.layers.keys():
        raise ValueError("Missing 'default' layer")
    self.default = self.layers['default']
    del self.layers['default']
    self._create_transition_map()
    self._loaded = True
def get_list(self):
    xml = urllib2.urlopen("http://github.com/api/v1/xml/%s" % self.username)
    repos = xml_parse(xml).getElementsByTagName("repository")
    self.clone_urls = []
    for repo in repos:
        name = repo.getElementsByTagName("name")[0].childNodes[0].data
        url = ""
        if self.protocol == "ssh":
            url = "git@github.com:%s/%s.git" % (self.username, name)
        elif self.protocol == "https":
            url = "https://%s@github.com/%s/%s.git" % (self.username, self.username, name)
        else:
            url = "git://github.com/%s/%s.git" % (self.username, name)
        self.clone_urls.append(url)
def search_question(self, path, qid=None):
    dom = xml_parse(path)
    q = None
    if not qid:
        q = dom.getElementsByTagName("question")[0]
    else:
        for e in dom.getElementsByTagName("question"):
            if e.getAttribute("id") == qid:
                q = e
                break
    if q is None:
        raise Exception("Question node %s not found!" % qid)
    qtitle = xml_node_get_text(q.getElementsByTagName("title")[0])
    qtext = xml_node_get_text(q.getElementsByTagName("text")[0])
    return (dom, q, qtitle, qtext)
def data(self):
    if self.type == "megascan_info":
        with self.open(encoding="utf-8") as json_file:
            return json.load(json_file)
    elif self.type == "url":
        if self.suffix == ".webloc":
            from xml.dom.minidom import parse as xml_parse
            tag = xml_parse(str(self)).getElementsByTagName("string")[0]
            return tag.firstChild.nodeValue
        else:
            import configparser
            config = configparser.ConfigParser(interpolation=None)
            config.read(str(self))
            return config[config.sections()[0]].get("URL")
    elif self.type == "blendswap_info":
        with self.open(encoding="utf-8") as info_file:
            match = re.search(r"blendswap.com\/blends\/view\/\d+", info_file.read())
            if match:
                return match.group(0)
    elif self.type == "__info__":
        with self.open(encoding="utf-8") as json_file:
            return json.load(json_file)
    return None
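# Minimal, self-contained sketch of the .webloc branch above: a .webloc file is
# an Apple plist whose first <string> element holds the URL, which is why
# minidom plus getElementsByTagName("string") is enough. The sample plist text
# and the helper name are illustrative assumptions, not from the source.
from xml.dom.minidom import parseString as _parse_plist

_webloc_sample = """<?xml version="1.0" encoding="UTF-8"?>
<plist version="1.0">
  <dict>
    <key>URL</key>
    <string>https://example.com</string>
  </dict>
</plist>"""

def read_webloc_url(plist_text):
    # return the text of the first <string> element, as the data() property does
    tag = _parse_plist(plist_text).getElementsByTagName("string")[0]
    return tag.firstChild.nodeValue

assert read_webloc_url(_webloc_sample) == "https://example.com"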
def current_endpoint_data():
    raw_content = request.urlopen(VETURILLO_ENDPOINT).read().decode('utf-8')
    # http://stackoverflow.com/questions/749796/pretty-printing-xml-in-python
    return xml_parse(raw_content).toprettyxml()
def download_models_from_modbase( query , out_directory = 'modbase_models' , root_filename = '' , dataset = '' , get_alignment = True , write_summary = True , display = True ):
    """
    REQUIRES INTERNET CONNECTION

    Returns "details" on the models for  <query>  in ModBase,
    writes results to  <out_directory>  with the base  <root_filename>

    Optionally request models from a specific  <dataset>
    Optionally  <get_alignment>  too (as PIR file)
    Optionally  <display>  a summary of the results
    Optionally  <write_summary>  of the models (human readable, also displays)

    ModBase documentation claims that the interface can accept:

        databaseID    database ID, let's use UniProt
        dataset       a particular ModBase run?
        modelID       same?
        seqID         same?
        dataset       the ModWeb JobID...
        type          "model" or "alignment", this method handles this

    and that any of the first 4 is enough to identify the target (?)
    ...for simplicity, let's just look using UniProt IDs as "databaseIDs"

    apparently to use "non-public" access additional work must be done
    (something about a "cookies.txt" file, though this seems specific to "wget",
    may be able to pass in user/password as "modbase_user" and "modbase_passwd")

    uses xml.dom.minidom to parse the HTML returned...this may not be kosher...
    but it works...and is easier than using htmllib or sgmllib...(?)
    """
    # url
    url = 'http://salilab.org/modbase/retrieve/modbase'

    # format the search query
    print 'searching modbase for \"' + query + '\"'
    url += '?databaseID=' + query

    # currently unused...so why put it here?
    #for i in search_options.keys():
    #    url += '&' + i + '=' + search_options[i]

    # the dataset
    #if not 'dataset' in search_options.keys() and dataset:
    if dataset:
        url += '&dataset=' + dataset

    # go get the results
    print 'obtaining model results from:\n\t' + url
    raw_stream = urllib2.urlopen( url + '&type=model' )
    print 'finished downloading models, summarizing the results...'

    # parse the results
    results = xml_parse( raw_stream )

    # check if empty
    if not len( results.toxml() ) > 100:    # ahhh! I hate arbitrary numbers!!!
        print 'no models exist in ModBase for this protein...'
        return {}

    # get the ids
    #ids = get_str_from_xml_tag( results , 'model_id' )    # no need, in the header of the model

    # get the models
    models = get_str_from_xml_tag( results , 'content' )

    # extract the details
    details , text = get_modbase_model_details( models , display or write_summary , export = True )

    # defaults for writing files
    if not root_filename:
        root_filename = 'modbase_' + query

    # optionally write the models
    if out_directory:
        create_directory( out_directory , ' to store the models as PDB files' )
        print 'writing the downloaded models to ' + out_directory
        count = 1
        filenames = []
        for i in models:
            # write it
            filename = out_directory + '/' + root_filename + '_model_' + str( count ) + '.pdb'
            filenames.append( os.path.abspath( filename ) )

            # write the alignment
            f = open( filename , 'w' )
            f.write( i.strip() )
            f.close()

            count += 1

        # change this in this case
        models = filenames    # SOOO HACKY!!!!

        # for later safety...
        out_directory += '/'

    # optionally grab the alignment too
    if get_alignment:
        print 'also downloading the alignments...'
        raw_aln_stream = urllib2.urlopen( url + '&type=alignment' )

        # parse the results
        aln_results = xml_parse( raw_aln_stream )

        # get the files
        aln_results = aln_results.getElementsByTagName( 'alignmentfile' )

        # ...for now, just get the text itself
        # don't worry about the other details in the XML file
        print 'writing the alignments as PIR files...'
        count = 1
        for i in aln_results:
            i = get_str_from_xml_tag( i , 'content' )[0]    # just 1, always the first

            # if out_directory is empty...this will just do as we want
            filename = out_directory + root_filename + '_model_' + str( count ) + '_alignment.pir'
            f = open( filename , 'w' )
            f.write( i )
            f.close()

            # convert them?
            # doesn't seem to load these "pir" files...? :(

            # save in the details?
            details[count - 1]['alignment'] = i

            count += 1

    # put the models (filenames) into details...cleaner output, just 1 dict
    for i in xrange( len( models ) ):
        details[i]['coordinates'] = models[i]

    # find the "best" model
    temp = '\nevaluating the \"best\" model by comparing:\n\t1. sequence identity\n\t2. model score\n\t3. target length'
    print temp
    text += temp + '\n'

    best_score = max( [i['sequence identity'] for i in details] )
    matches = [i for i in details if i['sequence identity'] == best_score]
    if len( matches ) > 1 and sum( [not i['model score'] == matches[0]['model score'] for i in matches[1:]] ):
        # find the best model score
        best_score = max( [i['model score'] for i in details] )
        matches = [i for i in details if i['model score'] == best_score]

        if len( matches ) > 1 and sum( [not i['target length'] == matches[0]['target length'] for i in matches[1:]] ):
            best_score = max( [i['target length'] for i in details] )
            matches = [i for i in details if i['target length'] == best_score]

    # debug output
    if len( matches ) > 1:
        temp = 'multiple models are \"equally the best\":'
        print temp
        text += temp + '\n'
        for i in matches:
            temp = '\t' + i['coordinates']
            print temp
            text += temp + '\n'
        temp = 'copying the first one to best_model.pdb'
        print temp
        text += temp + '\n'
    else:
        temp = 'best model: ' + matches[0]['coordinates']
        print temp
        text += temp

    # move it to an indicative filename
    copy_file( matches[0]['coordinates'] , out_directory + '/best_model.pdb' )

    # optionally write a summary file
    if write_summary:
        # if out_directory is empty...this will just do as we want
        filename = out_directory + root_filename + '_summary.txt'
        f = open( filename , 'w' )
        f.write( text )
        f.close()

    # just the details, has everything else...
    return details
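# Hypothetical call (not from the source): 'P69905' is a UniProt accession used
# purely as an illustration, and the function needs network access to ModBase.
#
#   details = download_models_from_modbase( 'P69905' , out_directory = 'hba_models' )
#   print details[0]['sequence identity'] , details[0]['coordinates']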