Code Example #1
File: importfranchises.py Project: 8bit1337/nano
def importFranchises(franchiseTags, datFiles, currYear):
  ##### STEP 1: Determine which teams are available to play #####
  tree = xml_parse(datFiles['Teams'])
  collection = tree.documentElement

  teams=[]

  elements = collection.getElementsByTagName('team')
  for team in elements:
    teams.append(team.getAttribute('title'))

  ##### STEP 2: Populate Franchise Details for teams from step 1 #####
  tree = xml_parse(datFiles['Franchise'])
  collection = tree.documentElement

  final={}
    
  elements = collection.getElementsByTagName('franchise')
  for franch in elements:
    currFranchise = franch.getAttribute('title')
    eras = franch.getElementsByTagName('era')

    for era in eras:
      currEra  = era.getAttribute('title')
                     
      fromYear = int(currEra[0:4])
      toYear = int(currEra[5:])
      if toYear > currYear:
        toYear = currYear
                
      for x in range(fromYear, toYear+1):
        currTeam = str(x) + ' ' + currFranchise
        currValues = Franchise(franchiseTags)

        # only want to execute the following if we have a match
        if currTeam in teams:
          currValues.setvalue('selected', False)
          for tag in franchiseTags:
            if tag=='year':
              currValues.setvalue('year', x)
            elif tag=='name':
              currValues.setvalue('name', currFranchise)
            else:
              currTest = era.getElementsByTagName(tag)
              if len(currTest) > 0:
                currValues.setvalue(tag, currTest[0].childNodes[0].data)
              else:
                currValues.setvalue(tag,'')
          final.update({currTeam: currValues})
        
  return final
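For context, a minimal self-contained sketch of the era-title parsing used above. The XML shape is inferred purely from the getElementsByTagName/getAttribute calls; the franchise and city names are invented and the real .dat files may differ.

from xml.dom.minidom import parseString

doc = parseString(
    '<franchise title="Hawks">'
    '<era title="1988-1995"><city>Atlanta</city></era>'
    '</franchise>')
era = doc.getElementsByTagName('era')[0]
title = era.getAttribute('title')                   # "1988-1995"
fromYear, toYear = int(title[0:4]), int(title[5:])  # slice the "YYYY-YYYY" title
print(fromYear, toYear)                             # 1988 1995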
Code Example #2
File: xml2hdf5.py Project: joskid/bob
def load_xml(filename, verbose):
  """Loads the XML and outputs a dictionary containing information about the
  network.
  """

  dom = xml_parse(filename)

  inputs = dom.getElementsByTagName('input')
  hidden = dom.getElementsByTagName('hidden')
  outputs = dom.getElementsByTagName('output')

  mach = bob.machine.MLP((len(inputs), len(hidden), len(outputs)))

  inputs = set_input(mach, inputs, verbose)
  hidden, activation = set_hidden(mach, hidden, verbose)
  outputs, activation = set_output(mach, outputs, activation, verbose)

  # read bias configuration
  bias = None
  c = dom.getElementsByTagName('bias')
  if c:
    bias = int(c[0].getAttribute('id'))
    if verbose: print "BIAS: present, neuron %d" % bias
  else:
    mach.biases = 0.0

  # load the synapses information
  synapses = load_synapses(dom.getElementsByTagName('synapse'), verbose)

  organize_synapses(mach, inputs, hidden, outputs, bias, synapses)

  return mach
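The document load_xml() walks can be sketched from the tag names it queries (input/hidden/output/bias/synapse). The attributes shown here are assumptions; the real format used by bob may carry more information per element.

from xml.dom.minidom import parseString

dom = parseString(
    '<network>'
    '<input id="1"/><input id="2"/>'
    '<hidden id="3"/>'
    '<output id="4"/>'
    '<bias id="0"/>'
    '<synapse from="1" to="3"/>'
    '</network>')
# The element counts become the MLP shape: (2, 1, 1) here.
print(len(dom.getElementsByTagName('input')),
      len(dom.getElementsByTagName('hidden')),
      len(dom.getElementsByTagName('output')))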
Code Example #3
def swath_table(path: str, roi: Polygon) -> 'gpd.pd.DataFrame':
    """ create Dataframe with Index as burst_id:
            IW1 burst's Polygon
            IW1_fit burst/roi intersection % (0...1)
    """
    _gl = os.path.join(path, 'annotation', '*.xml')
    # print(_gl)
    xml_pathl = glob(_gl)
    if not roi.area:
        raise ValueError("Zero area ROI is not allowed")
    processed = []
    _df_joined = gpd.GeoDataFrame(crs={'init': 'epsg:4326'})  # type: gpd.GeoDataFrame
    for p in xml_pathl:
        dom = xml_parse(p)
        iw = xmlGetByPath(dom,
                          '/product/adsHeader/swath').childNodes[0].nodeValue
        if iw not in processed:
            # print(f"# ---- {iw} {p} -----")
            df = get_geoloc(dom)

            _df_joined[iw] = df['geometry']
            _df_joined[iw + '_fit'] = df['geometry'].convex_hull.intersection(
                roi).area / roi.area
            # df[1:].plot(edgecolor='red', facecolor='green', ax=ax, legend=True)
            processed.append(iw)
    return _df_joined
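A hypothetical call, assuming a Sentinel-1 SLC product directory with the usual annotation/*.xml files; the path and ROI coordinates below are invented.

from shapely.geometry import Polygon

roi = Polygon([(20.0, 52.0), (21.0, 52.0), (21.0, 53.0), (20.0, 53.0)])  # 1x1 degree box
df = swath_table('S1A_IW_SLC__1SDV_example.SAFE', roi)
print(df.filter(like='_fit'))  # per-swath ROI intersection fractions, 0...1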
Code Example #4
File: Database.py Project: anjos/neuralringer
def db_from_xml(source):
  """Loads the whole of the Database data from an XML file."""

  def load_features(c):
    """Loads all feature of a class, returns a list."""
    l = []
    properties = []
    for f in c.getElementsByTagName('entry'):
      properties.append(dict(f.attributes.items()))
      feats = f.getElementsByTagName('feature')[0]
      l.append(tuple([float(k) for k in feats.childNodes[0].wholeText.split()]))
    lengths = [len(k) for k in l]
    if max(lengths) != min(lengths):
      raise RuntimeError, "Feature lengths should be the same in class %s (min: %d; max: %d)"  % (c.getAttribute('name'), min(lengths), max(lengths))
    return (lengths[0], l, properties)

  dom = xml_parse(source)
  features = {}
  properties = {}
  pattern_length = None
  for c in dom.getElementsByTagName('class'):
    length, features[c.getAttribute('name')], properties[c.getAttribute('name')] = load_features(c)
    features[c.getAttribute('name')] = tuple(features[c.getAttribute('name')])
    if not pattern_length: pattern_length = length
    elif length != pattern_length:
      raise RuntimeError, "Features of class %s do not seem to follow the standard for the database (normal: %d; %s: %d)" % (c.getAttribute('name'), pattern_length, c.getAttribute('name'), length)

  return features, properties
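The input accepted by db_from_xml() can be reconstructed from the lookups above (class/entry/feature); the root element name and the entry attributes are guesses.

from xml.dom.minidom import parseString

doc = parseString(
    '<db><class name="signal">'
    '<entry id="1"><feature>0.1 0.2 0.3</feature></entry>'
    '</class></db>')
feats = doc.getElementsByTagName('feature')[0]
# Same whitespace-split-and-convert step as load_features() above.
print([float(k) for k in feats.childNodes[0].wholeText.split()])  # [0.1, 0.2, 0.3]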
Code Example #5
    def __init__(self, chapters_in_order):
        DocLayoutWidgetActions.__init__(self)
        self.chapters_in_order = chapters_in_order
        for idx, chapter in enumerate(self.chapters_in_order):
            dom = xml_parse(chapter)
            title = dom.getElementsByTagName("chapter")[0].getAttribute("title")
            self.chapters[idx + 1] = {'dom': dom, 'title': title, 'file': chapter}
        self.build_index()
        self.build_toc()
Code Example #6
    def _mangle_svg(self, buf):
        try:
            dom = xml_parse(buf)
        except ExpatError:
            raise RuntimeError("Cannot parse SVG shield.")

        for svg in dom.getElementsByTagName("svg"):
            sym_ele = svg.getElementsByTagName("symbol")
            use_ele = svg.getElementsByTagName("use")

            if sym_ele.length == 0 or use_ele.length == 0:
                continue

            symbols = {}
            for e in sym_ele:
                symbols['#' + e.getAttribute('id')] = e.cloneNode(True)
                e.parentNode.removeChild(e)

            for e in use_ele:
                ref = e.getAttribute('xlink:href')
                x   = float(e.getAttribute('x'))
                y   = float(e.getAttribute('y'))

                group = dom.createElement('g')

                for ce in symbols[ref].childNodes:
                    node = ce.cloneNode(True)

                    if node.nodeName == 'path':
                        path = node.getAttribute('d')

                        newpath = ''
                        is_x = True
                        for p in path.split():
                            if not p:
                                continue

                            if p[0].isupper():
                                dx = x
                                dy = y
                                newpath += p + ' '
                            elif p[0].islower():
                                dx = 0
                                dy = 0
                                newpath += p + ' '
                            else:  # numeric coordinate token (possibly signed)
                                val = float(p) + (dx if is_x else dy)
                                is_x = not is_x
                                newpath += "%f " % val

                        node.setAttribute('d', newpath)

                    group.appendChild(node)

                e.parentNode.replaceChild(group, e)

        return dom.toxml()
Code Example #7
File: listers.py Project: stigkj/metagit
    def get_list(self):
        xml = urllib2.urlopen("http://github.com/api/v1/xml/%s" % self.username)
        repos = xml_parse(xml).getElementsByTagName("repository")
        self.clone_urls = []
        for repo in repos:
            name = repo.getElementsByTagName("name")[0].childNodes[0].data
            wiki = repo.getElementsByTagName("has-wiki")[0].childNodes[0].data == "true"
            self.clone_urls.append(self.github_url(name))
            if self.wiki and wiki:
                self.clone_urls.append((self.github_url(name + ".wiki"), name + "/" + self.wiki))
Code Example #8
File: tasks.py Project: xydrolase/craigsreptile
    def get(self):
        lists = List.all()
        for sync_list in lists:
            rss_url = sync_list.rss_url

            result = urlfetch.fetch(rss_url)
            if result.status_code == 200:
                list_dom = xml_parse(result.content)
                posts = list_dom.getElementsByTagName("item")
                
                self.process_posts(sync_list, posts)
Code Example #9
File: dbpedia.py Project: itacab/MMD-1
def dbpedia_query(resource, desired, what='property'):
    original_query = u'''select ?{} where {{
    <{}>
    <http://dbpedia.org/{}/{}> ?{} . }}'''
    query = original_query.format(desired, resource.replace(' ', '_'), what,
                                  desired, desired)
    response = requests.get('http://dbpedia.org/sparql',
                            params={'query': query})
    xml = xml_parse(response.content)
    results = xml.getElementsByTagName('binding')
    return [result.childNodes[0].childNodes[0].toxml() for result in results]
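A hypothetical call: the resource has to be a full URI, because it is interpolated into the SPARQL query between angle brackets verbatim.

values = dbpedia_query('http://dbpedia.org/resource/Berlin', 'populationTotal')
print(values)  # a list with the matching literal(s) rendered as XML text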
Code Example #10
    def parse(self):
        # TBD: Might as well use BeautifulSoup here, instead of xml_parse
        dom = xml_parse(self.filename)
        element = dom.getElementsByTagName('default')[0]
        self.default = self._attr_to_dict(element.attributes)
        for remote in dom.getElementsByTagName('remote'):
            remote_dict = self._attr_to_dict(remote.attributes)
            self.remotes.update({remote_dict['name']: remote_dict['fetch']})
        for project in dom.getElementsByTagName('project'):
            attrs = self._attr_to_dict(project.attributes)
            attrs.setdefault('remote', self.default['remote'])
            self.projects.append(attrs)
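Judging by the element names, the file being parsed is a repo-style manifest. A minimal hypothetical instance, exercised with the same attributes.items() idiom:

from xml.dom.minidom import parseString

manifest = parseString(
    '<manifest>'
    '<default remote="origin" revision="main"/>'
    '<remote name="origin" fetch="https://example.com/git"/>'
    '<project name="tools/widget"/>'
    '</manifest>')
default = dict(manifest.getElementsByTagName('default')[0].attributes.items())
print(default['remote'])  # "origin" -- the fallback remote for each project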
Code Example #11
File: perparse.py Project: carolscarton/PET
    def __init__(self, path, who, hter):
        self._xml = xml_parse(path)
        self._who = who
        self._units = []

        
        xml_units = self._xml.getElementsByTagName('unit')

        # compatibility with older versions
        if not xml_units:
            xml_units = self._xml.getElementsByTagName('task')

        for unit in xml_units:
            self._units.append(Unit.parse(unit, hter))
Code Example #12
    def load(self):
        file_path = _resolve_path(self.filename)

        with open(file_path, 'r') as svg_in:
            svg_node = get_svg_root(xml_parse(svg_in.read()))

        groups = get_groups(svg_node)
        self._parse_layers(groups)
        if 'default' not in self.layers:
            raise ValueError('Missing \'default\' layer')
        self.default = self.layers['default']
        del self.layers['default']
        self._create_transition_map()
        self._loaded = True
Code Example #13
File: listers.py Project: ossrjus/metagit
    def get_list(self):
        xml = urllib2.urlopen("http://github.com/api/v1/xml/%s"%self.username)
        repos = xml_parse(xml).getElementsByTagName("repository")
        self.clone_urls = []
        for repo in repos:
            name = repo.getElementsByTagName("name")[0].childNodes[0].data
            url = ""
            if self.protocol == "ssh":
                url = "[email protected]:%s/%s.git" % (self.username, name)
            elif self.protocol == "https":
                url = "https://%[email protected]/%s/%s.git" %(self.username, username, name)
            else:
                url = "git://github.com/%s/%s.git" %(self.username, name)

            self.clone_urls.append(url)
Code Example #14
File: nxpanel.py Project: etlapale/molotov
    def search_question(self, path, qid = None):
        dom = xml_parse(path)
        q = None
        if not qid:
            q = dom.getElementsByTagName("question")[0]
        else:
            for e in dom.getElementsByTagName("question"):
                if e.getAttribute("id") == qid:
                    q = e
                    break
            if q is None:
                raise Exception("Question node %s not found!" % qid)
        
        qtitle = xml_node_get_text(q.getElementsByTagName("title")[0])
        qtext = xml_node_get_text(q.getElementsByTagName("text")[0])

        return (dom, q, qtitle, qtext)
Code Example #15
def data(self):
    if self.type == "megascan_info":
        with self.open(encoding="utf-8") as json_file:
            return json.load(json_file)
    elif self.type == "url":
        if self.suffix == ".webloc":
            from xml.dom.minidom import parse as xml_parse
            tag = xml_parse(str(self)).getElementsByTagName("string")[0]
            return tag.firstChild.nodeValue
        else:
            import configparser
            config = configparser.ConfigParser(interpolation=None)
            config.read(str(self))
            return config[config.sections()[0]].get("URL")
    elif self.type == "blendswap_info":
        with self.open(encoding="utf-8") as info_file:
            match = re.search(r"blendswap.com\/blends\/view\/\d+",
                              info_file.read())
            if match:
                return match.group(0)
    elif self.type == "__info__":
        with self.open(encoding="utf-8") as json_file:
            return json.load(json_file)
    return None
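The .webloc branch works because a .webloc file is an Apple property-list XML document whose first <string> element holds the URL. A minimal sketch:

from xml.dom.minidom import parseString

webloc = parseString(
    '<plist version="1.0"><dict>'
    '<key>URL</key><string>https://example.com</string>'
    '</dict></plist>')
print(webloc.getElementsByTagName('string')[0].firstChild.nodeValue)
# https://example.com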
Code Example #16
File: scrapper.py Project: maciej/veturillo
def current_endpoint_data():
    raw_content = request.urlopen(VETURILLO_ENDPOINT).read().decode('utf-8')
    # http://stackoverflow.com/questions/749796/pretty-printing-xml-in-python
    return xml_parse(raw_content).toprettyxml()
Code Example #17
def download_models_from_modbase( query ,
        out_directory = 'modbase_models' , root_filename = '' ,
        dataset = '' , get_alignment = True , write_summary = True ,
        display = True ):
    """
    REQUIRES INTERNET CONNECTION    

    Returns "details" on the models for  <query>  in ModBase
    write results to  <out_directory>  with the base  <root_filename>
    
    Optionally request models from a specific  <dataset>
    Optionally  <get_alignment>  too (as PIR file)
    Optionally  <display>  a summary of the results
    Optionally  <write_summary>  of the models (human readable, also displays)
    
    ModBase documentation claims that the interface can accept:
        databaseID      database ID, let's use UniProt
        dataset         a particular ModBase run?
        modelID         same?
        seqID           same?

        dataset         the ModWeb JobID...
        type            "model" or "alignment", this method handles this
    and that any of the first 4 is enough to identify the target (?)
    ...for simplicity, let's just look using UniProt IDs as "databaseIDs"
    
    apparently to use "non-public" access additional work must be done
    (something about a "cookies.txt" file, though this seems specific to "wget",
    may be able to pass in user/password as "modbase_user" and "modbase_passwd")
    
    uses xml.dom.minidom to parse the HTML returned...this may not be kosher...
    but it works...and is easier than using htmllib or sgmllib...(?)
    """
    # url
    url = 'http://salilab.org/modbase/retrieve/modbase'
    
    # format the search query
    print 'searching modbase for \"' + query +'\"'
    url += '?databaseID=' + query
    # currently unused...so why put it here?
    #for i in search_options.keys():
    #    url += '&' + i +'='+ search_options[i]
    
    # the dataset
#    if not 'dataset' in search_options.keys() and dataset:
    if dataset:
        url += '&dataset=' + dataset

    # go get the results
    print 'obtaining model results from:\n\t' + url
    raw_stream = urllib2.urlopen( url  + '&type=model' )    
    print 'finished downloading models, summarizing the results...'
        
    # parse the results
    results = xml_parse( raw_stream )

    # check if empty
    if not len( results.toxml() ) > 100:    # ahhh! I hate arbitrary numbers!!!
        print 'no models exist in ModBase for this protein...'
        return {}
    
    # get the ids
    #ids = get_str_from_xml_tag( results , 'model_id' )
    # no need, in the header of the model
    
    # get the models
    models = get_str_from_xml_tag( results , 'content' )
        
    # extract the details
    details , text = get_modbase_model_details( models , display or write_summary , export = True )
        
    # defaults for writing files
    if not root_filename:
        root_filename = 'modbase_' + query
    
    # optionally write the models
    if out_directory:
        create_directory( out_directory , ' to store the models as PDB files' )
        print 'writing the downloaded models to ' + out_directory
        count = 1
        filenames = []
        for i in models:
            # write it
            filename = out_directory + '/' + root_filename + '_model_' + str( count ) + '.pdb'
            filenames.append( os.path.abspath( filename ) )

            # write the alignment
            f = open( filename , 'w' )
            f.write( i.strip() )
            f.close()
            count += 1
        
        # change this in this case
        models = filenames
        
        # SOOO HACKY!!!!
        # for later safety...
        out_directory += '/'

    # optionally grab the alignment too
    if get_alignment:
        print 'also downloading the alignments...'
        raw_aln_stream = urllib2.urlopen( url  + '&type=alignment' )

        # parse the results
        aln_results = xml_parse( raw_aln_stream )
        
        # get the files
        aln_results = aln_results.getElementsByTagName( 'alignmentfile' )
        
        # ...for now, just get the text itself
        # don't worry about the other details in the XML file
        print 'writing the alignments as PIR files...'
        count = 1
        for i in aln_results:
            i = get_str_from_xml_tag( i , 'content' )[0]    # just 1, always the first
            
            # if out_directory is empty...this will just do as we want
            filename = out_directory + root_filename + '_model_' + str( count ) + '_alignment.pir'
            f = open( filename , 'w' )
            f.write( i )
            f.close()
            
            # convert them?
            # doesn't seem to load these "pir" files...? :(
            
            # save in the details?
            details[count - 1]['alignment'] = i
            
            count += 1
    
    # put the models (filenames) into details...cleaner output, just 1 dict
    for i in xrange( len( models ) ):
        details[i]['coordinates'] = models[i]
    
    # find the "best" model
    temp = '\nevaluating the \"best\" model by comparing:\n\t1. sequence identity\n\t2. model score\n\t3. target length'
    print temp
    text += temp +'\n'
    best_score = max( [i['sequence identity'] for i in details] )
    matches = [i for i in details if i['sequence identity'] == best_score]
    if len( matches ) > 1 and sum( [not i['model score'] == matches[0]['model score'] for i in matches[1:]] ):
        # find the best model score
        best_score = max( [i['model score'] for i in details] )
        matches = [i for i in details if i['model score'] == best_score]
        
        if len( matches ) > 1 and sum( [not i['target length'] == matches[0]['target length'] for i in matches[1:]] ):
            best_score = max( [i['target length'] for i in details] )
            matches = [i for i in details if i['target length'] == best_score]
   
    # debug output
    if len( matches ) > 1:
        temp = 'multiple models are \"equally the best\":'
        print temp
        text += temp +'\n'
        for i in matches:
            temp = '\t'+ i['coordinates']
            print temp
            text += temp +'\n'
        temp = 'copying the first one to best_model.pdb'
        print temp
        text += temp +'\n'
    else:
        temp = 'best model: ' + matches[0]['coordinates']
        print temp
        text += temp
    # move it to a indicative filename
    copy_file( matches[0]['coordinates'] , out_directory + '/best_model.pdb' )

    # optionally write a summary file
    if write_summary:
        # if out_directory is empty...this will just do as we want
        filename = out_directory + root_filename + '_summary.txt'
        f = open( filename , 'w' )
        f.write( text )
        f.close()
    
    # just the details, has everything else...
    return details
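A hypothetical invocation (the UniProt accession is only an example). It assumes the helper functions referenced above (get_str_from_xml_tag, create_directory, copy_file, get_modbase_model_details) are importable from the same module, plus a working internet connection.

details = download_models_from_modbase('P38398',
        out_directory='modbase_models', get_alignment=True)
for d in details:
    print d['coordinates'], d['sequence identity']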