Example #1
0
def pubmed_adapter(search=None, id=None):
    '''
    Query PubMed through the NCBI services and return the result as Atom.

    search - search term; when given, the matching article IDs are looked up
        first and `id` is ignored
    id - article ID value, passed unchanged to the article fetch; used only
        when `search` is not given

    Returns the value of atom_results() for the fetched article document.

    Sample queries:
    #curl "http://localhost:8880/pubmed?"
    curl "http://localhost:8880/pubmed?search=stem+cells"
    curl "http://localhost:8880/pubmed?id=19358275"
    '''
    #FIXME: How do we handle no search or id param?  Just serve up the latest entries?  Or error as below?
    #assert_(not(search and id), msg="You must specify the 'search' or 'id' query parameter is mandatory.")
    if search:
        #A 'reldate' key could be added to restrict the search to the last N days
        query = urllib.urlencode({'term': search, 'db' : NCBI_DB, 'datetype': 'edat', 'retmax': DEFAULT_MAX_RESULTS, 'usehistory': 'y'})
        search_url = NCBI_SEARCH_PATTERN + query
        logger.debug("Term search URL: " + search_url)
        doc = bindery.parse(search_url, standalone=True)
        search_terms = search
        #Collapse the IDs found by the search into one comma separated value
        ids = ','.join( unicode(i) for i in doc.eSearchResult.IdList.Id )
        self_link = '/pubmed?search='+search
    else:
        #Bind `ids` from the parameter; it was previously read here without
        #ever being assigned, so every lookup by id failed with a NameError
        ids = id
        search_terms = ids
        self_link = '/pubmed?id='+ids
    #Second request: fetch the article records for the collected IDs
    query = urllib.urlencode({'db' : NCBI_DB, 'id': ids, 'retmode': 'xml'})
    search_url = NCBI_ARTICLE_ACCESS_PATTERN + query
    logger.debug("ID search URL: " + search_url)
    alt_link = search_url
    doc = bindery.parse(search_url, standalone=True, model=PUBMED_MODEL)
    metadata, first_id = metadata_dict(generate_metadata(doc))
    return atom_results(doc, metadata, self_link, alt_link, search_terms)
Example #2
0
def moin2atomentries(wikibase, outputdir, rewrite, pattern):
    '''
    Walk the page list of a Moin wiki and hand each page needing an update
    to handle_page().

    wikibase - URL of the wiki; requested with the RDF media type to get the page list
    outputdir - directory in which the output files are created
    rewrite - URI prefix that is replaced by wikibase before a page is
        fetched; may be empty or None when no rewriting is wanted
    pattern - optional regular expression (string); pages whose relative
        path does not match it are skipped

    A page is skipped when its existing output file carries the same date as
    the page's revision date.
    '''
    #Length of the prefix stripped from each URI to get the relative path.
    #Presumably listed URIs start with `rewrite` when one is in effect and
    #with the wiki base otherwise; len(rewrite) alone failed for rewrite=None
    base_len = len(rewrite) if rewrite else len(wikibase)
    if pattern: pattern = re.compile(pattern)
    req = urllib2.Request(wikibase, headers={'Accept': RDF_IMT})
    with closing(urllib2.urlopen(req)) as resp:
        feed = bindery.parse(resp)
    for item in feed.RDF.channel.items.Seq.li:
        uri = split_fragment(item.resource)[0]
        relative = uri[base_len:]
        print >> sys.stderr, uri, relative
        if pattern and not pattern.match(relative):
            continue
        if rewrite:
            uri = uri.replace(rewrite, wikibase)
        req = urllib2.Request(uri, headers={'Accept': DOCBOOK_IMT})
        with closing(urllib2.urlopen(req)) as resp:
            page = bindery.parse(resp)
        entrydate = dateparse(unicode(page.article.articleinfo.revhistory.revision.date))
        #Naive timestamps get the default zone so the comparison below is valid
        if entrydate.tzinfo is None: entrydate = entrydate.replace(tzinfo=DEFAULT_TZ)
        output = os.path.join(outputdir, OUTPUTPATTERN%pathsegment(relative))
        if os.access(output, os.R_OK):
            lastrev = dateparse(unicode(bindery.parse(output).entry.updated))
            if lastrev.tzinfo is None: lastrev = lastrev.replace(tzinfo=DEFAULT_TZ)
            if entrydate == lastrev:
                print >> sys.stderr, 'Not updated.  Skipped...'
                continue
        print >> sys.stderr, 'Writing to ', output
        #Separate name for the handle so the path in `output` is not clobbered
        with open(output, 'w') as outfile:
            handle_page(uri, page, outputdir, relative, outfile)
    return
Example #3
0
 def test_xbel_2(self):
     '''Merging bm2.xbel into bm1.xbel serializes to the stored merged-2.xbel'''
     #The fixtures are local copies of
     #http://hg.4suite.org/amara/trunk/raw-file/bb6c40828b2d/demo/7days/bm1.xbel
     #http://hg.4suite.org/amara/trunk/raw-file/bb6c40828b2d/demo/7days/bm2.xbel
     doc1 = bindery.parse(FILE('bm1.xbel'))
     doc2 = bindery.parse(FILE('bm2.xbel'))

     merge(doc1.xbel, doc2.xbel)
     normalize_whitespace(doc1)
     output = doc1.xml_encode(lookup("xml-indent")) + '\n'
     #Read the expected text inside a with block so the handle is closed
     with open(FILE('merged-2.xbel')) as expected_file:
         expected = expected_file.read()
     self.assertEqual(output, expected)
Example #4
0
def test_simple_atom_entry():
    '''Basic ns fixup upon mutation'''
    #Parse the sample entry and serialize it straight back into a buffer
    parsed = bindery.parse(ATOMENTRY1)
    buf = cStringIO.StringIO()
    xml_print(parsed, stream=buf)
    serialized = buf.getvalue()
    #The serialization must be equivalent to the source text
    differences = '\n'.join(treecompare.xml_diff(serialized, ATOMENTRY1))
    assert not differences, "Expected=%r, returned=%r diff=%r" % (ATOMENTRY1, serialized, differences)
    #The output must itself be parseable; an exception here fails the test
    bindery.parse(serialized)
Example #5
0
 def test_combined(self):
     '''Entry titles of two feeds, ordered by their updated value, match the stored listing'''
     #Local downloads of http://zepheira.com/news/atom/entries/ and
     #http://ma.gnolia.com/atom/full/people/Uche
     ATOM1 = FILE('zepheira_atom.xml')  #local download for testing
     ATOM2 = FILE('magnolia_uche.xml')  #local download for testing
     output = cStringIO.StringIO()
     #Read the expected listing inside a with block so the handle is closed
     with open(FILE('entries_combined.txt')) as expected_file:
         combined_output = expected_file.read()  #local file for testing
     doc1 = bindery.parse(ATOM1)
     doc2 = bindery.parse(ATOM2)
     #One flat stream over the entries of both feeds
     combined = itertools.chain.from_iterable(doc.feed.entry for doc in (doc1, doc2))
     for node in sorted(combined, key=operator.attrgetter('updated')):
         print >> output, node.title
     self.assertEqual(output.getvalue(), combined_output)
Example #6
0
def test_simple_atom_entry():
    '''Basic ns fixup upon mutation'''
    source_doc = bindery.parse(ATOMENTRY1)
    #Serialize the parsed entry into an in-memory stream
    stream = cStringIO.StringIO()
    xml_print(source_doc, stream=stream)
    result = stream.getvalue()
    #An empty diff means the round trip preserved the document
    diff_lines = treecompare.xml_diff(result, ATOMENTRY1)
    diff_text = '\n'.join(diff_lines)
    message = "Expected=%r, returned=%r diff=%r" % (ATOMENTRY1, result, diff_text)
    assert not diff_text, message
    #Re-parsing the result must succeed as well
    bindery.parse(result)
Example #7
0
def aggregate_entries(envelope, entries):
    '''
    Append Atom entries to a feed document.

    envelope - input source of the atom feed document that receives the
        entries; entries it already holds stay, the new ones are appended
    entries - sequence of input sources, one entry document each

    The new entries are appended in descending order of the string value of
    their updated element.  Returns a (feed document, metadata) pair, where
    metadata is the result of generate_metadata() on the feed document.
    '''
    def updated_text(entry_doc):
        #Sort key: the updated element rendered as a plain string
        return str(entry_doc.entry.updated)

    feed_doc = bindery.parse(envelope, model=FEED_MODEL)
    parsed_entries = []
    for source in entries:
        parsed_entries.append(bindery.parse(source, model=ENTRY_MODEL))
    parsed_entries.sort(key=updated_text, reverse=True)
    for entry_doc in parsed_entries:
        feed_doc.feed.xml_append(entry_doc.entry)
    return feed_doc, generate_metadata(feed_doc)
Example #8
0
def parse_landing_zone(ips_dict, cluster_config):
    """ Returns the first landing zone found.
        return (public_ip, directory)

        ips_dict - mapping from a computer's netAddress (private ip) to its
            public ip
        cluster_config - text of the cluster configuration XML

        Raises ClusterError when ips_dict is empty or when the configuration
        text is empty after stripping whitespace."""

    cluster_config = cluster_config.strip().encode('utf-8')

    if not ips_dict:
        raise ClusterError('Cannot parse landing zone. ' +
                           'No ip translation table. ' +
                           'Cluster may not be ready yet.')

    if not cluster_config:
        raise ClusterError('Cannot parse landing zone. ' +
                           'Config is empty. ' +
                           'Cluster may not be ready yet.')

    # The text was stripped above, so it is parsed as is
    doc = bindery.parse(cluster_config)

    # Create node to ip address translation table
    nodes = {}
    for e in doc.Environment.Hardware.Computer:
        nodes[e.name] = [e.netAddress]

    Software = doc.Environment.Software

    # attributes - build, buildSet, computer, description, directory, name
    instances = Software.DropZone
    # Tag every computer hosting a drop zone (plain loop: this is done for
    # its side effect, not to build a list)
    for instance in instances:
        nodes[instance.computer].append('Drop Zone')

    # Only the first drop zone is reported
    first_zone = instances[0]
    private_ip = nodes[first_zone.computer][0]
    directory = first_zone.directory
    public_ip = ips_dict[private_ip]

    return (public_ip, directory)
Example #9
0
def moincms(wikibase, outputdir, pattern):
    '''
    Render the pages of a Moin wiki that are not up to date.

    wikibase - URL of the wiki, requested with the RDF media type for the page list
    outputdir - passed on to node.factory for every page
    pattern - optional regular expression (string); pages whose relative
        path does not match it are skipped
    '''
    if pattern:
        pattern = re.compile(pattern)
    request = urllib2.Request(wikibase, headers={'Accept': RDF_IMT})
    response = urllib2.urlopen(request)
    original_wiki_base = dict(response.info())[ORIG_BASE_HEADER]
    feed = bindery.parse(response)
    pending = []
    for item in feed.RDF.channel.items.Seq.li:
        uri = split_fragment(item.resource)[0]
        #Deal with the wrapped URI
        if original_wiki_base:
            wrapped_base = original_wiki_base.rstrip('/') + '/'
            relative = relativize(uri, wrapped_base).lstrip('/')
            uri = absolutize(relative, wikibase)
        #NOTE(review): `relative` is bound only in the branch above, so an
        #empty original base header would make the next lines fail - confirm
        if pattern and not pattern.match(relative):
            continue
        n = node.factory(uri, relative, outputdir)
        if not n.up_to_date():
            pending.append(n)

    #Process nodes needing update according to priority
    pending.sort(key=attrgetter('PRIORITY'), reverse=True)
    for n in pending:
        n.render()
    return
Example #10
0
def read_info(fname):
    """Parse function records from an XML source and group them by source file.

    fname is an object with a read() method; what it returns is parsed and
    the element children of the document's `vector` element are the records.

    NOTE(review): `equiv_classes` and `map_values` are not standard list/dict
    methods; presumably they are supplied by a project extension - confirm.
    """
    all = bindery.parse(fname.read()).vector
    # Keep element children only
    info = [v for v in all.xml_children if v.xml_type == "element"]

    def setImplements(fcns):
        """Fill in derived attributes on every record of one group and return the group."""
        # First pass: derive plain Python values from the parsed fields
        for fcn in fcns:
            fcn.name = str(fcn.nameStr)
            fcn.impl = getattr(fcn, "implName", None)
            fcn.impl = (str(fcn.impl) if fcn.impl else fcn.impl)
            # lineNum is turned into an index one lower than its value
            fcn.line_idx = int(str(fcn.lineNum)) - 1
            fcn.is_generator = str(fcn.isGenerator) == "true"
        # Second pass: needs every name/impl from the first pass to be set
        for fcn in fcns:
            # Keep a flag an earlier iteration may already have set on this record
            fcn.is_toplevel = getattr(fcn, "is_toplevel", False) or bool(
                fcn.impl)
            if fcn.impl:
                try:
                    # Also flag the record whose name equals this record's impl
                    get_singleton(v for v in fcns
                                  if v.name == fcn.impl).is_toplevel = True
                except GetSingletonEmptyException:
                    # No such record: note it in a module-level flag and report it
                    global couldntfindexception
                    couldntfindexception = True
                    print("couldn't find function %s for file %s" %
                          (fcn.impl, fcn.srcFile))
        return fcns

    # Group the records by the Path of their srcFile, then post-process each group
    return dict(list(info).equiv_classes(
        lambda a: Path(str(a.srcFile)))).map_values(setImplements)
Example #11
0
    def scanXMLDocument(self, path):
        """
        Load the XML file at `path` into the editor control and record in
        self.Sentences the number of every line that, once whitespace is
        removed, starts with '<Si'.

        Also sets self.XML_W and self.doc (the parsed document).
        """
        self.XML_W = lookup("xml")
        self.doc = bindery.parse(path)

        # Read through a with block so the file handle is closed
        with open(path) as source:
            self.control.SetText(source.read())
        self.control.EmptyUndoBuffer()
        self.control.Colourise(0, -1)

        # line numbers in the margin
        self.control.SetMarginType(1, wx.stc.STC_MARGIN_NUMBER)
        self.control.SetMarginWidth(1, 25)

        self.Sentences = []

        # The check now runs for every line; it used to sit after the loop
        # and therefore only ever looked at the last line
        for i in range(self.control.GetNumberOfLines()):
            # Remove all whitespace from the line before testing it
            Line = re.sub(r"\s+", "", self.control.GetLineText(i), flags=re.UNICODE)
            # startswith is safe on lines shorter than three characters,
            # where indexing Line[0..2] raised IndexError
            if Line.startswith('<Si'):
                # Every time we encounter '<Si' at the beginning of a line,
                # we append the line number to the Sentences list
                self.Sentences.append(i)
Example #12
0
 def testDelChildElement7(self):
     '''Deleting index 1 of the b children removes the second b element'''
     source = "<a><b>spam</b><b>eggs</b></a>"
     wanted = '<a><b>spam</b></a>'
     doc = bindery.parse(source)
     del doc.a[u'b'][1]
     self.compare_output(doc, XMLDECL+wanted)
Example #13
0
 def test_update_bindery(self):
     '''Append, insert and remove children, then rebuild a document from a fragment'''
     doc = bindery.parse(self.XML)

     #A unicode string appended to a becomes its last child
     doc.a.xml_append(u'New Content')
     self.assertEqual(doc.a.xml_children[-1].xml_value, u'New Content')

     #An appended element becomes the last child in turn
     spam_elem = doc.xml_element_factory(None, u'spam')
     doc.a.xml_append(spam_elem)
     self.assertEqual(doc.a.xml_children[-1], spam_elem)

     #Insert a text node at position 1
     inserted_text = amara.tree.text(u'New Content')
     doc.a.xml_insert(1, inserted_text)
     self.assertEqual(doc.a.xml_children[1], inserted_text)

     #Remove the b child at index 1 from a; the child count drops by one
     count_before = len(doc.a.xml_children)
     second_b = doc.a.b[1]
     second_b.xml_parent.xml_remove(second_b)
     self.assertEqual(len(doc.a.xml_children), count_before-1)

     #Start over from an empty entity and load the XML as a fragment
     doc = bindery.nodes.entity_base()
     doc.xml_append_fragment(self.XML)
     check_bindery(self, doc)
Example #14
0
 def __init__(self, egdoc):
     '''Parse `egdoc` into self.model_document, set its model prefixes, and run setup_model()'''
     from amara import bindery
     model_doc = bindery.parse(egdoc)
     self.model_document = model_doc
     #Prefixes come from top_namespaces() applied to the parsed document
     model_doc.xml_model.prefixes = top_namespaces(model_doc)
     self.setup_model()
Example #15
0
 def testSetChildElement5(self):
     '''Assigning to index 1 of the b children replaces the second b's content'''
     source = "<a><b>spam</b><b>spam</b></a>"
     wanted = '<a><b>spam</b><b>eggs</b></a>'
     doc = bindery.parse(source)
     doc.a.b[1] = u"eggs"
     self.compare_output(doc, XMLDECL+wanted)
Example #16
0
    def xml_append_fragment(self, frag):
        '''
        Parse `frag` and append every child of the resulting document to
        this node with xml_append().
        '''
        #Imported at call time rather than at module level
        from amara.bindery import parse

        fragment_doc = parse(frag)
        for node in fragment_doc.xml_children:
            self.xml_append(node)
Example #17
0
 def testDelChildElement4(self):
     '''Deleting a b child at an index past the end raises IndexError'''
     doc = bindery.parse("<a><b>spam</b><b>spam</b></a>")
     def delete_out_of_range():
         #Only indexes 0 and 1 exist
         del doc.a.b[2]
     self.assertRaises(IndexError, delete_out_of_range)
Example #18
0
 def testSetAttribute7(self):
     '''Setting a new key in xml_attributes adds the attribute to the element'''
     source = '<a><b>spam</b></a>'
     wanted = '<a foo="bar"><b>spam</b></a>'
     doc = bindery.parse(source)
     doc.a.xml_attributes[u"foo"] = u"bar"
     self.compare_output(doc, XMLDECL+wanted)
Example #19
0
 def testDelAttribute1(self):
     '''`del doc.a.b` drops the b attribute and keeps the b child element'''
     source = '<a b="spam"><b>spam</b></a>'
     wanted = '<a><b>spam</b></a>'
     doc = bindery.parse(source)
     del doc.a.b
     self.compare_output(doc, XMLDECL+wanted)
Example #20
0
 def testSetAttribute2(self):
     '''Assigning an existing key in xml_attributes overwrites the attribute value'''
     source = '<a b="spam"><b>spam</b></a>'
     wanted = '<a b="eggs"><b>spam</b></a>'
     doc = bindery.parse(source)
     doc.a.xml_attributes[u'b'] = u"eggs"
     self.compare_output(doc, XMLDECL+wanted)
Example #21
0
    def OnSave(self,e):
        """
        Save the editor text to a file chosen by the user.

        The text is first parsed as XML; when parsing fails an error dialog
        is shown and nothing is saved.  Otherwise a save dialog is shown and,
        when it is confirmed, the UTF-8 encoded text is written to the chosen
        file and self.filename / self.dirname are updated.

        e - the event object; not used by this handler
        """
        # Grab the content to be saved
        itcontains = self.control.GetValue().encode("utf-8")
        # Checking if the XML is correct
        try:
            self.doc = bindery.parse(itcontains)
        except bindery.Error as err:
            line = err.lineNumber
            dial = wx.MessageDialog(None, 'Error with XML parsing at line '+str(line-1)+' \nCannot save document, please correct the error', 'Error', 
                            wx.OK | wx.ICON_ERROR)
            dial.ShowModal()
            dial.Destroy()
            # Stop here however the dialog was dismissed; the code used to
            # go on and save the malformed text unless wx.ID_OK came back
            return
        dlg = wx.FileDialog(self, "Choose a file", self.dirname, self.filename, "*.*", \
                            wx.SAVE | wx.OVERWRITE_PROMPT)
        try:
            if dlg.ShowModal() == wx.ID_OK:
                self.filename=dlg.GetFilename()
                self.dirname=dlg.GetDirectory()
                # The with block closes the file even when the write fails
                with open(os.path.join(self.dirname, self.filename),'w') as filehandle:
                    filehandle.write(itcontains)
        finally:
            # Get rid of the dialog to keep things tidy
            dlg.Destroy()
Example #22
0
 def testSetChildElement6(self):
     '''Assigning to a b child at an index past the end raises IndexError'''
     doc = bindery.parse("<a><b>spam</b><b>spam</b></a>")
     def assign_out_of_range():
         #Only indexes 0 and 1 exist
         doc.a.b[2] = u"eggs"
     self.assertRaises(IndexError, assign_out_of_range)
Example #23
0
        def test_update_bindery(self):
            '''Exercise xml_append, xml_insert and xml_remove, then xml_append_fragment'''
            doc = bindery.parse(self.XML)

            #Appending a unicode string adds it as the last child of a
            doc.a.xml_append(u'New Content')
            last_child = doc.a.xml_children[-1]
            self.assertEqual(last_child.xml_value, u'New Content')

            #Appending an element makes it the last child
            extra_elem = doc.xml_element_factory(None, u'spam')
            doc.a.xml_append(extra_elem)
            self.assertEqual(doc.a.xml_children[-1], extra_elem)

            #A text node inserted at index 1 is found at index 1
            extra_text = amara.tree.text(u'New Content')
            doc.a.xml_insert(1, extra_text)
            self.assertEqual(doc.a.xml_children[1], extra_text)

            #Removing the b child at index 1 leaves one child fewer
            kids_before = len(doc.a.xml_children)
            victim = doc.a.b[1]
            victim.xml_parent.xml_remove(victim)
            self.assertEqual(len(doc.a.xml_children), kids_before - 1)

            #Fresh entity filled from the XML text as a fragment
            doc = bindery.nodes.entity_base()
            doc.xml_append_fragment(self.XML)
            check_bindery(self, doc)
Example #24
0
File: moin.py Project: mredar/akara
 def factory(rest_uri, moin_link=None, opener=None):
     '''
     Fetch the resource at rest_uri as DocBook and build the matching node.

     rest_uri - URL of the resource
     moin_link - passed through to the node class
     opener - urllib2 opener used for the request; a default one is built
         when none is given

     Returns an instance of the class registered in node.NODES for the
     resource's ak-type.  When no class is registered, returns
     node.ENDPOINTS itself if that is empty, otherwise the tuple
     (rest_uri, akara_type, endpoint, doc, metadata, original_wiki_base).
     '''
     if not opener:
         opener = urllib2.build_opener()
     logger.debug('rest_uri: ' + rest_uri)
     request = urllib2.Request(rest_uri, headers={'Accept': DOCBOOK_IMT})
     resp = opener.open(request)
     doc = bindery.parse(resp, standalone=True, model=MOIN_DOCBOOK_MODEL)
     original_wiki_base = dict(resp.info())[ORIG_BASE_HEADER]
     all_metadata, first_id = metadata_dict(generate_metadata(doc))
     metadata = all_metadata[first_id]
     akara_type = U(metadata[u'ak-type'])
     logger.debug('Type: ' + akara_type)
     try:
         #Older Moin CMS resource types are implemented by registration to the global node.NODES
         cls = node.NODES[akara_type]
     except KeyError:
         #Newer Moin CMS resource types are implemented by discovery of a URL,
         #to which a POST request executes the desired action
         if not node.ENDPOINTS:
             return node.ENDPOINTS
         endpoint = node.ENDPOINTS[akara_type]
         return (rest_uri, akara_type, endpoint, doc, metadata, original_wiki_base)
     cached = (doc, metadata, original_wiki_base)
     return cls(rest_uri, moin_link, opener, cache=cached)
Example #25
0
    def addAudioSample(self, start, end):
        i = self.control.GetInsertionPoint()
        j = self.control.LineFromPosition(i)
        section = self.getSectionNumber(j)
        line= self.control.GetFirstVisibleLine()
        
        try:
            self.doc = bindery.parse(self.control.GetValue().encode("utf-8"))
        except bindery.Error as e:
            print e.lineNumber

        if section != 0:
            try:
                self.doc.TEXT.S[section-1].AUDIO.start = self.SimplifyAudioLimit(start)
                self.doc.TEXT.S[section-1].AUDIO.end = self.SimplifyAudioLimit(end)
            except AttributeError:
                new_elem = self.doc.xml_element_factory(None, u"AUDIO")
                new_elem.xml_attributes.setnode(new_elem.xml_attribute_factory(None, u"start", self.SimplifyAudioLimit(start)))
                new_elem.xml_attributes.setnode(new_elem.xml_attribute_factory(None, u"end", self.SimplifyAudioLimit(end)))
                self.doc.TEXT.S[section-1].xml_insert(0, new_elem)
            self.control.SetValue(self.doc.xml_encode(self.XML_W))
            i = self.control.PositionFromLine(j)
            self.control.SetInsertionPoint(i)

            self.control.ScrollToLine(line)
            self.focus = section-1
Example #26
0
def moincms(wikibase, outputdir, pattern):
    '''
    Build a node for every page listed by the wiki and render those that
    are not up to date, highest PRIORITY first.

    wikibase - URL of the wiki, requested with the RDF media type
    outputdir - handed to node.factory together with each page
    pattern - optional regular expression (string) a page's relative path
        must match for the page to be processed
    '''
    if pattern:
        pattern = re.compile(pattern)
    listing_request = urllib2.Request(wikibase, headers={'Accept': RDF_IMT})
    listing = urllib2.urlopen(listing_request)
    original_wiki_base = dict(listing.info())[ORIG_BASE_HEADER]
    feed = bindery.parse(listing)
    stale_nodes = []
    for item in feed.RDF.channel.items.Seq.li:
        uri = split_fragment(item.resource)[0]
        #Deal with the wrapped URI
        if original_wiki_base:
            base_with_slash = original_wiki_base.rstrip('/')+'/'
            relative = relativize(uri, base_with_slash).lstrip('/')
            uri = absolutize(relative, wikibase)
        #NOTE(review): `relative` exists only when the branch above ran;
        #an empty original base header would break the lines below - confirm
        if pattern and not pattern.match(relative):
            continue
        candidate = node.factory(uri, relative, outputdir)
        if candidate.up_to_date():
            continue
        stale_nodes.append(candidate)

    #Process nodes needing update according to priority
    for stale in sorted(stale_nodes, key=attrgetter('PRIORITY'), reverse=True):
        stale.render()
    return
Example #27
0
    def list_records(self, set="", resumption_token = ""):
        '''
        List records. Use either the resumption token or set id.

        set - set id; used with the oai_dc metadata prefix when no
            resumption token is given
        resumption_token - token returned by a previous call; when given it
            is the only argument sent besides the verb

        Returns a dict with the keys 'records' (the records extracted by
        metadata_dict, their property values converted with U) and
        'resumption_token' (the token from the response, '' when absent).
        '''
        if resumption_token:
            params = {'verb' : 'ListRecords', 'resumptionToken': resumption_token}
        else:
            params = {'verb' : 'ListRecords', 'metadataPrefix': 'oai_dc', 'set': set}
        qstr = urllib.urlencode(params)
        url = self.root + '?' + qstr
        self.logger.debug('OAI request URL: {0}'.format(url))
        start_t = time.time()
        resp, content = self.h.request(url)
        retrieved_t = time.time()
        self.logger.debug('Retrieved in {0}s'.format(retrieved_t - start_t))
        #Parse the body that was just retrieved; parsing `url` instead
        #downloaded the same document a second time
        doc = bindery.parse(content, model=OAI_LISTRECORDS_MODEL)

        records, first_id = metadata_dict(generate_metadata(doc),
                                          nesteddict=False)
        for id_, props in records:
            for k, v in props.iteritems():
                props[k] = [ U(item) for item in v ]
        list_records_elem = doc.OAI_PMH.ListRecords
        if (list_records_elem is not None) and (list_records_elem.resumptionToken is not None):
            resumption_token = U(list_records_elem.resumptionToken)
        else:
            resumption_token = ''
        return {'records' : records, 'resumption_token' : resumption_token}
Example #28
0
 def testSetAttribute5(self):
     '''Declare a namespace prefix on a, then set an attribute that uses it'''
     source = '<a><b>spam</b></a>'
     wanted = '<a xmlns:ns="urn:bogus" ns:foo="bar"><b>spam</b></a>'
     doc = bindery.parse(source)
     #The declaration is made before the prefixed attribute is set
     doc.a.xmlns_attributes[u'ns'] = u'urn:bogus'
     doc.a.xml_attributes[u'ns:foo'] = u'bar'
     self.compare_output(doc, XMLDECL+wanted)
Example #29
0
 def testSetAttribute4(self):
     '''Setting xml:lang through xml_attributes adds the attribute to b'''
     source = '<a><b>spam</b></a>'
     wanted = '<a><b xml:lang="en">spam</b></a>'
     doc = bindery.parse(source)
     doc.a.b.xml_attributes[u'xml:lang'] = u'en'
     self.compare_output(doc, XMLDECL+wanted)
Example #30
0
 def testSetAttribute3(self):
     '''Item assignment with an (ATTRIBUTE_NODE, namespace, name) key overwrites the attribute'''
     from xml.dom import Node
     source = '<a b="spam"><b>spam</b></a>'
     wanted = '<a b="eggs"><b>spam</b></a>'
     doc = bindery.parse(source)
     attribute_key = (ATTRIBUTE_NODE, None, u'b')
     doc.a[attribute_key] = u'eggs'
     self.compare_output(doc, XMLDECL+wanted)
Example #31
0
 def render(self):
     #Copy attachments to dir
     req = urllib2.Request(self.rest_uri, headers={'Accept': ATTACHMENTS_IMT})
     resp = urllib2.urlopen(req)
     doc = bindery.parse(resp, model=ATTACHMENTS_MODEL)
     for attachment in (doc.attachments.attachment or ()):
         print attachment
     return
Example #32
0
 def test_parse_with_file_path(self):
     """Parse with file path"""
     import os
     #mkstemp creates and opens the file in one step; mktemp only returns a
     #name, which leaves a window for another process to claim it
     fd, fname = tempfile.mkstemp('.xml')
     try:
         with os.fdopen(fd, 'w') as fout:
             fout.write(MONTY_XML)
         doc = parse(fname)
         self.run_checks(doc)
     finally:
         #The temporary file used to be left behind
         os.remove(fname)
Example #33
0
 def Xtest_parse_with_url(self):
     '''Parse TEST_URL and check the single child of the document'''
     doc = parse(TEST_URL)
     #Minimal node testing
     self.assertEqual(len(doc.xml_children), 1)
     root = doc.xml_children[0]
     self.assertEqual(root.xml_type, tree.element.xml_type)
     self.assertEqual(root.xml_qname, 'disclaimer')
     self.assertEqual(root.xml_namespace, None)
     self.assertEqual(root.xml_prefix, None)
Example #34
0
def test_xslt():
    '''Send XML_DATA to the akara.xslt service and check the title of the returned document'''
    base = server()
    query = urllib.urlencode({"@xslt": XSLT_URL})
    request = urllib2.Request(base + "akara.xslt?" + query)
    request.add_header("Content-Type", "text/xml")
    #Passing data as the second argument sends it as the request body
    response = urllib2.urlopen(request, XML_DATA)

    doc = bindery.parse(response)
    title = str(doc.html.head.title)
    assert title == "Document Title", repr(title)
Example #35
0
 def test_xpath1(self):
     """Test xml_select with explicit prefixes and with a per-node expression"""
     doc = bindery.parse(self.DOC)
     selected = doc.xml_select(u'ns:friends/ns:bff', prefixes=self.PREFIXES)
     self.assertEqual(selected, u'Suzie Q')
     #Evaluate the concat() expression against every bff element
     ranked = []
     for f in doc.friends.bff:
         ranked.append(unicode(f.xml_select(u'concat(@rank, ": ", .)')))
     self.assertEqual(ranked, [u'1: Suzie Q', u'2: Betty Boost'])
     return
Example #36
0
 def test_parse_with_file_path(self):
     """Parse with file path"""
     import os
     #tempfile.mktemp only hands out a name and is open to a race;
     #mkstemp creates the file itself and returns an open descriptor
     handle, fname = tempfile.mkstemp('.xml')
     try:
         with os.fdopen(handle, 'w') as fout:
             fout.write(MONTY_XML)
         doc = parse(fname)
         self.run_checks(doc)
     finally:
         #Clean up; the file was previously never removed
         os.remove(fname)
Example #37
0
 def __init__(self, schdoc):
     '''Initialize the dispatcher base, parse `schdoc` into self.model_document, and set up the model'''
     from amara import bindery
     dispatcher.__init__(self)
     model_doc = bindery.parse(schdoc)
     self.model_document = model_doc
     #Prefixes come from top_namespaces() applied to the parsed document
     model_doc.xml_model.prefixes = top_namespaces(model_doc)
     self.rules = []
     self.setup_model()
Example #38
0
 def __init__(self, parent):
     '''Build the controls, parse the XML file named below, and show it indented in the text control'''
     self._init_ctrls(parent)
     # replace the file name below
     fileName = 'Musica/crdo-NRU_F4_10_AGRICULTURAL_ACTIVITIES.xml'
     # The name is resolved against the current working directory
     xml_path = os.path.join(os.getcwd(), fileName)
     self.xmlDoc = bindery.parse(xml_path)

     indented = self.xmlDoc.xml(indent=u'yes')
     self.styledTextCtrl1.AddTextUTF8(indented)
Example #39
0
def ejsonize(isrc):
    '''
    Convert Atom syntax to a list of dictionaries, one per entry
    Note: the conventions used are designed to simplify conversion to Exhibit JSON
    (see: http://www.ibm.com/developerworks/web/library/wa-realweb6/ ; listing 3)

    isrc - input source holding either an Atom feed or a single Atom entry

    Raises ValueError when the document has neither a feed nor an entry
    at its top level.
    '''
    doc = bindery.parse(isrc, model=FEED_MODEL)

    def alternate_href(links):
        #href of the first link whose rel is absent or "alternate"; [] when none
        return first_item([ l.href for l in links if l.rel in [None, u"alternate"] ], [])

    def process_entry(e):
        known_elements = [u'id', u'title', u'link', u'author', u'category', u'updated', u'content', u'summary']
        data = {
            u"id": unicode(e.id),
            #XXX Shall we use title for label?
            u"label": unicode(e.id),
            u"type": u"Entry",
            u"title": unicode(e.title),
            u"link": alternate_href(e.link),
            u"authors": [ unicode(a.name) for a in iter(e.author or []) ],
            u"categories": [ unicode(c.term) for c in iter(e.category or []) ],
            u"updated": unicode(e.updated),
            u"summary": unicode(e.summary),
        }
        #An empty category list is left out altogether
        if not data[u"categories"]:
            del data[u"categories"]
        if e.summary is not None:
            data[u"summary"] = unicode(e.summary)
        if e.content is not None:
            #Content given by reference has a src attribute; otherwise it is inline
            try:
                data[u"content_src"] = unicode(e.content.src)
            except AttributeError:
                data[u"content_text"] = deserialize_text_construct(e.content)
        #Copy extension elements: outside the Atom namespace and not a known name
        for child in e.xml_elements:
            if child.xml_namespace == ATOM_NAMESPACE or child.xml_local in known_elements:
                continue
            data[child.xml_local] = unicode(child)
        return data

    try:
        doc_entries = iter(doc.feed.entry)
        #Feed-level data; collected here but not part of the return value
        feedinfo = {
            u"id": unicode(doc.feed.id),
            #XXX Shall we use title for label?
            u"label": unicode(doc.feed.id),
            u"type": u"Feed",
            u"title": unicode(doc.feed.title),
            u"link": alternate_href(doc.feed.link),
            u"authors": [ unicode(a.name) for a in iter(doc.feed.author or []) ],
            u"updated": unicode(doc.feed.updated),
        }
    except AttributeError:
        #Not a feed: try the document as a bare entry
        try:
            doc_entries = iter(doc.entry)
            feedinfo = None
        except AttributeError:
            #FIXME L10N
            raise ValueError("Does not appear to be a valid Atom file")

    return [ process_entry(entry) for entry in doc_entries ]
Example #40
0
 def test_nasty_xml_1(self):
     """XML with 2 elements with same local name and different NS on same parent"""
     top = parse(NASTY_NS_XML1).top
     self.assertEqual(len(top.xml_children), 5)
     #The two monty elements are bound under separate names: monty and monty_
     self.assertEqual(len(list(top.monty)), 1)
     self.assertEqual(len(list(top.monty_)), 1)
     self.assertEqual(top.monty.xml_namespace, u"urn:bogus:a")
     self.assertEqual(top.monty_.xml_namespace, u"urn:bogus:b")
     #monty_ is the second following sibling of monty
     two_after_monty = top.monty.xml_following_sibling.xml_following_sibling
     self.assertEqual(two_after_monty, top.monty_)
Example #41
0
 def testInsertBefore1(self):
     '''An element inserted at index 0 of a precedes the existing b child'''
     source = "<a><b>eggs</b></a>"
     wanted = '<a><b>spam</b><b>eggs</b></a>'
     doc = bindery.parse(source)
     #Build <b>spam</b> from the document's factories
     spam_b = doc.xml_element_factory(None, u'b')
     spam_b.xml_append(doc.xml_text_factory(u'spam'))
     doc.a.xml_insert(0, spam_b)
     self.compare_output(doc, XMLDECL+wanted)
Example #42
0
def test_xslt():
    '''Post XML_DATA to akara.xslt with the @xslt parameter and check the resulting title'''
    service_root = server()
    params = urllib.urlencode({"@xslt": XSLT_URL})
    url = service_root + "akara.xslt?" + params
    req = urllib2.Request(url)
    req.add_header("Content-Type", "text/xml")
    #The XML goes out as the request body
    reply = urllib2.urlopen(req, XML_DATA)

    result_doc = bindery.parse(reply)
    found_title = str(result_doc.html.head.title)
    assert found_title == "Document Title", repr(found_title)
Example #43
0
 def render(self):
     #Copy attachments to dir
     req = urllib2.Request(self.rest_uri,
                           headers={'Accept': ATTACHMENTS_IMT})
     resp = urllib2.urlopen(req)
     doc = bindery.parse(resp, model=ATTACHMENTS_MODEL)
     for attachment in (doc.attachments.attachment or ()):
         print attachment
     return
Example #44
0
 def test_xpath1(self):
     """Test xml_select on the document and on each bff element"""
     doc = bindery.parse(self.DOC)
     first_match = doc.xml_select(u'ns:friends/ns:bff')
     self.assertEqual(first_match, u'Suzie Q')
     #Rank and text of every bff, joined by the concat() expression
     labels = []
     for f in doc.friends.bff:
         labels.append(unicode(f.xml_select(u'concat(@rank, ": ", .)')))
     self.assertEqual(labels, [u'1: Suzie Q', u'2: Betty Boost'])
     return
Example #45
0
 def test_xpath(self):
     '''xml_select works on an element and on the document; each python element prints as expected'''
     doc = bindery.parse(self.MONTY_XML)
     m = doc.monty
     first_python = doc.monty.python
     self.assertEqual(first_python.xml_select(u'string(@spam)'), u'eggs')
     #Pair every selected python element with its expected serialization
     selected = doc.xml_select(u'//python')
     for python_elem, expected_line in zip(selected, self.lines_py):
         buf = cStringIO.StringIO()
         xml_print(python_elem, stream=buf)
         self.assertEqual(buf.getvalue(), expected_line)
Example #46
0
 def test_xpath(self):
     '''Check an attribute via XPath, then compare each //python match with self.lines_py'''
     doc = bindery.parse(self.MONTY_XML)
     m = doc.monty
     p1 = doc.monty.python
     spam_value = p1.xml_select(u'string(@spam)')
     self.assertEqual(spam_value, u'eggs')
     #zip stops at the shorter of the two sequences
     pairs = zip(doc.xml_select(u'//python'), self.lines_py)
     for elem, wanted in pairs:
         sink = cStringIO.StringIO()
         xml_print(elem, stream=sink)
         self.assertEqual(sink.getvalue(), wanted)
Example #47
0
    def parseXMLDocument(self, path):
        """
        Parse the XML document at `path` into self.doc and show its
        serialization in the editor control.

        Parsing is retried with standalone=True when the first attempt
        raises amara.lib.IriError.
        """
        self.XML_W = lookup("xml")
        try:
            self.doc = bindery.parse(path)
        except amara.lib.IriError:
            self.doc = bindery.parse(path, standalone=True)

        self.control.EmptyUndoBuffer()
        self.control.Colourise(0, -1)

        # line numbers in the margin
        self.control.SetMarginType(1, wx.stc.STC_MARGIN_NUMBER)
        self.control.SetMarginWidth(1, 25)

        # Serialize, then apply the display substitutions in this fixed
        # order: a line break before each S element, and the escaped angle
        # brackets shown as single guillemets
        text = self.doc.xml_encode(self.XML_W).decode("utf-8")
        substitutions = (
            ("<S id=", "\n <S id="),
            ("&lt;", u"\u2039"),
            ("&gt;", u"\u203A"),
        )
        for old, new in substitutions:
            text = text.replace(old, new)
        self.control.SetValue(text)
Example #48
0
 def testInsertAfter3(self):
     '''An element inserted right after b lands before the existing c children'''
     source = "<a><b>spam</b><c>ham</c><c>pork</c></a>"
     wanted = "<a><b>spam</b><c>eggs</c><c>ham</c><c>pork</c></a>"
     doc = bindery.parse(source)
     #Build <c>eggs</c> from the document's factories
     eggs_c = doc.xml_element_factory(None, u'c')
     eggs_c.xml_append(doc.xml_text_factory(u'eggs'))
     #Position just past b among the children of a
     position = doc.a.xml_index(doc.a.b) + 1
     doc.a.xml_insert(position, eggs_c)
     self.compare_output(doc, XMLDECL+wanted)
Example #49
0
 def test_parse_with_stream(self):
     """Parse with stream"""
     import os
     #mkstemp creates the file and returns an open descriptor, avoiding the
     #race between naming and creating the file that mktemp allows
     fd, fname = tempfile.mkstemp('.xml')
     try:
         with os.fdopen(fd, 'w') as fout:
             fout.write(MONTY_XML)
         #Hand the parser an open stream rather than the path
         with open(fname, 'r') as stream:
             doc = parse(stream)
         self.run_checks(doc)
     finally:
         #The temporary file used to be left behind
         os.remove(fname)
Example #50
0
 def test_bindery(self):
     '''Attribute access on the first python element, and serialization of every python element'''
     doc = bindery.parse(self.MONTY_XML)
     m = doc.monty
     #doc.monty.python (or m.python) is just the first python element
     first_python = doc.monty.python
     #The attribute is reachable by (namespace, name) key and as a property
     self.assertEqual(first_python.xml_attributes[(None, u'spam')], u'eggs')
     self.assertEqual(first_python.spam, u'eggs')

     #Iterating doc.monty.python picks up both python elements
     for python_elem, expected_line in zip(doc.monty.python, self.lines_py):
         buf = cStringIO.StringIO()
         xml_print(python_elem, stream=buf)
         self.assertEqual(buf.getvalue(), expected_line)
Example #51
0
 def testXml(self):
     '''
     Parse the editor text into self.doc.

     Returns 1 on success; on a parse error, returns the result of
     self.errorDialog() called with the error's line number.
     '''
     content = self.control.GetValue().encode("utf-8")
     try:
         self.doc = bindery.parse(content)
     except bindery.Error as e:
         return self.errorDialog(e.lineNumber)
     else:
         return 1
Example #52
0
 def test_avt1(self):
     """Test AVT"""
     doc = bindery.parse(self.DOC)
     greeting = doc.xml_avt(u'Hello, {friends/bff}')
     self.assertEqual(greeting, u'Hello, Suzie Q')

     #Template evaluated against every bff element in document order
     shouts = []
     for f in doc.friends.bff:
         shouts.append(f.xml_avt(u'Big up, {.}, rank {@rank}'))
     self.assertEqual(shouts, [u'Big up, Suzie Q, rank 1', u'Big up, Betty Boost, rank 2'])

     #Same elements ordered by their rank attribute
     by_rank = sorted(doc.friends.bff, key=attrgetter("rank"))
     ranked = []
     for f in by_rank:
         ranked.append(f.xml_avt(u'{@rank}: {.}'))
     self.assertEqual(ranked, [u'1: Suzie Q', u'2: Betty Boost'])
     return
Example #53
0
def test_charsearch():
    '''Search akara.unicode.search for DAGGER and check the names and see_also links returned'''
    url = server() + "akara.unicode.search?q=DAGGER"
    doc = bindery.parse(urllib2.urlopen(url))
    characters = doc.xml_select(u"characters/character")
    #Collect as sets: the order of the results does not matter
    names = set(character.name for character in characters)
    see_alsos = set(character.see_also for character in characters)
    expected_names = set(["DAGGER", "DOUBLE DAGGER"])
    expected_links = set(
        ["http://www.fileformat.info/info/unicode/char/2020/index.htm",
         "http://www.fileformat.info/info/unicode/char/2021/index.htm"])
    assert names == expected_names, names
    assert see_alsos == expected_links, see_alsos
Example #54
0
File: oai.py Project: dpla/zen
    def search(self, term):
        '''
        Build a GetRecord request against DSPACE_OAI_ENDPOINT, parse the
        response with OAI_MODEL and look up the first resource in it.

        NOTE(review): the body reads `dspace_id`, which is not bound in this
        method, and never uses `term`; presumably the identifier was meant to
        come from `term` - confirm.  The method also ends without returning
        anything, so it may be incomplete.
        '''
        qstr = urllib.urlencode({'verb' : 'GetRecord', 'metadataPrefix': 'oai_dc', 'identifier': dspace_id})
        url = DSPACE_OAI_ENDPOINT + '?' + qstr
        logger.debug('DSpace URL: ' + str(url))
        #keywords = [ (k.strip(), JOVE_TAG) for k in unicode(row.xml_select(u'string(.//*[@class="keywords"])')).split(',') ]

        # The URL itself is handed to the parser as the input source
        doc = bindery.parse(url, model=OAI_MODEL)
        #print >> sys.stderr, list(generate_metadata(doc))
        resources, first_id = metadata_dict(generate_metadata(doc), nesteddict=False)
        record = doc.OAI_PMH

        # Entry for the first id reported by metadata_dict
        resource = resources[first_id]
Example #55
0
 def test_namespace_free_xhtml3(self):
     'namespace-free XHTML' + '...as XML with pretty print'
     #The line above is an expression, not a docstring, so it is kept as is
     doc = self._build_namespace_free_xhtml()
     buf = cStringIO.StringIO()
     xml_print(doc, stream=buf, indent=True)
     serialized = buf.getvalue()
     #Compare with the expected text using whitespace=False
     differences = treecompare.xml_diff(serialized, XHTML_EXPECTED_3, whitespace=False)
     differences = '\n'.join(differences)
     self.assertFalse(differences, msg=(None, differences))
     #Make sure we can parse the result
     bindery.parse(serialized)
Example #56
0
 def test_avt1(self):
     """Test AVT"""
     doc = bindery.parse(self.DOC)
     #FIXME: try to work it so , prefixes= is not needed for this case (declaration in scope)
     hello = doc.xml_avt(u'Hello, {ns:friends/ns:bff}', prefixes=self.PREFIXES)
     self.assertEqual(hello, u'Hello, Suzie Q')

     #One rendered template per bff element, in document order
     big_ups = []
     for f in doc.friends.bff:
         big_ups.append(f.xml_avt(u'Big up, {.}, rank {@rank}'))
     self.assertEqual(big_ups, [u'Big up, Suzie Q, rank 1', u'Big up, Betty Boost, rank 2'])

     #The same elements, ordered by their rank attribute first
     ordered = sorted(doc.friends.bff, key=attrgetter("rank"))
     rank_lines = []
     for f in ordered:
         rank_lines.append(f.xml_avt(u'{@rank}: {.}'))
     self.assertEqual(rank_lines, [u'1: Suzie Q', u'2: Betty Boost'])
     return