def handler(req):
    """mod_python handler: fetch PubMed records for the comma-separated ids
    in the request URI and return a plain-text tally of MeSH headings.

    Output is one line per heading: "<heading> # <count>/<n articles>",
    most frequent heading first.  Only GET is allowed.
    """
    ## uris should be of the form
    ## http://rguha.ath.cx/~rguha/cicc/rest/depict/SMILES
    if req.method != 'GET':
        req.err_headers_out['Allow'] = 'GET'
        # raise E(v) is equivalent to the old `raise E, v` and also valid syntax
        raise apache.SERVER_RETURN(apache.HTTP_METHOD_NOT_ALLOWED)

    base_url = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&retmode=xml&id='
    uri_parts = req.uri.split('/')
    ids = ','.join([part.strip() for part in uri_parts[-1].split(',')])
    url = base_url + ids

    heading_counts = {}
    narticle = 0
    # read the whole efetch response in one shot
    data = urllib.urlopen(url).read()
    doc = XML(data)
    for article in doc.findall('PubmedArticle'):
        narticle += 1
        for mh in article.findall('MedlineCitation/MeshHeadingList/MeshHeading/DescriptorName'):
            # dict.get avoids the O(n) `in d.keys()` membership test
            heading_counts[mh.text] = heading_counts.get(mh.text, 0) + 1

    ## most frequent first
    ranked = sorted(heading_counts.items(), key=operator.itemgetter(1), reverse=True)
    result = ''
    for heading, count in ranked:
        result += '%s # %d/%d\n' % (heading, count, narticle)

    req.content_type = 'text/plain'
    req.write(result)
    return apache.OK
def convert(self, data, cache, **kwargs):
    """Render KML Placemark names/descriptions into an HTML <div> and
    store the serialized markup on the cache object."""
    container = Element('div')
    kmldom = XML(data)
    # the root tag is '{namespace-uri}kml'; stripping the 'kml' chars
    # leaves the '{namespace-uri}' prefix used for qualified lookups
    ns = kmldom.tag.strip('kml')
    for placemark in kmldom.findall('.//%sPlacemark' % ns):
        for title in placemark.findall(ns + 'name'):
            heading = Element('h2')
            heading.text = title.text
            container.append(heading)
        for desc in placemark.findall(ns + 'description'):
            if not desc.text:
                continue
            try:
                text = desc.text.encode('ascii', 'xmlcharrefreplace').strip()
            except:
                text = desc.text.strip()
            text = sanitize(text)
            wrapped = XML('<div>' + text.encode('ascii', 'xmlcharrefreplace') + '</div>')
            container.append(wrapped)
    body = tostring(container)
    cache.setData(body)
    return cache
def convert(self, data, cache, **kwargs):
    """Convert each KML Placemark's name and description into HTML,
    caching the serialized result."""
    kmldom = XML(data)
    ns = kmldom.tag.strip('kml')  # '{namespace-uri}' prefix of the root tag
    html_root = Element('div')
    placemark_list = kmldom.findall('.//%sPlacemark' % ns)
    for pm in placemark_list:
        name_nodes = pm.findall(ns + 'name')
        for name_node in name_nodes:
            header = Element('h2')
            header.text = name_node.text
            html_root.append(header)
        desc_nodes = pm.findall(ns + 'description')
        for desc_node in desc_nodes:
            if desc_node.text:
                try:
                    cleaned = desc_node.text.encode('ascii', 'xmlcharrefreplace').strip()
                except:
                    cleaned = desc_node.text.strip()
                cleaned = sanitize(cleaned)
                div = XML('<div>' + cleaned.encode('ascii', 'xmlcharrefreplace') + '</div>')
                html_root.append(div)
    cache.setData(tostring(html_root))
    return cache
def DataFeedQuery(self,start_date,end_date,table_id,report_type,parse_as="flat",sort='',filters='',max_results='50',chart_type="table"): m_indx = report_type.index("m-") d_indx = report_type.index("d-") dimensions = report_type[d_indx+2:m_indx-1] for d in dimensions.split(","): d = "ga:" + d +"," dim = d[:-1] metrics = report_type[m_indx+2:] for m in metrics.split(","): m = "ga:" + m +"," metr = m[:-1] #try: print "TEST3" if max_results == '': max_results = 50 self.data_feed_query = gdata.analytics.client.DataFeedQuery({ 'ids': table_id, 'start-date': start_date, 'end-date': end_date, 'dimensions':dim, 'metrics':metr, 'sort': sort, 'filters': filters, 'max-results': max_results}) print self.data_feed_query self.data_feed = self.my_client.GetDataFeed(self.data_feed_query) self.parsed_data_feed = XML(str(self.data_feed)) print self.parsed_data_feed if parse_as == "raw": return self.parsed_data_feed,len(self.parsed_data_feed.getiterator('{http://www.w3.org/2005/Atom}entry')),True else: return self.ParseDataFeedResults(self.parsed_data_feed,parse_as),len(self.parsed_data_feed.getiterator('{http://www.w3.org/2005/Atom}entry')),True """except:
def process_rss_feed(request, link_id_encoded, website_encode_id):
    """Fetch a partner RSS feed and rewrite each <link> so it routes through
    our tracking URL, then serve the modified XML.

    Renders the custom-error page if the upstream feed cannot be fetched.
    """
    from atrinsic.base.models import Link, Website
    from atrinsic.web.helpers import base36_decode
    from elementtree.ElementTree import XML, tostring
    import urllib2
    link = Link.objects.get(pk=base36_decode(link_id_encoded))
    website = Website.objects.get(pk=base36_decode(website_encode_id))
    tracking_url = link.track_html_ape(website, link_only=True)
    try:
        raw_response = urllib2.urlopen(link.link_content)
    except Exception:
        # best-effort: any fetch failure is reported as a feed timeout
        return AQ_render_to_response(request, 'base/custom_error.html', {
            'errmsg': RSS_TIMEOUT,
        }, context_instance=RequestContext(request))
    tree = XML(raw_response.read())
    for node in tree.getiterator('link'):
        if not node.text:
            continue
        # str.find returns -1 when ".com" is absent; the old unconditional
        # `find + 4` then sliced at index 3 and mangled the URL
        marker = node.text.find(".com")
        if marker == -1:
            continue
        node.text = tracking_url + "&url=" + node.text[marker + 4:]
    return render_to_response("blank_xml_template.html", {"XML": tostring(tree)},
                              mimetype="application/xhtml+xml")
def extract_title(data):
    """Return the first <name> element's text from a KML document,
    or 'N/A' when no (non-empty) name is present."""
    kmldom = XML(data)
    ns = kmldom.tag.strip('kml')  # '{namespace-uri}' prefix of the root tag
    titles = kmldom.findall('.//%sname' % ns)
    # guard the text as well: an empty <name/> has text None and the old
    # code crashed on None.strip()
    if titles and titles[0].text:
        return titles[0].text.strip()
    return 'N/A'
def extractfeatures_from_file(data):
    """Collect Point/LineString/Polygon geometries from a KML document.

    Returns a dict with keys 'MultiPoint', 'MultiLineString' and
    'MultiPolygon'; each value is the corresponding multi-geometry, or
    None when no geometry of that kind parsed successfully.
    """
    kmldom = XML(data)
    ns = kmldom.tag.strip('kml')  # '{namespace-uri}' prefix of the root tag
    point_geoms = []
    line_geoms = []
    poly_geoms = []

    for point in kmldom.findall('.//%sPoint' % ns):
        coordinates = point.findall('.//%scoordinates' % ns)
        for coordinate in coordinates:
            # a Point holds a single "lon,lat[,alt]" tuple
            coords = [float(c) for c in coordinate.text.strip().split(',')]
            try:
                point_geoms.append(Point(coords))
            except:
                logger.info('invalid point geometry: %s' % coordinates[:10])

    for line in kmldom.findall('.//%sLineString' % ns):
        coordinates = line.findall('.//%scoordinates' % ns)
        for coordinate in coordinates:
            # whitespace-separated list of "lon,lat[,alt]" tuples
            coords = [[float(c) for c in pair.split(',')]
                      for pair in coordinate.text.split()]
            try:
                line_geoms.append(LineString(coords))
            except:
                logger.info('invalid linestring geometry: %s' % coordinates[:10])

    for polygon in kmldom.findall('.//%sPolygon' % ns):
        coordinates = polygon.findall('.//%scoordinates' % ns)
        for coordinate in coordinates:
            coords = [[float(c) for c in pair.split(',')]
                      for pair in coordinate.text.split()]
            try:
                poly_geoms.append(Polygon(coords))
            except:
                logger.info('invalid polygon geometry: %s' % coordinates[:10])

    result = {'MultiPoint': None, 'MultiLineString': None, 'MultiPolygon': None}
    if point_geoms:
        result['MultiPoint'] = MultiPoint(point_geoms)
    if line_geoms:
        result['MultiLineString'] = MultiLineString(line_geoms)
    if poly_geoms:
        result['MultiPolygon'] = MultiPolygon(poly_geoms)
    return result
def extract_description(data):
    """Concatenate all <description> texts in a KML document, skipping the
    boilerplate 'Double click to zoom in' entries.  Returns '' when none."""
    kmldom = XML(data)
    ns = kmldom.tag.strip('kml')  # '{namespace-uri}' prefix of the root tag
    desc = ''
    for description in kmldom.findall('.//%sdescription' % ns):
        # empty <description/> elements have text None; the old code
        # crashed on None.strip()
        text = (description.text or '').strip()
        if text and text != 'Double click to zoom in':
            desc += text
    return desc
def create(self, trans, payload, **kwd):
    """
    POST /api/forms
    Creates a new form.
    """
    if not trans.user_is_admin():
        trans.response.status = 403
        return "You are not authorized to create a new form."
    xml_text = payload.get('xml_text', None)
    if xml_text is None:
        trans.response.status = 400
        return "Missing required parameter 'xml_text'."
    #enhance to allow creating from more than just xml
    new_form = form_factory.from_elem(XML(xml_text))
    trans.sa_session.add(new_form)
    trans.sa_session.flush()
    encoded_id = trans.security.encode_id(new_form.id)
    value_mapper = {
        'id': trans.security.encode_id,
        'form_definition_current_id': trans.security.encode_id,
    }
    item = new_form.to_dict(view='element', value_mapper=value_mapper)
    item['url'] = url_for('form', id=encoded_id)
    return [item]
def test_input_value_wrapper(tool):
    """str() of an InputValueWrapper yields the raw submitted value."""
    param_elem = XML('<param name="blah" type="integer" size="4" value="10" min="0" />')
    int_param = IntegerToolParameter(tool, param_elem)
    wrapped = InputValueWrapper(int_param, "5")
    assert str(wrapped) == "5"
def __test_arbitrary_path_rewriting(self):
    """Select-parameter field values that are paths go through the
    compute environment's path rewriter before hitting the command line."""
    self.job.parameters = [
        JobParameter(name="index_path", value="\"/old/path/human\"")
    ]
    select_elem = XML('''<param name="index_path" type="select">
        <option value="/old/path/human">Human</option>
        <option value="/old/path/mouse">Mouse</option>
    </param>''')
    parameter = SelectToolParameter(self.tool, select_elem)

    def fake_get_field_by_name_for_value(name, value, trans, other_values):
        # the evaluator must ask for the 'path' field of the stored value
        assert value == "/old/path/human"
        assert name == "path"
        return ["/old/path/human"]

    parameter.options = Bunch(
        get_field_by_name_for_value=fake_get_field_by_name_for_value)
    self.tool.set_params({"index_path": parameter})
    self.tool._command_line = "prog1 $index_path.fields.path"

    def rewrite_path(v):
        # deployment-specific rewriter: swap the /old prefix for /new
        if v:
            v = v.replace("/old", "/new")
        return v

    self._set_compute_environment(path_rewriter=rewrite_path)
    command_line, extra_filenames = self.evaluator.build()
    self.assertEquals(command_line, "prog1 /new/path/human")
def test_conditional_evaluation(self):
    """Both the conditional's test param and its case inputs are reachable
    from the cheetah template under the conditional's namespace."""
    select_elem = XML(
        '''<param name="always_true" type="select"><option value="true">True</option></param>'''
    )
    test_param = SelectToolParameter(self.tool, select_elem)
    cond = Conditional()
    cond.name = "c"
    cond.test_param = test_param
    case = ConditionalWhen()
    case.inputs = {"thresh": self.tool.test_thresh_param()}
    case.value = "true"
    cond.cases = [case]
    self.tool.set_params({"c": cond})
    self.job.parameters = [
        JobParameter(
            name="c",
            value='''{"thresh": 4, "always_true": "true", "__current_case__": 0}''')
    ]
    self.tool._command_line = "prog1 --thresh=${c.thresh} --test_param=${c.always_true}"
    self._set_compute_environment()
    command_line, extra_filenames = self.evaluator.build()
    self.assertEquals(command_line, "prog1 --thresh=4 --test_param=true")
def __split_models(self, xmlDoc):
    """generator that takes parameter xmlDoc and splits it into many
    xml files, with only one model per each

    Yields (model_id, element_tree) pairs, one per <Model> child of the
    document's <Models> element.
    """
    elem = XML(xmlDoc)
    models = elem.find("Models")
    # `if models:` tested Element truthiness, which is False for a childless
    # element and deprecated in ElementTree; compare against None instead
    if models is not None:
        elem.remove(models)
        for model in models:
            to_return = copy.deepcopy(elem)
            new_models = Element("Models")
            for a in models.attrib:
                new_models.attrib[a] = models.attrib[a]
            new_models.append(model)
            to_return.append(new_models)
            yield (model.attrib['id'], to_return)
    else:
        pass  # TODO return error
def xml2ListOfDict_Tutorial(xml):
    """Same as previous function, but the hard (wrong) way."""
    records = []
    for element in XML(xml):
        fields = {}
        for child in element:
            fields[child.tag] = child.text
        records.append(fields)
    return records
def quick_output(format, format_source=None, change_format_xml=None):
    """Build a minimal ToolOutput fixture with the given format settings."""
    output = ToolOutput("test_output")
    output.format = format
    output.format_source = format_source
    # change_format is an Element when XML was supplied, else None
    output.change_format = XML(change_format_xml) if change_format_xml else None
    return output
def xmltidy(text):
    """send back good lookin' xml (basically a syntax checker)

    Returns the re-serialized document, or '' when the input does not parse.
    """
    try:
        return tostring(XML(text))
    except Exception:
        # narrowed from a bare `except:` which also swallowed
        # KeyboardInterrupt/SystemExit
        #print text
        return ''
def test_select_wrapper_simple_options(tool):
    """Wrapper exposes value, parameter name and option label for a select."""
    select_elem = XML('''<param name="blah" type="select">
        <option value="x">I am X</option>
        <option value="y" selected="true">I am Y</option>
        <option value="z">I am Z</option>
    </param>''')
    select_param = SelectToolParameter(tool, select_elem)
    wrapped = SelectToolParameterWrapper(select_param, "x", tool.app)
    assert str(wrapped) == "x"
    assert wrapped.name == "blah"
    assert wrapped.value_label == "I am X"
def process_rss_feed(request, link_id_encoded, website_encode_id):
    """Fetch a partner RSS feed and rewrite each <link> so it routes through
    our tracking URL, then serve the modified XML.

    Renders the custom-error page if the upstream feed cannot be fetched.
    """
    from atrinsic.base.models import Link, Website
    from atrinsic.web.helpers import base36_decode
    from elementtree.ElementTree import XML, tostring
    import urllib2
    link = Link.objects.get(pk=base36_decode(link_id_encoded))
    website = Website.objects.get(pk=base36_decode(website_encode_id))
    tracking_url = link.track_html_ape(website, link_only=True)
    try:
        raw_response = urllib2.urlopen(link.link_content)
    except Exception:
        # best-effort: any fetch failure is reported as a feed timeout
        return AQ_render_to_response(request, 'base/custom_error.html', {
            'errmsg': RSS_TIMEOUT,
        }, context_instance=RequestContext(request))
    tree = XML(raw_response.read())
    for node in tree.getiterator('link'):
        if not node.text:
            continue
        # str.find returns -1 when ".com" is absent; the old unconditional
        # `find + 4` then sliced at index 3 and mangled the URL
        marker = node.text.find(".com")
        if marker == -1:
            continue
        node.text = tracking_url + "&url=" + node.text[marker + 4:]
    return render_to_response("blank_xml_template.html", {"XML": tostring(tree)},
                              mimetype="application/xhtml+xml")
def test_evaluation_of_optional_datasets(self):
    """Optional datasets must not break evaluation; in cheetah templates
    a missing optional dataset renders as the string 'None'."""
    data_elem = XML('''<param name="input1" type="data" optional="true"></param>''')
    data_param = DataToolParameter(self.tool, data_elem)
    self.job.parameters = [JobParameter(name="input1", value=u'null')]
    self.tool.set_params({"input1": data_param})
    self.tool._command_line = "prog1 --opt_input='${input1}'"
    self._set_compute_environment()
    command_line, extra_filenames = self.evaluator.build()
    self.assertEquals(command_line, "prog1 --opt_input='None'")
def _setup_blast_tool(tool, multiple=False):
    """Write a two-entry blastdb loc file and build a select param over it."""
    tool.app.write_test_tool_data("blastdb.loc", "val1\tname1\tpath1\nval2\tname2\tpath2\n")
    param_elem = XML(
        '''<param name="database" type="select" label="Nucleotide BLAST database" multiple="%s">
        <options from_file="blastdb.loc">
            <column name="value" index="0"/>
            <column name="name" index="1"/>
            <column name="path" index="2"/>
        </options>
    </param>''' % multiple)
    return SelectToolParameter(tool, param_elem)
def param(self):
    """Lazily build (and cache) the data parameter under test."""
    if not self._param:
        multi_text = 'multiple="True"' if self.multiple else ""
        optional_text = 'optional="True"' if self.optional else ""
        template_xml = '''<param name="data2" type="data" ext="txt" %s %s></param>'''
        self.param_xml = XML(template_xml % (multi_text, optional_text))
        self._param = basic.DataToolParameter(self.mock_tool, self.param_xml)
    return self._param
def _drilldown_parameter(tool):
    """Build a nested drill-down select parameter fixture."""
    drill_elem = XML(
        '''<param name="some_name" type="drill_down" display="checkbox" hierarchy="recurse" multiple="true">
        <options>
            <option name="Heading 1" value="heading1">
                <option name="Option 1" value="option1"/>
                <option name="Option 2" value="option2"/>
                <option name="Heading 1" value="heading1">
                    <option name="Option 3" value="option3"/>
                    <option name="Option 4" value="option4"/>
                </option>
            </option>
            <option name="Option 5" value="option5"/>
        </options>
    </param>''')
    return DrillDownSelectToolParameter(tool, drill_elem)
def param(self):
    """Lazily build (and cache) the select parameter under test."""
    if not self._param:
        multi_text = 'multiple="True"' if self.multiple else ""
        optional_text = 'optional="True"' if self.optional else ""
        data_ref_text = 'data_ref="input_bam"' if self.set_data_ref else ""
        options_text = self.options_xml
        template_xml = '''<param name="my_name" type="%s" %s %s %s>%s</param>'''
        self.param_xml = XML(template_xml % (
            self.type, data_ref_text, multi_text, optional_text, options_text))
        self._param = basic.SelectToolParameter(self.mock_tool, self.param_xml)
    return self._param
def param(self):
    """Lazily build (and cache) the column-list parameter under test."""
    if not self._param:
        multi_text = 'multiple="True"' if self.multiple else ""
        optional_text = 'optional="True"' if self.optional else ""
        data_ref_text = 'data_ref="input_tsv"' if self.set_data_ref else ""
        template_xml = '''<param name="my_name" type="%s" %s %s %s %s></param>'''
        self.param_xml = XML(template_xml % (
            self.type, data_ref_text, multi_text, optional_text, self.other_attributes))
        self._param = basic.ColumnListParameter(self.mock_tool, self.param_xml)
        # the column list resolves columns against a tabular reference input
        self._param.ref_input = bunch.Bunch(formats=[
            model.datatypes_registry.get_datatype_by_extension("tabular")
        ])
    return self._param
def test_context(self):
    """Lazily build a DatasetMatcher wired to a (possibly filtered) data param."""
    if self._test_context is None:
        option_xml = ""
        if self.filtered_param:
            option_xml = '''<options><filter type="data_meta" ref="data1" key="dbkey" /></options>'''
        param_xml = XML('''<param name="data2" type="data" ext="txt">%s</param>''' % option_xml)
        self.param = basic.DataToolParameter(
            tool=self.tool,
            elem=param_xml,
        )
        fake_trans = bunch.Bunch(
            app=self.app,
            get_current_user_roles=lambda: self.current_user_roles,
            workflow_building_mode=True,
        )
        self._test_context = dataset_matcher.DatasetMatcher(
            trans=fake_trans,
            param=self.param,
            value=[],
            other_values=self.other_values,
        )
    return self._test_context
def parsePodcast(podcastXML, config, filemode=None):
    """ Access to the podcast and return all information in a podcast object
    return None when no info available, when error raise MyCancel

    podcastXML : RSS document text; config supplies podcastDownloadPath;
    filemode == "title" names local files after the item title instead of
    the URL tail.
    """
    podcastInfo = PodcastInfo()
    podcastNode = XML(podcastXML)
    channelNode = podcastNode.find('channel')
    ### TODO handle when title is empty, use the program title
    titlePodcast = channelNode.findtext('title', '')
    # ascii-only title variant is what gets used in filesystem paths
    titlePodcastAscii = titlePodcast.encode('ascii', 'ignore')
    if not titlePodcast:
        titlePodcast = 'UNDEFINED'
        titlePodcastAscii = 'UNDEFINED'
    else:
        titlePodcastAscii = getCroppedFilename(titlePodcastAscii)
        titlePodcastAscii = cleanString(titlePodcastAscii)
    ## TODO support podcasts named the same way... add a hashcode after the
    ## title, and a main podcast.xml file at the root
    # the target local directory
    targetDirectory = os.path.join(config.podcastDownloadPath, titlePodcastAscii)
    chandescription = channelNode.findtext('description', '')
    #chanImage = getXMLAttrText(channeldom, 'itunes:image', 'href')
    ####
    podcastInfo.title = titlePodcast
    podcastInfo.description = chandescription
    #podcastInfo.image = chanImage
    podcastInfo.titleAscii = titlePodcastAscii
    podcastInfo.targetDirectory = targetDirectory
    # Get the local info, and a link on the element node, to be modified later
    podcastLocalInfo = getPodcastLocalItems(podcastInfo)
    # parse the item list
    #items = channeldom.getElementsByTagName('item')
    itemExist = channelNode.find('item') != None
    if not itemExist:
        # return empty mark
        return None
    #for item in items:
    for itemNode in channelNode.getiterator('item'):
        descr = ''
        type = ''
        length = ''
        title = itemNode.findtext('title', '')
        descr = itemNode.findtext('description', '')
        #pubDate = getXMLTagText(item, 'pubDate')
        #duration = getXMLTagText(item, 'itunes:duration')
        enclosureNode = itemNode.find('enclosure')
        if enclosureNode == None:
            continue
        # the url can be a redirect; urllib follows this link for download
        url = enclosureNode.get('url')
        if url == None:
            # when no url, continue
            continue
        # search the local store: if the file is already here (downloaded), reuse it
        foundLocalItem = None
        for podlocalitem in podcastLocalInfo.itemsInfo:
            if podlocalitem.url == url:
                foundLocalItem = podlocalitem
        if foundLocalItem != None:
            podcastInfo.itemsInfo.append(foundLocalItem)
            continue
        # type is not always defined?!, can test with urllib when downloaded
        type = enclosureNode.get('type')
        # Length only used for information as list, exact size is found later during download
        length = enclosureNode.get('length')
        #####################
        podItem = PodcastItem()
        podItem.title = title
        podItem.description = descr
        podItem.url = url
        podItem.type = type
        podItem.length = length
        podcastInfo.itemsInfo.append(podItem)
    # TODO ?? return when ! config.podcastDownload
    # init the titles and the filename, filelocation
    for podItem in podcastInfo.itemsInfo:
        # the display title
        title = podItem.title
        # when local, don't process filenames and add >> in the titles
        if podItem.isLocal:
            # control that the file exists and is fully downloaded; when not, retry download later
            if not os.path.exists(podItem.fileLocation):
                podItem.isLocal = False
                podItem.flagfinish = False
            elif podItem.flagfinish:
                title = '>> ' + title
            else:
                title = '<> ' + title
        # when not local, or no file found
        else:
            # TODO test that the name is valid: no special chars, sufficient length,
            # and not always the same name; OR add something in the MEDIA xml file
            ## TODO when a podcast title repeats, use a hashcode after the title filename
            ### NOTE: path-length limit issue — overlong filenames may collide once cropped
            # podcastInfo.useTitleForName or
            if filemode == "title":
                filename = podItem.title
                podcastInfo.useTitleForName  # NOTE(review): bare attribute access, no effect — leftover?
            else:
                filename = getLastStringPart(
                    podItem.url.encode('ascii', 'ignore'), '/')
                filename = getBeforeStringPart(filename, '?')
            ### TODO test if this filename already exists in the list -> if YES mark it
            ### using a poditem flag, and use the title instead
            filename = getCroppedFilename(filename)  # 42-4
            filename = cleanString(filename)
            fileLocation = targetDirectory + '\\' + filename
            # set the properties
            podItem.filename = filename
            podItem.fileLocation = fileLocation
            # When the file exists but has no entry in the xml, flag it here
            if os.path.exists(podItem.fileLocation):
                title = '>? ' + title
        # process size (megabytes, one decimal), from the known size or the RSS length hint
        size = 0
        if podItem.size != 0:
            size = podItem.size
        elif podItem.length and len(podItem.length) > 0:
            size = round(long(podItem.length) / 1000000.0, 1)
        # set size and title
        if size != 0:
            podItem.size = size
            title = title + ' (' + str(size) + 'Mo)'
        podcastInfo.itemFilenames.append(podItem.filename)
        podcastInfo.titles2display.append(title)
    # search the folder for already-downloaded files not referenced in the xml
    appendLocalMedia(podcastInfo, podcastLocalInfo)
    # return the podcast info with items
    return podcastInfo
def test_thresh_param(self):
    """Build the integer 'thresh' parameter shared by the evaluation tests."""
    thresh_elem = XML('<param name="thresh" type="integer" value="5" />')
    return IntegerToolParameter(self, thresh_elem)
<name>John Doe</name> </author> <id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id> <entry> <title type="xhtml">Atom-Powered <br/> Robots Run Amok</title> <link href="http://example.org/2003/12/13/atom03"/> <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id> <updated>2003-12-13T18:30:02Z</updated> <summary>Some text.</summary> </entry> </feed>""" etree = ElementTree(file=StringIO.StringIO(content)) feed = XML(content) print etree print feed #print len(feed) #print feed[0] #print feed.keys() ATOM = "http://www.w3.org/2005/Atom" entry = etree.getiterator('{%s}entry'%ATOM)[0] new_lin = SubElement(entry, '{%s}link'%ATOM) new_lin.set('rel', 'source') new_lin.set('href', 'http://somthing.org')
typeID = typeID_List[index] typeID_Progress = 100.0 * ((1.0 * index) / typeID_List_length) if (verbose == 1): print "Getting eve-central data for typeID ", typeID_List[ index], " - ", typeName_List[index] contents = eve_central_query_market_data(regionID, option) if (verbose == 1): print "Response from eve-central:" print contents print "END RESPONSE" if (contents[0:5] == "<?xml"): # Parse the XML response from EVE-Central # See usage here for ElementTree: http://effbot.org/zone/element-index.htm tree = XML(contents) # From a string if (verbose == 1): print "XML received from eve-central:" print tree #print "XML Element count = ", len(tree) # the tree root is the toplevel html element tree_map = dict((c, p) for p in tree.getiterator() for c in p) root = tree for c in root.getchildren(): if (verbose == 1): print c.tag for d in c.getchildren(): if (verbose == 1): print " ", d.tag
def parsePodcast(podcastXML, config, filemode=None):
    """ Access to the podcast and return all information in a podcast object
    return None when no info available, when error raise MyCancel

    podcastXML : RSS document text; config supplies podcastDownloadPath;
    filemode == "title" names local files after the item title instead of
    the URL tail.
    """
    podcastInfo = PodcastInfo()
    podcastNode = XML(podcastXML)
    channelNode = podcastNode.find('channel')
    ### TODO handle when title is empty, use the program title
    titlePodcast = channelNode.findtext('title', '')
    # ascii-only title variant is what gets used in filesystem paths
    titlePodcastAscii = titlePodcast.encode('ascii', 'ignore')
    if not titlePodcast:
        titlePodcast = 'UNDEFINED'
        titlePodcastAscii = 'UNDEFINED'
    else:
        titlePodcastAscii = getCroppedFilename(titlePodcastAscii)
        titlePodcastAscii = cleanString(titlePodcastAscii)
    ## TODO support podcasts named the same way... add a hashcode after the
    ## title, and a main podcast.xml file at the root
    # the target local directory
    targetDirectory = os.path.join(config.podcastDownloadPath, titlePodcastAscii)
    chandescription = channelNode.findtext('description', '')
    #chanImage = getXMLAttrText(channeldom, 'itunes:image', 'href')
    ####
    podcastInfo.title = titlePodcast
    podcastInfo.description = chandescription
    #podcastInfo.image = chanImage
    podcastInfo.titleAscii = titlePodcastAscii
    podcastInfo.targetDirectory = targetDirectory
    # Get the local info, and a link on the element node, to be modified later
    podcastLocalInfo = getPodcastLocalItems(podcastInfo)
    # parse the item list
    #items = channeldom.getElementsByTagName('item')
    itemExist = channelNode.find('item') != None
    if not itemExist:
        # return empty mark
        return None
    #for item in items:
    for itemNode in channelNode.getiterator('item'):
        descr = ''
        type = ''
        length = ''
        title = itemNode.findtext('title', '')
        descr = itemNode.findtext('description', '')
        #pubDate = getXMLTagText(item, 'pubDate')
        #duration = getXMLTagText(item, 'itunes:duration')
        enclosureNode = itemNode.find('enclosure')
        if enclosureNode == None:
            continue
        # the url can be a redirect; urllib follows this link for download
        url = enclosureNode.get('url')
        if url == None:
            # when no url, continue
            continue
        # search the local store: if the file is already here (downloaded), reuse it
        foundLocalItem = None
        for podlocalitem in podcastLocalInfo.itemsInfo:
            if podlocalitem.url == url:
                foundLocalItem = podlocalitem
        if foundLocalItem != None:
            podcastInfo.itemsInfo.append(foundLocalItem)
            continue
        # type is not always defined?!, can test with urllib when downloaded
        type = enclosureNode.get('type')
        # Length only used for information as list, exact size is found later during download
        length = enclosureNode.get('length')
        #####################
        podItem = PodcastItem()
        podItem.title = title
        podItem.description = descr
        podItem.url = url
        podItem.type = type
        podItem.length = length
        podcastInfo.itemsInfo.append(podItem)
    # TODO ?? return when ! config.podcastDownload
    # init the titles and the filename, filelocation
    for podItem in podcastInfo.itemsInfo:
        # the display title
        title = podItem.title
        # when local, don't process filenames and add >> in the titles
        if podItem.isLocal:
            # control that the file exists and is fully downloaded; when not, retry download later
            if not os.path.exists(podItem.fileLocation):
                podItem.isLocal = False
                podItem.flagfinish = False
            elif podItem.flagfinish:
                title = '>> ' + title
            else:
                title = '<> ' + title
        # when not local, or no file found
        else:
            # TODO test that the name is valid: no special chars, sufficient length,
            # and not always the same name; OR add something in the MEDIA xml file
            ## TODO when a podcast title repeats, use a hashcode after the title filename
            ### NOTE: path-length limit issue — overlong filenames may collide once cropped
            # podcastInfo.useTitleForName or
            if filemode == "title":
                filename = podItem.title
                podcastInfo.useTitleForName  # NOTE(review): bare attribute access, no effect — leftover?
            else:
                filename = getLastStringPart(podItem.url.encode('ascii', 'ignore'), '/')
                filename = getBeforeStringPart(filename, '?')
            ### TODO test if this filename already exists in the list -> if YES mark it
            ### using a poditem flag, and use the title instead
            filename = getCroppedFilename(filename)  # 42-4
            filename = cleanString(filename)
            fileLocation = targetDirectory + '\\' + filename
            # set the properties
            podItem.filename = filename
            podItem.fileLocation = fileLocation
            # When the file exists but has no entry in the xml, flag it here
            if os.path.exists(podItem.fileLocation):
                title = '>? ' + title
        # process size (megabytes, one decimal), from the known size or the RSS length hint
        size = 0
        if podItem.size != 0:
            size = podItem.size
        elif podItem.length and len(podItem.length) > 0:
            size = round(long(podItem.length) / 1000000.0, 1)
        # set size and title
        if size != 0:
            podItem.size = size
            title = title + ' (' + str(size) + 'Mo)'
        podcastInfo.itemFilenames.append(podItem.filename)
        podcastInfo.titles2display.append(title)
    # search the folder for already-downloaded files not referenced in the xml
    appendLocalMedia(podcastInfo, podcastLocalInfo)
    # return the podcast info with items
    return podcastInfo
def AccountFeedQuery(self, max_results='50'):
    """Fetch the Analytics account feed and return it parsed as an Element."""
    # the gdata AccountFeedQuery class uri-encodes the query params for us
    feed_query = AccountFeedQuery({'max-results': max_results})
    raw_feed = self.my_client.GetDataFeed(feed_query)
    self.account_feed_query = feed_query
    self.account_feed = raw_feed
    self.parsed_account_feed = XML(str(raw_feed))
    return self.parsed_account_feed
class AqAnalytics(object):
    # Thin wrapper around gdata's Google Analytics client: authentication,
    # account/data feed queries, and feed-parsing helpers.

    def __init__(self, user_name, password):
        # identifies this application to the Google Analytics API
        self.SOURCE_APP_NAME = 'Atrinsic-Network'
        self.my_client = gdata.analytics.client.AnalyticsClient(source=self.SOURCE_APP_NAME)
        self.account_feed = ''
        self.data_feed = ''
        self.table_ids = None
        self.user_name = user_name
        self.password = password

    def authenticate(self):
        # ClientLogin with the stored credentials; both auth and generic
        # gdata errors are reported as invalid credentials
        try:
            self.my_client.client_login(self.user_name, self.password, self.SOURCE_APP_NAME, 'analytics')
        except gdata.client.BadAuthentication:
            raise Exception('Invalid user credentials given.')
        except gdata.client.Error:
            raise Exception('Invalid user credentials given.')
        return True

    def AccountFeedQuery(self, max_results='50'):
        # DataFeedQuery simplifies constructing API queries and uri encodes params.
        self.account_feed_query = AccountFeedQuery({'max-results': max_results})
        self.account_feed = self.my_client.GetDataFeed(self.account_feed_query)
        self.parsed_account_feed = XML(str(self.account_feed))
        return self.parsed_account_feed

    def GetSiteList(self, feed_query=None):
        # Extract {'table_id', 'site_url'} dicts from an account feed
        # (defaults to fetching a fresh one).
        if feed_query == None:
            feed_query = self.AccountFeedQuery()
        complete_feed = []
        feed_details = {}
        for elem in feed_query:
            if list(elem):  # only entries that actually have child nodes
                for node in elem:
                    NS1 = '{http://schemas.google.com/analytics/2009}'
                    NS2 = '{http://www.w3.org/2005/Atom}'
                    # NOTE(review): tag[len(NS):] assumes the tag begins with
                    # that exact namespace prefix; NS1 and NS2 have different
                    # lengths, so mismatched tags produce garbage that simply
                    # fails both comparisons — confirm this is intended.
                    if node.tag[len(NS1):] == "tableId":
                        feed_details['table_id'] = node.text
                    elif node.tag[len(NS2):] == "title":
                        feed_details['site_url'] = node.text
                if feed_details:
                    complete_feed.append(feed_details)
                    feed_details = {}
        return complete_feed

    def DataFeedQuery(self, start_date, end_date, table_id, report_type, parse_as="flat", sort='', filters='', max_results='50', chart_type="table"):
        # Run a data feed query; report_type encodes dimensions/metrics as
        # "d-<dims> m-<metrics>".  Returns (parsed-or-raw feed, entry count, True).
        m_indx = report_type.index("m-")
        d_indx = report_type.index("d-")
        dimensions = report_type[d_indx+2:m_indx-1]
        # NOTE(review): the loop rebinds d each pass, so dim only reflects the
        # final dimension with its "ga:" prefix — confirm multi-dimension
        # queries behave as intended.
        for d in dimensions.split(","):
            d = "ga:" + d + ","
        dim = d[:-1]
        metrics = report_type[m_indx+2:]
        # same pattern (and same caveat) for the metrics
        for m in metrics.split(","):
            m = "ga:" + m + ","
        metr = m[:-1]
        #try:
        print "TEST3"
        if max_results == '':
            max_results = 50
        self.data_feed_query = gdata.analytics.client.DataFeedQuery({
            'ids': table_id,
            'start-date': start_date,
            'end-date': end_date,
            'dimensions': dim,
            'metrics': metr,
            'sort': sort,
            'filters': filters,
            'max-results': max_results})
        print self.data_feed_query
        self.data_feed = self.my_client.GetDataFeed(self.data_feed_query)
        self.parsed_data_feed = XML(str(self.data_feed))
        print self.parsed_data_feed
        # "raw" returns the parsed Element directly; otherwise run the
        # client-side flattener over the Atom entries
        if parse_as == "raw":
            return self.parsed_data_feed, len(self.parsed_data_feed.getiterator('{http://www.w3.org/2005/Atom}entry')), True
        else:
            return self.ParseDataFeedResults(self.parsed_data_feed, parse_as), len(self.parsed_data_feed.getiterator('{http://www.w3.org/2005/Atom}entry')), True
        # unreachable string literal kept from the original: disabled
        # error-handling branch matching the commented-out `#try:` above
        """except: 'body', 'headers', 'message', 'reason', 'status'
        error = sys.exc_info()[1]
        error_xml = XML(error.message[error.message.index("<"):])
        error_list = []
        for node in error_xml.getiterator("{http://schemas.google.com/g/2005}error"):
            for err in node.getchildren():
                if err.tag == "{http://schemas.google.com/g/2005}internalReason":
                    error_list.append(err.text)
        return (False,False,error_list)"""

    def ParseDataFeedResults(self, data_feed_query, parse_as):
        # Flatten feed entries into dicts / arrays / strings per parse_as;
        # returns (entries, header names seen).
        NS = '{http://schemas.google.com/analytics/2009}'
        e_list = []
        headers = []
        self.parse_as = parse_as  # consumed by SuperAppend below
        for elem in data_feed_query.getiterator('{http://www.w3.org/2005/Atom}entry'):
            # accumulator shape depends on the requested parse mode
            if parse_as == "dict":
                e_dict = {}
            elif parse_as == "array" or parse_as == "flat":
                e_dict = []
            else:
                e_dict = ""
            for node in elem.getchildren():
                e_dict, headers = self.SuperAppend(node, e_dict, headers)
            if parse_as == "flat":
                # flat mode splices the entry's values directly into the result
                for x in e_dict:
                    e_list.append(x)
            else:
                e_list.append(e_dict)
        return e_list, headers

    def SuperAppend(self, node, e_dict, headers):
        # Append one feed node's value to the accumulator; track the
        # metric/dimension name (sans "ga:" prefix) once in headers.
        if node.attrib.has_key('value'):
            try:
                headers.index(node.attrib['name'][3:])
            except:
                headers.append(node.attrib['name'][3:])
            if self.parse_as == "dict":
                e_dict[node.attrib['name'][3:]] = node.attrib['value']
            elif self.parse_as == "array" or self.parse_as == "flat":
                e_dict.append(node.attrib['value'])
            else:
                e_dict = node.attrib['value']
        return e_dict, headers
# Walk the typeID list and pull market data for each type from eve-central.
# NOTE(review): range(1, typeID_List_length - 1) skips the first AND last
# entries — confirm whether index 0 is a header row and the final entry is
# intentionally excluded.
for index in range(1, typeID_List_length - 1):
    typeID = typeID_List[index]
    # percentage progress through the list (presumably for display/reporting)
    typeID_Progress = 100.0 * ((1.0 * index) / typeID_List_length)
    if (verbose == 1):
        print "Getting eve-central data for typeID ", typeID_List[index], " - ", typeName_List[index]
    contents = eve_central_query_market_data(regionID, option)
    if (verbose == 1):
        print "Response from eve-central:"
        print contents
        print "END RESPONSE"
    # only attempt to parse responses that look like an XML document
    if (contents[0:5] == "<?xml"):
        # Parse the XML response from EVE-Central
        # See usage here for ElementTree: http://effbot.org/zone/element-index.htm
        tree = XML(contents)  # From a string
        if (verbose == 1):
            print "XML received from eve-central:"
            print tree
        #print "XML Element count = ", len(tree)
        # the tree root is the toplevel html element
        # child -> parent map over the whole tree
        # NOTE(review): tree_map is unused in this chunk — presumably consumed
        # further down in code not visible here; verify before removing.
        tree_map = dict((c, p) for p in tree.getiterator() for c in p)
        root = tree
        for c in root.getchildren():
            if (verbose == 1):
                print c.tag
            for d in c.getchildren():
                if (verbose == 1):
                    print " ", d.tag
def _parameter_for(self, **kwds):
    """Construct a ToolParameter from the XML snippet passed as the ``xml`` keyword.

    The snippet is parsed into an element tree and handed to the standard
    ``basic.ToolParameter`` factory together with this fixture's mock tool.
    """
    xml_text = kwds["xml"]
    return basic.ToolParameter.build(self.mock_tool, XML(xml_text))
def xml2ListOfDict(xml):
    """Convert an xmlDoc string to a python list of dictionaries.

    Each top-level child of the document root becomes one dict, mapping the
    tag of each of its child elements to that element's text.
    """
    records = []
    for element in XML(xml):
        record = {}
        for child in element:
            record[child.tag] = child.text
        records.append(record)
    return records
def create(self, trans, payload, **kwd):
    """
    POST /api/request_types
    Creates a new request type (external_service configuration).

    Admin-only. ``payload`` must supply:
      * xml_text            -- XML definition of the request type
      * request_form_id     -- encoded id of the request FormDefinition
      * sample_form_id      -- encoded id of the sample FormDefinition
      * external_service_id -- encoded id of the ExternalService
    Optional:
      * role_ids -- encoded Role ids to grant permissions on the new type

    On error the HTTP status is set on ``trans.response`` (403/400) and a
    plain-text message is returned; on success a one-element list holding
    the new request type's dict view (with its URL) is returned.
    """
    # Only admins may create request types.
    if not trans.user_is_admin():
        trans.response.status = 403
        return "You are not authorized to create a new request type (external_service configuration)."
    # Each required parameter follows the same pattern: missing -> 400 + message.
    xml_text = payload.get('xml_text', None)
    if xml_text is None:
        trans.response.status = 400
        return "Missing required parameter 'xml_text'."
    elem = XML(xml_text)
    request_form_id = payload.get('request_form_id', None)
    if request_form_id is None:
        trans.response.status = 400
        return "Missing required parameter 'request_form_id'."
    # Ids arriving over the API are encoded; decode before the DB lookup.
    request_form = trans.sa_session.query(
        trans.app.model.FormDefinition).get(
        trans.security.decode_id(request_form_id))
    sample_form_id = payload.get('sample_form_id', None)
    if sample_form_id is None:
        trans.response.status = 400
        return "Missing required parameter 'sample_form_id'."
    sample_form = trans.sa_session.query(
        trans.app.model.FormDefinition).get(
        trans.security.decode_id(sample_form_id))
    external_service_id = payload.get('external_service_id', None)
    if external_service_id is None:
        trans.response.status = 400
        return "Missing required parameter 'external_service_id'."
    external_service = trans.sa_session.query(
        trans.app.model.ExternalService).get(
        trans.security.decode_id(external_service_id))
    # Build the new request type from the parsed XML and the looked-up objects.
    request_type = request_type_factory.from_elem(elem, request_form,
                                                  sample_form, external_service)
    #FIXME: move permission building/setting to separate abstract method call and
    #allow setting individual permissions by role (currently only one action, so not strictly needed)
    role_ids = payload.get('role_ids', [])
    roles = [
        trans.sa_session.query(trans.model.Role).get(
            trans.security.decode_id(i))
        for i in role_ids
    ]  # if trans.app.security_agent.ok_to_display( trans.user, i ) ]
    permissions = {}
    if roles:
        #yikes, there has to be a better way?
        # Grant every permitted action on the new type to the supplied roles.
        for k, v in trans.model.RequestType.permitted_actions.items():
            permissions[trans.app.security_agent.get_action(
                v.action)] = roles
    if permissions:
        trans.app.security_agent.set_request_type_permissions(
            request_type, permissions)
    #flush objects
    trans.sa_session.add(request_type)
    trans.sa_session.flush()
    encoded_id = trans.security.encode_id(request_type.id)
    # Encode database ids in the serialized view so they are API-safe.
    item = request_type.to_dict(view='element',
                                value_mapper={
                                    'id': trans.security.encode_id,
                                    'request_form_id': trans.security.encode_id,
                                    'sample_form_id': trans.security.encode_id
                                })
    item['url'] = url_for('request_type', id=encoded_id)
    return [item]
class EpubDocument(object):
    """A class that parses and provides data about an ePub file"""

    def __init__(self, fname):
        # This is done according to this:
        # http://stackoverflow.com/questions/1388467/reading-epub-format
        # NOTE(review): print((...)) emits a tuple repr — looks like a 2to3
        # artifact left as debug output.
        print(("Opening:", fname))
        # An ePub is a zip archive; a non-zip file is rejected up front.
        try:
            self.book = zipfile.ZipFile(fname, "r")
        except zipfile.BadZipfile:
            raise ValueError("Invalid format")
        # container.xml lists the package (OPF) file(s) of the book.
        f = self.book.open('META-INF/container.xml')
        self.container = XML(f.read())
        f.close()
        roots = self.container.findall(
            './/{urn:oasis:names:tc:opendocument:xmlns:container}rootfile')
        self.roots = []
        for r in roots:
            self.roots.append(r.attrib['full-path'])
        # Only the first rootfile is used.
        opf = self.book.open(self.roots[0])
        # All manifest hrefs are relative to the OPF's directory.
        self.basepath = os.path.dirname(self.roots[0]) + "/"
        if self.basepath == '/':
            self.basepath = ""
        print(("BASEPATH:", self.basepath))
        data = opf.read()
        self.opf = XML(data)
        opf.close()
        # manifest_dict: item id -> archive path of the item.
        self.manifest = self.opf.find('{http://www.idpf.org/2007/opf}manifest')
        self.manifest_dict = {}
        for elem in self.manifest.findall(
                '{http://www.idpf.org/2007/opf}item'):
            self.manifest_dict[elem.attrib['id']] = self.basepath + \
                elem.attrib['href']
        self.spine = self.opf.find('{http://www.idpf.org/2007/opf}spine')
        self.tocentries = []
        # The spine's 'toc' attribute names the NCX table-of-contents item.
        self.toc_id = self.spine.attrib.get('toc', None)
        if self.toc_id:
            self.toc_fn = self.manifest_dict[self.toc_id]
            print(("TOC:", self.toc_fn))
            f = self.book.open(self.toc_fn)
            data = f.read()
            self.toc = XML(data)
            self.navmap = self.toc.find(
                '{http://www.daisy.org/z3986/2005/ncx/}navMap')
            # FIXME: support nested navpoints
            self.navpoints = self.navmap.findall(
                './/{http://www.daisy.org/z3986/2005/ncx/}navPoint')
            # Collect [label, content-src] pairs from the NCX navpoints.
            for np in self.navpoints:
                label = np.find(
                    '{http://www.daisy.org/z3986/2005/ncx/}navLabel').find(
                    '{http://www.daisy.org/z3986/2005/ncx/}text').text
                content = np.find(
                    '{http://www.daisy.org/z3986/2005/ncx/}content').attrib['src']
                if label and content:
                    self.tocentries.append([label, content])
        self.itemrefs = self.spine.findall(
            '{http://www.idpf.org/2007/opf}itemref')
        print(("IR:", self.itemrefs))
        # spinerefs: reading-order paths, with the basepath prefix stripped
        # so they match the 'src' values used in the toc entries.
        self.spinerefs = [
            self.manifest_dict[item.attrib['idref']][len(self.basepath):]
            for item in self.itemrefs]
        # I found one book that has a spine but no navmap:
        # "Der schwarze Baal" from manybooks.net
        # Also another has more entries on the spine than on the navmap
        # (Dinosauria, from feedbooks).
        # So, we need to merge these suckers. I will assume it's not completely
        # insane and the spine is always more complete.
        # Start from [path, path] placeholders, then overwrite with the real
        # [label, path] entry where a toc entry exists for that spine item.
        spinerefs2 = [[x, x] for x in self.spinerefs]
        for te in self.tocentries:
            idx = self.spinerefs.index(te[1])
            spinerefs2[idx] = te
        self.tocentries = spinerefs2
        # if not self.tocentries:
        #     # Alternative toc
        #     self.tocentries = [[item.attrib['idref'],
        # self.manifest_dict[item.attrib['idref']][len(self.basepath):]]
        # for item in self.itemrefs]
        print((self.tocentries))
        print((self.spinerefs))

    def getData(self, path):
        """Return the contents of a file in the document.

        ``path`` is relative to the OPF directory (the same form the toc and
        spine entries use).  Returns the raw bytes, or [] when the entry is
        missing from the archive.
        """
        path = "%s%s" % (self.basepath, path)
        try:
            f = self.book.open(path)
        except KeyError:
            # File missing in the zip
            return []
        data = f.read()
        f.close()
        return data