class RefSeq(object):
    """Light accessor over an NCBI GenBank (GBSet) XML record."""

    def __init__(self, xml):
        self._root = XML(xml)

    def cds_start_end(self):
        """Return (start, end) of the record's single CDS feature."""
        features = self._root.xpath(
            '/GBSet/GBSeq/GBSeq_feature-table/GBFeature[GBFeature_key/text()="CDS"]')
        assert len(features) == 1, "expected exactly one CDS GBFeature_key node"
        start, end = _feature_se(features[0])
        return start, end

    def cds_start(self):
        """Return only the CDS start coordinate."""
        start, _ = self.cds_start_end()
        return start

    def chr(self):
        """Return the chromosome name from the 'source' feature qualifiers."""
        qualifier_values = self._root.xpath(
            '/GBSet/GBSeq/GBSeq_feature-table/GBFeature['
            'GBFeature_key/text()="source"]/GBFeature_quals'
            '/GBQualifier[GBQualifier_name/text()='
            '"chromosome"]/GBQualifier_value')
        return qualifier_values[0].text

    def exons(self):
        """Return [(start, end), ...] for every exon feature."""
        exon_features = self._root.xpath(
            '/GBSet/GBSeq/GBSeq_feature-table/GBFeature[GBFeature_key="exon"]')
        return [_feature_se(node) for node in exon_features]

    def seq(self):
        """Return the record's nucleotide sequence text."""
        return self._root.xpath('/GBSet/GBSeq/GBSeq_sequence')[0].text
def iter_transactions(self):
    """Yield account transactions, following server-side pagination.

    Detects deferred-debit-card accounts from the current page and rewrites
    card transaction types (and, when known, their coming debit date).
    """
    url = self.get_part_url()
    if url is None:
        # There are no transactions in this kind of account
        return
    # A "Différé" block on the page marks a deferred debit card account.
    is_deferred_card = bool(self.doc.xpath(u'//div[contains(text(), "Différé")]'))
    has_summary = False
    if is_deferred_card:
        coming_debit_date = None
        # get coming debit date for deferred_card
        date_string = Regexp(CleanText(u'//option[contains(text(), "détail des factures à débiter le")]'),
                             r'(\d{2}/\d{2}/\d{4})',
                             default=NotAvailable)(self.doc)
        if date_string:
            coming_debit_date = parse_d(date_string)
    while True:
        # Each page is an XML envelope; <dataBody> holds iso-8859-1
        # encoded HTML carrying the actual transaction rows.
        d = XML(self.browser.open(url).content)
        el = d.xpath('//dataBody')
        if not el:
            return
        el = el[0]
        s = unicode(el.text).encode('iso-8859-1')
        doc = fromstring(s)
        for tr in self._iter_transactions(doc):
            if tr.type == Transaction.TYPE_CARD_SUMMARY:
                has_summary = True
            # NOTE(review): 'is' comparison works only if TYPE_CARD is the
            # same singleton constant the row parser assigns — confirm.
            if is_deferred_card and tr.type is Transaction.TYPE_CARD:
                tr.type = Transaction.TYPE_DEFERRED_CARD
                if not has_summary:
                    # Entries seen before a summary line are not debited yet.
                    if coming_debit_date:
                        tr.date = coming_debit_date
                    tr._coming = True
            yield tr
        # <dataHeader><suite> == 1 means another page follows.
        el = d.xpath('//dataHeader')[0]
        if int(el.find('suite').text) != 1:
            return
        # Build the next-page URL from the header's pagination fields,
        # keeping the original sign/src query parameters.
        url = urlparse(url)
        p = parse_qs(url.query)
        args = {}
        args['n10_nrowcolor'] = 0
        args['operationNumberPG'] = el.find('operationNumber').text
        args['operationTypePG'] = el.find('operationType').text
        args['pageNumberPG'] = el.find('pageNumber').text
        args['idecrit'] = el.find('idecrit').text or ''
        args['sign'] = p['sign'][0]
        args['src'] = p['src'][0]
        url = '%s?%s' % (url.path, urlencode(args))
def parser(xml_tree):
    """Parse a PubMed efetch XML payload into per-article rows.

    Each row is [pmid, journal title, publication types (newline-joined),
    impact factor from get_factor(title)].

    :param xml_tree: raw XML bytes/str of a PubmedArticleSet document.
    :return: list of row lists, one per PubmedArticle.
    """
    doc = XML(xml_tree)
    # Evaluate the article xpath once (the original ran it twice: once for
    # logging, once for iteration).
    articles = doc.xpath('/PubmedArticleSet/PubmedArticle')
    # Lazy %-args: the message is only formatted if INFO is enabled.
    logger.info('return quantity %s', len(articles))
    parser_data = []
    for branch in articles:
        row = branch.xpath('./MedlineCitation/PMID/text()')
        title = branch.xpath('.//Journal/Title/text()')
        publication = branch.xpath('.//PublicationTypeList//text()')
        row.extend(title)
        row.append('\n'.join(publication).strip())
        # NOTE(review): raises IndexError when an article has no
        # Journal/Title — unchanged from the original; confirm upstream
        # guarantees a title.
        row.append(get_factor(title[0].lower()))
        parser_data.append(row)
    return parser_data
def iter_transactions(self, coming):
    """Yield transactions across paginated results.

    :param coming: initial "not yet debited" flag passed to the row parser;
        flips to False permanently once a booked transaction is seen.
    """
    url = self.get_part_url()
    if url is None:
        # There are no transactions in this kind of account
        return
    while True:
        d = XML(self.browser.readurl(url))
        try:
            el = self.parser.select(d, '//dataBody', 1, 'xpath')
        except BrokenPageError:
            # No transactions.
            return
        # <dataBody> text is iso-8859-1 encoded HTML with the rows.
        s = StringIO(unicode(el.text).encode('iso-8859-1'))
        doc = self.browser.get_document(s)
        for tr in self._iter_transactions(doc, coming):
            if not tr._coming:
                # Rows after the first booked one are all booked.
                coming = False
            yield tr
        # <dataHeader><suite> == 1 means another page follows.
        el = d.xpath('//dataHeader')[0]
        if int(el.find('suite').text) != 1:
            return
        # Build the next-page URL from the header's pagination fields,
        # keeping the original sign/src query parameters.
        url = urlparse(url)
        p = parse_qs(url.query)
        url = self.browser.buildurl(url.path,
                                    n10_nrowcolor=0,
                                    operationNumberPG=el.find('operationNumber').text,
                                    operationTypePG=el.find('operationType').text,
                                    pageNumberPG=el.find('pageNumber').text,
                                    idecrit=el.find('idecrit').text or '',
                                    sign=p['sign'][0],
                                    src=p['src'][0])
def parse(self, response):
    """Yield a RawData and a ProductPackage item per <Reference> product.

    :param response: HTTP response whose body is the EDQM catalogue XML.
    """
    xml = XML(response.body)
    for prd in xml.xpath('//Reference'):
        # Evaluate each child xpath once and reuse the node lists (the
        # original queried Order_Code three times and Price/Quantity twice
        # per product).
        order_codes = prd.xpath('./Order_Code/text()')
        quantities = prd.xpath('./Quantity_per_vial/text()')
        prices = prd.xpath('./Price/text()')
        cat_no = first(order_codes, None)
        d = {
            "brand": self.brand,
            "cat_no": cat_no,
            "cas": first(prd.xpath('./CAS_Registry_Number/text()'), None),
            "en_name": first(prd.xpath('./Reference_Standard/text()'), None),
            "info2": first(prd.xpath('./Storage/text()'), None),
            "info3": first(quantities, None),
            "info4": first(prices, None),
            # URL keeps the original '' default (vs None for cat_no).
            "prd_url": f"https://crs.edqm.eu/db/4DCGI/View={first(order_codes, '')}",
        }
        yield RawData(**d)
        price = first(prices, None)
        yield ProductPackage(
            brand=self.brand,
            cat_no=cat_no,
            package=first(quantities, None),
            # Strip the euro sign; stays None when no price is present.
            price=price and price.replace('€', ''),
            currency='EUR',
        )
def fields_view_get(self, view_id=None, view_type='form', toolbar=False, submenu=False):
    """Disable quick-create on UTM fields for non-manager users.

    On form views, adds no_create/no_create_edit options to the source,
    campaign and medium fields unless the user is a sales manager.
    """
    res = super(Lead, self).fields_view_get(view_id, view_type, toolbar, submenu)
    if view_type != 'form':
        return res
    arch = XML(res['arch'])
    if not self.user_has_groups('sales_team.group_sale_manager'):
        for field_name in ('source_id', 'campaign_id', 'medium_id'):
            matches = arch.xpath("//field[@name='{0}']".format(field_name))
            if not matches:
                continue
            field_node = matches[0]
            field_node.set(
                "options",
                "{'no_create': True, 'no_create_edit': True}")
            if field_name in res['fields']:
                setup_modifiers(field_node, res['fields'][field_name])
    # Re-serialize the (possibly modified) arch for every form view,
    # matching the original behavior.
    res['arch'] = tostring(arch, encoding='unicode')
    return res
def iter_transactions(self):
    """Yield account transactions, following server-side pagination."""
    url = self.get_part_url()
    if url is None:
        # There are no transactions in this kind of account
        return
    while True:
        d = XML(self.browser.readurl(url))
        try:
            el = self.parser.select(d, '//dataBody', 1, 'xpath')
        except BrokenPageError:
            # No transactions.
            return
        # <dataBody> text is iso-8859-1 encoded HTML with the rows.
        s = StringIO(unicode(el.text).encode('iso-8859-1'))
        doc = self.browser.get_document(s)
        for tr in self._iter_transactions(doc):
            yield tr
        # <dataHeader><suite> == 1 means another page follows.
        el = d.xpath('//dataHeader')[0]
        if int(el.find('suite').text) != 1:
            return
        # Build the next-page URL from the header's pagination fields,
        # keeping the original sign/src query parameters.
        url = urlparse(url)
        p = parse_qs(url.query)
        url = self.browser.buildurl(
            url.path,
            n10_nrowcolor=0,
            operationNumberPG=el.find('operationNumber').text,
            operationTypePG=el.find('operationType').text,
            pageNumberPG=el.find('pageNumber').text,
            idecrit=el.find('idecrit').text or '',
            sign=p['sign'][0],
            src=p['src'][0])
def iter_transactions(self):
    """Yield transactions, flagging deferred-card entries as coming."""
    url = self.get_part_url()
    if url is None:
        # There are no transactions in this kind of account
        return
    # A "Différé" block on the page marks a deferred debit card account.
    is_deferred_card = bool(
        self.doc.xpath(u'//div[contains(text(), "Différé")]'))
    has_summary = False
    while True:
        # Each page is an XML envelope; <dataBody> holds iso-8859-1
        # encoded HTML carrying the actual transaction rows.
        d = XML(self.browser.open(url).content)
        el = d.xpath('//dataBody')
        if not el:
            return
        el = el[0]
        s = unicode(el.text).encode('iso-8859-1')
        doc = fromstring(s)
        for tr in self._iter_transactions(doc):
            if tr.type == Transaction.TYPE_CARD_SUMMARY:
                has_summary = True
            # NOTE(review): 'is' comparison works only if TYPE_CARD is the
            # same singleton constant the row parser assigns — confirm.
            if is_deferred_card and tr.type is Transaction.TYPE_CARD:
                tr.type = Transaction.TYPE_DEFERRED_CARD
                if not has_summary:
                    # Entries seen before a summary line are not debited yet.
                    tr._coming = True
            yield tr
        # <dataHeader><suite> == 1 means another page follows.
        el = d.xpath('//dataHeader')[0]
        if int(el.find('suite').text) != 1:
            return
        # Build the next-page URL from the header's pagination fields,
        # keeping the original sign/src query parameters.
        url = urlparse(url)
        p = parse_qs(url.query)
        args = {}
        args['n10_nrowcolor'] = 0
        args['operationNumberPG'] = el.find('operationNumber').text
        args['operationTypePG'] = el.find('operationType').text
        args['pageNumberPG'] = el.find('pageNumber').text
        args['idecrit'] = el.find('idecrit').text or ''
        args['sign'] = p['sign'][0]
        args['src'] = p['src'][0]
        url = '%s?%s' % (url.path, urlencode(args))
def test_implied_slowpath_connectors(self):
    """Two VNFs with no fpaths get two implied connectors, one destination."""
    meta = {
        'vnfs': [
            {'type': 'ltemmesim', 'fpaths': []},
            {'type': 'ltegwsim', 'fpaths': []},
        ],
    }
    rendered = XML(VNF.render(meta))
    vnf.ConnectionBuilder().implied_slowpath_connectors(rendered)
    self.assertEqual(2, len(rendered.xpath('//connector')))
    self.assertEqual(1, len(rendered.xpath('//connector/destination')))
def parse_result(self, string, bs=False):
    """Extract surface tokens from the analyzer's XML output.

    Each <sentence> text holds tokens formatted as
    "<ideographic space><surface>(<tag>)"; the capture group is the
    surface form.

    :param string: raw XML bytes/str from the analyzer.
    :param bs: if False, return one flat list of tokens across all
        sentences; if True, return a list of per-sentence token lists.
    """
    # Compiled once and hoisted out of the per-sentence loop; the original
    # passed a pattern string (with a stray "" concatenation) to
    # re.findall on every sentence. Same pattern semantics.
    token_re = re.compile("\u3000([^(]+)\\(\\w+\\)")
    root = XML(string)
    sents = root.xpath(".//sentence")
    if not bs:
        return list(itertools.chain.from_iterable(
            token_re.findall(sent.text) for sent in sents))
    return [token_re.findall(sent.text) for sent in sents]
def test_single_vj_excluded_date(self):
    """A week excluded from the VJ's operating dates yields a zero bitmask."""
    excluded_week_monday = datetime.date(2018, 3, 5)
    with open(dirname(__file__) + "/single_vj.xml", mode='rb') as f:
        root = XML(f.read())
    [vj_elem] = root.xpath("//tx:VehicleJourney", namespaces=NAMESPACES)
    parsed = parse_single_vj_elem(vj_elem, excluded_week_monday)
    # Field order: privatecode, jpref_id, vjcode, other_vjcode, linecode,
    # days_bitmask, departuretime — only the bitmask matters here.
    days_bitmask = parsed[5]
    self.assertEqual(days_bitmask, 0)
def test_xml_list(self):
    """The listing XML reports the expected VAT amount for client 0477472701."""
    listing = self._prepare_listing()
    listing.create_xml()
    ns = {"ns2": "http://www.minfin.fgov.be/ClientListingConsignment"}
    doc = XML(base64.b64decode(listing.file_save))
    amount_nodes = doc.xpath(
        '//ns2:Client[ns2:CompanyVATNumber[text() = "0477472701"]]'
        "/ns2:VATAmount",
        namespaces=ns,
    )
    self.assertEqual("94.50", amount_nodes[0].text)
def test_xml_list(self):
    """The intra listing XML has the expected amount sum and non-empty codes."""
    wizard = self.env["partner.vat.intra"].create({
        "period_code": time.strftime("00%Y"),
        "date_start": time.strftime("%Y-01-01"),
        "date_end": time.strftime("%Y-12-31"),
    })
    wizard.get_partners()
    wizard.create_xml()
    ns = {"ns2": "http://www.minfin.fgov.be/IntraConsignment"}
    doc = XML(base64.b64decode(wizard.file_save))
    # Read the amount before asserting codes, matching the original order.
    amount_sum = doc.xpath("//ns2:IntraListing", namespaces=ns)[0].attrib["AmountSum"]
    code_nodes = doc.xpath("//ns2:IntraListing/ns2:IntraClient/ns2:Code", namespaces=ns)
    for code_node in code_nodes:
        self.assertTrue(code_node.text)
    self.assertEqual("450.00", amount_sum)
def parse(self, xmlContent):
    """Render the children of the first <xmoto> element as text.

    For each child element, emits "TAG = " (tag upper-cased) followed by
    whatever self.getGroupContent() returns for that element.

    :param xmlContent: XML document as bytes/str.
    :return: concatenated string. Raises IndexError when no <xmoto>
        element is present (unchanged from the original).
    """
    from lxml.etree import XML  # local import kept: lxml only needed here
    dom = XML(xmlContent)
    dom_head = dom.xpath("//xmoto")[0]
    # str.join avoids the quadratic += string building of the original.
    parts = []
    for child in dom_head:
        parts.append("%s = " % child.tag.upper())
        parts.append(self.getGroupContent(child))
    return "".join(parts)
def get_federation_clarin_sps(id_fed_name: str, federation_md_file_path: str):
    """Collect CLARIN-member SP entityIDs from a federation's SAML metadata.

    :param id_fed_name: display name of the identity federation;
        "SURFconext" gets a simpler XPath because its feed already
        contains only CLARIN SPs.
    :param federation_md_file_path: path to the federation metadata XML.
    :return: set of entityID strings.
    """
    # Lazy %-args: the message is only formatted when INFO is enabled
    # (the original eagerly .format()ed it).
    logging.info("Selecting SP entity IDs from %s...", id_fed_name)
    with open(federation_md_file_path, "rb") as md_file:
        federation_md = XML(md_file.read())
    # Entities carrying the macedir entity-category "clarin-member".
    xpath = ("//md:EntityDescriptor[md:Extensions/mdattr:EntityAttributes/saml:Attribute["
             "@Name='http://macedir.org/entity-category']/saml:AttributeValue[text() ="
             "'http://clarin.eu/category/clarin-member']]")
    # SURFconext's feed is already only showing CLARIN SPs
    if id_fed_name == "SURFconext":
        xpath = "//md:EntityDescriptor"
    return {e.get('entityID')
            for e in federation_md.xpath(xpath, namespaces=NAMESPACE_PREFIX_MAP)}
def test_single_vj(self):
    """Parsing the sample VehicleJourney yields the expected field values."""
    desired_week_monday = datetime.date(2018, 3, 12)
    with open(dirname(__file__) + "/single_vj.xml", mode='rb') as f:
        root = XML(f.read())
    [vj_elem] = root.xpath("//tx:VehicleJourney", namespaces=NAMESPACES)
    (privatecode, jpref_id, vjcode, other_vjcode,
     linecode, days_bitmask, departuretime) = parse_single_vj_elem(
        vj_elem, desired_week_monday)
    self.assertEqual(privatecode, "cen-33-6-W-y11-13-287-UU")
    self.assertEqual(jpref_id, "JP_33-6-W-y11-13-35-I-5")
    self.assertEqual(vjcode, "VJ_33-6-W-y11-13-287-UU")
    self.assertEqual(other_vjcode, None)
    self.assertEqual(linecode, "33-6-W-y11-13")
    self.assertEqual(days_bitmask, 1 << 6)
    self.assertEqual(departuretime, "08:48:00")
def fields_view_get(self, view_id=None, view_type='form', toolbar=False, submenu=False):
    """Disable quick-create on the area field for non-manager users."""
    res = super(Partner, self).fields_view_get(view_id, view_type, toolbar, submenu)
    if view_type != 'form':
        return res
    arch = XML(res['arch'])
    if not self.user_has_groups('sales_team.group_sale_manager'):
        matches = arch.xpath("//field[@name='area_id']")
        if matches:
            area_node = matches[0]
            area_node.set("options", "{'no_create': True, 'no_create_edit': True}")
            setup_modifiers(area_node, res['fields']['area_id'])
    # Re-serialize the (possibly modified) arch for every form view,
    # matching the original behavior.
    res['arch'] = tostring(arch, encoding='unicode')
    return res
def inasafe_metadata_fix(layer_id):
    """Attempt to fix problem of InaSAFE metadata.

    This fix is needed to make sure InaSAFE metadata is persisted in
    GeoNode and is used correctly by GeoSAFE. This bug happens because
    InaSAFE metadata implement wrong schema type in
    supplementalInformation.

    :param layer_id: layer ID
    :type layer_id: int
    :return:
    """
    # Take InaSAFE keywords from xml metadata *file*
    try:
        instance = Layer.objects.get(id=layer_id)
        xml_file = instance.upload_session.layerfile_set.get(name='xml')
        # if xml file exists, check supplementalInformation field
        namespaces = {
            'gmd': 'http://www.isotc211.org/2005/gmd',
            'gco': 'http://www.isotc211.org/2005/gco'
        }
        content = xml_file.file.read()
        root = XML(content)
        # supplemental_info = root.xpath(
        #     '//gmd:supplementalInformation',
        #     namespaces=namespaces)[0]
        # Check that it contains InaSAFE metadata
        inasafe_el = root.xpath(ISO_METADATA_INASAFE_KEYWORD_TAG)
        inasafe_provenance_el = root.xpath(
            ISO_METADATA_INASAFE_PROVENANCE_KEYWORD_TAG)
        # Take InaSAFE metadata
        if not inasafe_el:
            # Do nothing if InaSAFE tag didn't exists
            return
        # Take root xml from layer metadata_xml field
        layer_root_xml = XML(instance.metadata_xml)
        layer_sup_info = layer_root_xml.xpath(
            '//gmd:supplementalInformation',
            namespaces=namespaces)[0]
        char_string_tagname = '{gco}CharacterString'.format(**namespaces)
        layer_sup_info_content = layer_sup_info.find(char_string_tagname)
        if layer_sup_info_content is None:
            # Insert gco:CharacterString value
            el = Element(char_string_tagname)
            layer_sup_info.insert(0, el)
        # put InaSAFE keywords after CharacterString
        layer_inasafe_meta_content = layer_sup_info.find('inasafe')
        if layer_inasafe_meta_content is not None:
            # Clear existing InaSAFE keywords, replace with new one
            layer_sup_info.remove(layer_inasafe_meta_content)
        # NOTE(review): inasafe_el comes straight from root.xpath(); if the
        # tag constant yields a node list rather than a single element,
        # this insert relies on lxml accepting it — confirm tag semantics.
        layer_sup_info.insert(1, inasafe_el)
        # provenance only shows up on impact layers
        layer_inasafe_meta_provenance = layer_sup_info.find(
            'inasafe_provenance')
        # NOTE(review): this 'is not None' test differs from the truthiness
        # test used further below for the same value — confirm which is
        # intended when the provenance xpath matches nothing.
        if inasafe_provenance_el is not None:
            if layer_inasafe_meta_provenance is not None:
                # Clear existing InaSAFE keywords, replace with new one
                layer_sup_info.remove(layer_inasafe_meta_provenance)
            layer_sup_info.insert(1, inasafe_provenance_el)
        # write back to resource base so the same thing returned by csw
        resources = ResourceBase.objects.filter(
            id=instance.resourcebase_ptr.id)
        resources.update(
            metadata_xml=etree.tostring(layer_root_xml, pretty_print=True))
        # update qgis server xml file
        with open(xml_file.file.path, mode='w') as f:
            f.write(etree.tostring(layer_root_xml, pretty_print=True))
        qgis_layer = instance.qgis_layer
        qgis_xml_file = '{prefix}.xml'.format(
            prefix=qgis_layer.qgis_layer_path_prefix)
        with open(qgis_xml_file, mode='w') as f:
            f.write(etree.tostring(layer_root_xml, pretty_print=True))
        # update InaSAFE keywords cache
        metadata, created = Metadata.objects.get_or_create(layer=instance)
        inasafe_metadata_xml = etree.tostring(inasafe_el, pretty_print=True)
        if inasafe_provenance_el:
            inasafe_metadata_xml += '\n'
            inasafe_metadata_xml += etree.tostring(
                inasafe_provenance_el, pretty_print=True)
        metadata.keywords_xml = inasafe_metadata_xml
        metadata.save()
    except Exception as e:
        # NOTE(review): deliberately best-effort — any failure is logged at
        # DEBUG and swallowed, so a broken fix never breaks the caller.
        LOGGER.debug(e)
        pass
class Gene(object):
    """Accessor over an NCBI Entrezgene-Set XML document (efetch result)."""

    def __init__(self, xml):
        """:param xml: raw Entrezgene-Set XML string.

        NOTE(review): assert is stripped under -O; kept as-is to preserve
        the original AssertionError contract for callers.
        """
        assert '<Entrezgene-Set>' in xml and '</Entrezgene-Set>' in xml, "invalid Gene XML"
        self._root = XML(xml)

    # TODO: rename hgnc or somesuch
    def locus(self):
        """Return the gene symbol (Gene-ref_locus)."""
        return self._root.xpath('/Entrezgene-Set/Entrezgene/Entrezgene_gene/Gene-ref/Gene-ref_locus/text()')[0]

    def desc(self):
        """Return the gene description, or None when the element is absent."""
        try:
            return self._root.xpath('/Entrezgene-Set/Entrezgene/Entrezgene_gene/Gene-ref/Gene-ref_desc/text()')[0]
        # was a bare except: narrowed to the empty-result miss we expect
        except IndexError:
            return None

    def maploc(self):
        """Return the map location, or None when the element is absent."""
        try:
            return self._root.xpath('/Entrezgene-Set/Entrezgene/Entrezgene_gene/Gene-ref/Gene-ref_maploc/text()')[0]
        # was a bare except: narrowed to the empty-result miss we expect
        except IndexError:
            return None

    def summary(self):
        """Return the Entrezgene summary text (raises IndexError if absent)."""
        return self._root.xpath('/Entrezgene-Set/Entrezgene/Entrezgene_summary/text()')[0]

    def grch37p5_mapping(self):
        """Return the GRCh37.p5 genomic mapping of this gene as a dict."""
        gc = self._grch37p5_gc()
        si = gc.find('Gene-commentary_seqs/Seq-loc/Seq-loc_int/Seq-interval')
        ac = gc.find('Gene-commentary_accession').text
        v = gc.find('Gene-commentary_version').text
        return {
            'chr': _NC_to_chr(ac),
            'accession': ac,
            'version': v,
            'ac': ac + '.' + v,
            'start': int(si.find('Seq-interval_from').text),
            'end': int(si.find('Seq-interval_to').text),
            'strand': si.find('Seq-interval_strand/Na-strand').get('value'),
            'gi': int(si.find('Seq-interval_id/Seq-id/Seq-id_gi').text),
        }

    def grch37p5_product_exons(self, acv):
        """Return [(from, to), ...] genomic exon intervals for product `acv`."""
        gc = self._grch37p5_product_gc(acv)
        i = gc.iterfind('Gene-commentary_genomic-coords/Seq-loc/Seq-loc_mix/Seq-loc-mix/Seq-loc/Seq-loc_int/Seq-interval')
        return [(int(n.find('Seq-interval_from').text),
                 int(n.find('Seq-interval_to').text))
                for n in i]

    def grch37p5_product_strand(self, acv):
        """Return the strand ('plus'/'minus') for product `acv`."""
        gc = self._grch37p5_product_gc(acv)
        n = gc.find('Gene-commentary_genomic-coords/Seq-loc/Seq-loc_mix/Seq-loc-mix/Seq-loc/Seq-loc_int/Seq-interval')
        return n.find('Seq-interval_strand/Na-strand').get('value')

    def grch37p5_product_seq_id(self, acv):
        """Return the genomic sequence gi (as text) for product `acv`."""
        gc = self._grch37p5_product_gc(acv)
        n = gc.find('Gene-commentary_genomic-coords/Seq-loc/Seq-loc_mix/Seq-loc-mix/Seq-loc/Seq-loc_int/Seq-interval')
        return n.find('Seq-interval_id/Seq-id/Seq-id_gi').text

    ######################################################################
    ## Internal functions
    # TODO: Expand to manipulate alignments to non-chromosomal reference
    # e.g., NM_000034.3, gene 226, aligns to an NG and HuRef, but not
    # to GRCh37. Should use gis for all alignment

    def _grch37p5_product_gc(self, acv):
        """Return the single Gene-commentary node for product accession.version `acv`.

        :raises RuntimeError: when zero or multiple matching products exist.
        """
        ac, v = acv.split('.')
        pred = ' and '.join(['Gene-commentary_accession/text()="{ac}"',
                             'Gene-commentary_version/text()="{v}"'])
        pred = pred.format(ac=ac, v=v)
        xpath = 'Gene-commentary_products/Gene-commentary[%s]' % (pred)
        nodes = self._grch37p5_gc().xpath(xpath)
        if len(nodes) != 1:
            raise RuntimeError("Got %d Gene-commentary_products for %s" % (len(nodes), acv))
        return nodes[0]

    def _grch37p5_gc(self):
        """Return the Gene-commentary for the GRCh37.p5 primary assembly."""
        return self._gc(heading='Reference GRCh37.p5 Primary Assembly')

    def _gc(self, heading):
        """Return the first Entrezgene_locus Gene-commentary with `heading`."""
        xpath = '/Entrezgene-Set/Entrezgene/Entrezgene_locus/Gene-commentary[Gene-commentary_heading[text()="%s"]]' % (heading)
        return self._root.xpath(xpath)[0]