def metadata(self, lid):
    '''Fetch the metadata document for layer `lid` and validate it.

    The URL is built from self.mta and requested with HTTP Basic credentials
    taken from self.userpass. The response body is screened by
    self.content(), parsed, validated with self.schema and finally checked
    with self.metamandatory().

    Nothing is returned. Errors raised while fetching, parsing or validating
    are caught here and reported through print and self.checklog.

    The request is repeated, with no upper bound, after an HTTP 429 (the
    global KEY is refreshed first) and after a socket timeout.
    '''
    global KEY
    # b64encode() gives the same value as the deprecated encodestring() once
    # the newlines encodestring() inserts have been stripped.
    b64s = base64.b64encode('{0}:{1}'.format(*self.userpass))
    svr = None  # never assigned a real value; only echoed in the log lines
    url = self.mta.format(lid=lid)
    self.checklog.debug('WFS0u-' + kmask(url))
    retry = True
    while retry:
        retry = False
        try:
            req = U2.Request(url)
            req.add_header('Authorization', 'Basic {0}'.format(b64s))
            with closing(U2.urlopen(url=req, timeout=URL_TIMEOUT)) as content:
                ct = str(content.read())
            # self.content() screens the raw page for known error content so
            # the parser won't break; a truthy result abandons this layer
            # (retry is False, so `continue` leaves the loop).
            if self.content(lid, ct):
                continue
            xdoc = LXMLetree().fromstring(ct)
            if not self.schema.validate(xdoc.gettree()):
                # Only consult the error log when validation itself failed.
                last = self.schema.error_log.last_error
                detail = last.message if last is not None else 'no error detail available'
                raise WFSServiceException(
                    'XML Fails validation against gmd.xsd with {0}'.format(detail))
            if not self.metamandatory(xdoc):
                # The schema error log says nothing about this check, so it
                # gets its own message instead of dereferencing last_error.
                raise WFSServiceException('XML fails mandatory metadata check')
            self.checklog.debug('WFS0v-{0}'.format(lid))
        except U2.HTTPError as he:
            if re.search('429', str(he)):
                self.checklog.debug('WFS0k - Swap keys and Retry on 429. {1}'.format(SLEEP_TIME, he))
                KEY = apikey(KEY_FILE)
                if KEYINDEX == 0:
                    self.checklog.debug('WFS04 - Wait {0}s and Retry on 429. {1}'.format(SLEEP_TIME, he))
                    time.sleep(SLEEP_TIME)
                retry = True
                continue
            print('WFSMeta {0} Failure - {1}'.format(lid, he))
            self.checklog.error('WFSm0h - {0}/{1}\n{2} on server {3}'.format(lid, he, kmask(url), svr))
        except WFSServiceException as we:
            print('WFSMeta {0} XSD Failure - {1}'.format(lid, we))
            self.checklog.error('WFSm0se - {0}/{1}\n{2} on server {3}'.format(lid, we, kmask(url), svr))
        except XMLSyntaxError as xe:
            print('WFSMeta {0} XMLSyntaxError causing Parse Failure - {1}'.format(lid, xe))
            self.checklog.error('WFSm0x - {0}/{1}\n{2}\n{3} on server {4}'.format(lid, xe, kmask(url), ct[:100], svr))
        except U2.URLError as ue:
            print('WFS0ue {0} URLError - {1}'.format(lid, ue))
            if isinstance(ue.reason, socket.timeout):
                self.checklog.error('WFS0uet - {0}/{1}\n{2} on server {3}. Retry'.format(lid, ue, kmask(url), svr))
                retry = True
                continue
            else:
                self.checklog.error('WFS0ueo - {0}/{1}\n{2} on server {3}'.format(lid, ue, kmask(url), svr))
        except Exception as ee:
            print('WFS0ee {0} Exception - {1}'.format(lid, ee))
            self.checklog.error('WFS0ee - {0}/{1}\n{2} on server {3}'.format(lid, ee, kmask(url), svr))
def schema(self):
    '''Build the schema object used for validation from the ANZLIC file.

    Returns the object produced by LXMLetree().XMLSchema(), or None when
    building it raises XMLSchemaParseError (the error is printed and logged
    to self.checklog). Other exceptions, such as a failure to open the
    file, are not handled here.
    '''
    try:
        with open(ANZLIC) as xsd_file:
            xsd_tree = LXMLetree().parse(xsd_file).gettree()
            return LXMLetree().XMLSchema(xsd_tree)
    except XMLSchemaParseError as parse_err:
        message = 'WFSMeta XMLSchemaParseError causing Failure - {0}\nThis usually indicates a libxml2<2.7.8 problem'
        print(message.format(parse_err))
        self.checklog.error('WFSxs - {0}'.format(parse_err))
    # Reached only after a schema parse error.
    return None
def parse(self, pno):
    '''Parses useful date fields from LDS layer metadata XML

    Downloads the ISO metadata XML for layer `pno` and reads the text of
    the elements addressed by mxp['fd'], mxp['pd'], mxp['rd'] and mxp['ed'].
    The 'pd' value must be accompanied by a 'publication' date type and the
    'rd' value by a 'revision' date type.

    Returns the tuple (n1, n2, n3, n4, er). Each n value is the element text,
    or its initial 0 if processing stopped before it was read. er is always
    None.

    A U2.HTTPError is retried until ecounter exceeds MAX_DVL_RETRY, then
    re-raised. A date type mismatch is logged as a warning and ends
    processing with whatever values were read so far.
    '''
    retry = True
    ecounter = 0
    n1, n2, n3, n4 = 4 * (0, )
    er = None
    url = 'https://data.linz.govt.nz/layer/{0}/metadata/iso/xml'.format(pno)

    def datetype_is(root, key, expected):
        # re.search takes (pattern, string); the previous code passed the
        # Element itself as the pattern, which raises TypeError.
        # NOTE(review): assumes the date type code is the text of the
        # element addressed by mxp[key] -- confirm against the mxp paths.
        node = root.find(mxp[key], namespaces=NSX)
        text = node.text if node is not None else None
        return bool(text and re.search(expected, text))

    while retry:
        retry = False
        try:
            # closing() releases the response on every exit path, so no
            # separate finally/close is needed.
            with closing(U2.urlopen(url)) as content:
                root = LXMLetree().parse(content).getroot()
                n1 = root.find(mxp['fd'], namespaces=NSX).text
                n2 = root.find(mxp['pd'], namespaces=NSX).text
                if n2 and not datetype_is(root, 'pdt', 'publication'):
                    raise DateAvailabilityException('Cannot associate date to datetype Pub')
                n3 = root.find(mxp['rd'], namespaces=NSX).text
                if n3 and not datetype_is(root, 'rdt', 'revision'):
                    raise DateAvailabilityException('Cannot associate date to datetype Rev')
                n4 = root.find(mxp['ed'], namespaces=NSX).text
        except U2.HTTPError:
            if ecounter > MAX_DVL_RETRY:
                raise
            # format() rather than '+' so a non-string pno cannot break logging
            self.checklog.warn('LPS1 - Error parsing metadata dates {0}'.format(pno))
            ecounter += 1
            retry = True
        except DateAvailabilityException as dae:
            self.checklog.warn('LPS2 - Error identifying date {0}. {1}'.format(pno, dae))
    return n1, n2, n3, n4, er
def parse(self, pno):
    '''Parses useful date fields from LDS layer metadata XML

    Downloads the ISO metadata XML for layer `pno` and reads the text of
    the elements addressed by mxp['fd'], mxp['pd'], mxp['rd'] and mxp['ed'].
    The 'pd' value must be accompanied by a 'publication' date type and the
    'rd' value by a 'revision' date type.

    Returns the tuple (n1, n2, n3, n4, er). Each n value is the element text,
    or its initial 0 if processing stopped before it was read. er is always
    None.

    A U2.HTTPError is retried until ecounter exceeds MAX_DVL_RETRY, then
    re-raised. A date type mismatch is logged as a warning and ends
    processing with whatever values were read so far.
    '''
    retry = True
    ecounter = 0
    n1, n2, n3, n4 = 4 * (0, )
    er = None
    url = 'https://data.linz.govt.nz/layer/{0}/metadata/iso/xml'.format(pno)

    def datetype_is(root, key, expected):
        # re.search takes (pattern, string); the previous code passed the
        # Element itself as the pattern, which raises TypeError.
        # NOTE(review): assumes the date type code is the text of the
        # element addressed by mxp[key] -- confirm against the mxp paths.
        node = root.find(mxp[key], namespaces=NSX)
        text = node.text if node is not None else None
        return bool(text and re.search(expected, text))

    while retry:
        retry = False
        try:
            # closing() releases the response on every exit path, so no
            # separate finally/close is needed.
            with closing(U2.urlopen(url)) as content:
                root = LXMLetree().parse(content).getroot()
                n1 = root.find(mxp['fd'], namespaces=NSX).text
                n2 = root.find(mxp['pd'], namespaces=NSX).text
                if n2 and not datetype_is(root, 'pdt', 'publication'):
                    raise DateAvailabilityException('Cannot associate date to datetype Pub')
                n3 = root.find(mxp['rd'], namespaces=NSX).text
                if n3 and not datetype_is(root, 'rdt', 'revision'):
                    raise DateAvailabilityException('Cannot associate date to datetype Rev')
                n4 = root.find(mxp['ed'], namespaces=NSX).text
        except U2.HTTPError:
            if ecounter > MAX_DVL_RETRY:
                raise
            # format() rather than '+' so a non-string pno cannot break logging
            self.checklog.warn('LPS1 - Error parsing metadata dates {0}'.format(pno))
            ecounter += 1
            retry = True
        except DateAvailabilityException as dae:
            self.checklog.warn('LPS2 - Error identifying date {0}. {1}'.format(pno, dae))
    return n1, n2, n3, n4, er
def metadata(self, lid):
    '''Fetch the metadata document for layer `lid` and validate it.

    The URL is built from self.mta and requested with HTTP Basic credentials
    taken from self.userpass. The response body is screened by
    self.content(), parsed, validated with self.schema and finally checked
    with self.metamandatory().

    Nothing is returned. Errors raised while fetching, parsing or validating
    are caught here and reported through print and self.checklog.

    The request is repeated, with no upper bound, after an HTTP 429 (the
    global KEY is refreshed first) and after a socket timeout.
    '''
    global KEY
    # b64encode() gives the same value as the deprecated encodestring() once
    # the newlines encodestring() inserts have been stripped.
    b64s = base64.b64encode('{0}:{1}'.format(*self.userpass))
    svr = None  # never assigned a real value; only echoed in the log lines
    url = self.mta.format(lid=lid)
    self.checklog.debug('WFS0u-' + kmask(url))
    retry = True
    while retry:
        retry = False
        try:
            req = U2.Request(url)
            req.add_header('Authorization', 'Basic {0}'.format(b64s))
            with closing(U2.urlopen(url=req, timeout=URL_TIMEOUT)) as content:
                ct = str(content.read())
            # self.content() screens the raw page for known error content so
            # the parser won't break; a truthy result abandons this layer
            # (retry is False, so `continue` leaves the loop).
            if self.content(lid, ct):
                continue
            xdoc = LXMLetree().fromstring(ct)
            if not self.schema.validate(xdoc.gettree()):
                # Only consult the error log when validation itself failed.
                last = self.schema.error_log.last_error
                detail = last.message if last is not None else 'no error detail available'
                raise WFSServiceException(
                    'XML Fails validation against gmd.xsd with {0}'.format(detail))
            if not self.metamandatory(xdoc):
                # The schema error log says nothing about this check, so it
                # gets its own message instead of dereferencing last_error.
                raise WFSServiceException('XML fails mandatory metadata check')
            self.checklog.debug('WFS0v-{0}'.format(lid))
        except U2.HTTPError as he:
            if re.search('429', str(he)):
                self.checklog.debug(
                    'WFS0k - Swap keys and Retry on 429. {1}'.format(SLEEP_TIME, he))
                KEY = apikey(KEY_FILE)
                if KEYINDEX == 0:
                    self.checklog.debug(
                        'WFS04 - Wait {0}s and Retry on 429. {1}'.format(SLEEP_TIME, he))
                    time.sleep(SLEEP_TIME)
                retry = True
                continue
            print('WFSMeta {0} Failure - {1}'.format(lid, he))
            self.checklog.error(
                'WFSm0h - {0}/{1}\n{2} on server {3}'.format(lid, he, kmask(url), svr))
        except WFSServiceException as we:
            print('WFSMeta {0} XSD Failure - {1}'.format(lid, we))
            self.checklog.error(
                'WFSm0se - {0}/{1}\n{2} on server {3}'.format(lid, we, kmask(url), svr))
        except XMLSyntaxError as xe:
            print('WFSMeta {0} XMLSyntaxError causing Parse Failure - {1}'.format(lid, xe))
            self.checklog.error(
                'WFSm0x - {0}/{1}\n{2}\n{3} on server {4}'.format(
                    lid, xe, kmask(url), ct[:100], svr))
        except U2.URLError as ue:
            print('WFS0ue {0} URLError - {1}'.format(lid, ue))
            if isinstance(ue.reason, socket.timeout):
                self.checklog.error(
                    'WFS0uet - {0}/{1}\n{2} on server {3}. Retry'.format(
                        lid, ue, kmask(url), svr))
                retry = True
                continue
            else:
                self.checklog.error(
                    'WFS0ueo - {0}/{1}\n{2} on server {3}'.format(lid, ue, kmask(url), svr))
        except Exception as ee:
            print('WFS0ee {0} Exception - {1}'.format(lid, ee))
            self.checklog.error(
                'WFS0ee - {0}/{1}\n{2} on server {3}'.format(lid, ee, kmask(url), svr))
def __init__(self, upd, checklog):
    '''Store credentials and logger, then load the tree and the schema.

    upd      -- stored as self.userpass
    checklog -- logger object stored as self.checklog
    '''
    self.userpass, self.checklog = upd, checklog

    loader = LXMLetree()
    # NOTE(review): `id` is neither a parameter nor a local here. Unless the
    # module defines a global called `id`, this is the builtin function and
    # `id[0]` raises TypeError -- confirm what was meant to be indexed.
    request = self.rs3.format(id=id[0].text, csw_v=CSW, gmd=NSX['gmd'])
    self.tree = loader.parse(request)

    # From here on the instance attribute `schema` holds the value returned
    # by the schema() method and hides that method on this instance.
    self.schema = self.schema()