def parse(self):
    """Parse the XCRI feed and flatten the collected presentations.

    Runs a namespace-aware SAX parse of ``self.xcri_file`` through
    ``self.handler``, then normalises each presentation dict the handler
    collected (single-valued fields unpacked from their lists, dates
    converted to Solr format, identifiers resolved) and appends it to
    ``self.presentations``.  A presentation that cannot be transformed
    (e.g. one with no identifier) is logged and skipped.
    """
    parser = sax.make_parser()
    parser.setContentHandler(self.handler)
    parser.setFeature(sax.handler.feature_namespaces, 1)
    parser.parse(self.xcri_file)

    # transformations: the handler accumulates every field as a list;
    # flatten single-valued fields and normalise identifiers/dates.
    for p in self.handler.presentations:
        try:
            p['provider_title'] = p['provider_title'][0]
            p['course_title'] = p['course_title'][0]
            p['course_identifier'] = self._get_identifier(p['course_identifier'])
            p['course_description'] = ''.join(p['course_description'])
            presentation_id = self._get_identifier(p['presentation_identifier'])
            if not presentation_id:
                # Presentation identifier is the main ID for a document;
                # if there is no ID, we do not want to import it.
                raise Exception("Presentation with no ID")
            p['presentation_identifier'] = presentation_id
            # Date fields: convert the first collected value to Solr format.
            for date_field in ('presentation_start', 'presentation_end',
                               'presentation_applyFrom', 'presentation_applyUntil'):
                if date_field in p:
                    p[date_field] = self._date_to_solr_format(p[date_field][0])
            # Plain single-valued fields: just unwrap the first value.
            for single_field in ('presentation_bookingEndpoint',
                                 'presentation_memberApplyTo',
                                 'presentation_attendanceMode',
                                 'presentation_attendancePattern'):
                if single_field in p:
                    p[single_field] = p[single_field][0]
            if 'presentation_venue_identifier' in p:
                # we're only interested by OxPoints ID atm
                oxpoints = self._get_identifier(
                    p['presentation_venue_identifier'],
                    uri_base="http://oxpoints.oucs.ox.ac.uk/id/")
                if oxpoints:
                    p['presentation_venue_identifier'] = 'oxpoints:{id}'.format(id=oxpoints)
                else:
                    del p['presentation_venue_identifier']
            p['course_subject'] = [subject for subject in p['course_subject']
                                   if subject not in self.ignore_subjects]
            self.presentations.append(p)
        except Exception:
            # Best-effort import: log the failing presentation and move on.
            logger.warning("Couldn't transform presentation",
                           exc_info=True, extra={'presentation': p})
def parse_file(input_file, output_file):
    """Feed *input_file* through a TemplatesAndModulesHandler SAX parse."""
    # The handler gets both paths so it can emit transformed output.
    content_handler = TemplatesAndModulesHandler(input_file, output_file)
    # Plain (non-namespace-aware) reader with our handler installed.
    reader = xml.sax.make_parser()
    reader.setFeature(xml.sax.handler.feature_namespaces, 0)
    reader.setContentHandler(content_handler)
    reader.parse(input_file)
def make_parser(handler):
    """
    Build a SAX document parser wired to *handler*, with namespace
    processing enabled and validation disabled.

    Validation is deliberately turned off: validating can force a
    download of the LIGO LW DTD from the LDAS document server whenever
    the DTD is not inlined in the XML, which requires both a working
    internet connection and an available server.
    """
    xml_reader = sax.make_parser()
    xml_reader.setContentHandler(handler)
    # Feature table: namespaces on; validation and external general
    # entities off.
    for feature, enabled in (
        (sax.handler.feature_namespaces, True),
        (sax.handler.feature_validation, False),
        (sax.handler.feature_external_ges, False),
    ):
        xml_reader.setFeature(feature, enabled)
    return xml_reader
def upload_handler():
    """Flask endpoint: accept a zip dump upload, verify and register it.

    Expects a ``file`` part containing ``dump.zip`` which in turn holds
    ``dump.xml`` plus a detached GOST signature ``dump.xml.sig``.  The
    signature is checked with openssl, the XML is SAX-parsed for its
    update times, two SHA-256 digests are computed (whole file and with
    the ``_rb``/``_re`` delimited region stripped), and the result is
    recorded in the ``dumps`` MySQL table.  Any failure aborts with 500.

    NOTE(review): statement nesting below was reconstructed from a
    whitespace-mangled source — confirm against the original layout.
    """
    addr = get_ip(request)
    acheck = get_auth(addr, request, ACCESS_UPLOAD)
    user = acheck['nick']
    # check if the post request has the file part
    if 'file' not in request.files:
        app.logger.warning('%s (%s): No file part', addr, user)
        abort(500)
    file = request.files['file']
    # if user does not select file, browser also
    # submit a empty part without filename
    if file.filename == '':
        app.logger.warning('%s (%s): No selected file', addr, user)
        abort(500)
    if file and allowed_file(file.filename):
        H = RegHandler()
        hd = hashlib.sha256()   # digest of the content with the marked region stripped
        hda = hashlib.sha256()  # digest of the full dump.xml
        mtime = ""
        size = 0
        uniqname = ""
        # Per-upload scratch directory under UPLOAD_FOLDER.
        tempdir = os.path.join(app.config['UPLOAD_FOLDER'], str(uuid.uuid4()))
        os.mkdir(tempdir)
        # New filename harcoded - dump.zip
        filename = os.path.join(tempdir, 'dump.zip')
        file.save(filename)
        asize = os.path.getsize(filename)
        # Check zip archive
        if zipfile.is_zipfile(filename):
            with zipfile.ZipFile(filename) as myzip:
                try:
                    # check for dump.xml and dump.xml.sig presents
                    info1 = myzip.getinfo('dump.xml')
                    mtime = "%4d-%02d-%02dT%02d:%02d:%02d+03:00" % info1.date_time  # fixed time zone
                    size = info1.file_size
                    # info2 is unused beyond proving dump.xml.sig exists
                    # (getinfo raises KeyError if it is missing).
                    info2 = myzip.getinfo('dump.xml.sig')
                    # extract dump.xml and dump.xml.sig
                    myxml = myzip.extract('dump.xml', tempdir)
                    mysig = myzip.extract('dump.xml.sig', tempdir)
                    # check signature (GOST engine, detached DER S/MIME)
                    tempfile = os.path.join(tempdir, 'dump.xml.temp')
                    args = [
                        'openssl', 'smime', '-verify', '-engine', 'gost',
                        '-in', mysig, '-noverify', '-inform', 'DER',
                        '-content', myxml, '-out', tempfile
                    ]
                    if subprocess.call(args) == 0:
                        # Signature OK: parse the XML to populate H
                        # (update times) via the SAX handler.
                        parser = xml.sax.make_parser()
                        parser.setFeature(xml.sax.handler.feature_namespaces, 0)
                        parser.setContentHandler(H)
                        parser.parse(myxml)
                    else:
                        app.logger.error("%s (%s): OpenSSL execute error %s",
                                         addr, user, sys.exc_info()[1])
                        abort(500)
                    # get hash sha256
                    # Streaming two-digest pass: hda hashes everything;
                    # hd hashes the content with the _rb-prefixed /
                    # _re-suffixed region removed (fl is the state:
                    # 0 = before start marker, 1 = inside, 2 = done).
                    with open(myxml, 'rb') as fh:
                        s = b''
                        p = b''
                        fl = 0
                        for block in iter(lambda: fh.read(BLOCK_SIZE), b''):
                            hda.update(block)
                            if fl == 0:
                                s += block
                                if _rb.match(s):
                                    hd.update(_rb.sub(b"", s))
                                    fl = 1
                            elif fl == 1:
                                s = p + block
                                if _re.search(s):
                                    hd.update(_re.sub(b"", s))
                                    fl = 2
                                else:
                                    hd.update(p)
                            p = block
                    # clear for temporary files
                    if os.path.exists(tempfile):
                        os.unlink(tempfile)
                    if os.path.exists(myxml):
                        os.unlink(myxml)
                    if os.path.exists(mysig):
                        os.unlink(mysig)
                except:
                    # Bare except: any verification/parsing failure is
                    # logged and turned into a 500.
                    app.logger.error("%s (%s): Check error %s",
                                     addr, user, sys.exc_info()[1])
                    abort(500)
            uniqid = hda.hexdigest()
            realid = hd.hexdigest()
            uniqname = uniqid + '.zip'
            # Shard storage by the first two byte-pairs of the digest.
            datadir = os.path.join(app.config['DATA_FOLDER'],
                                   uniqname[0:2], uniqname[2:4])
            if not os.path.exists(datadir):
                os.makedirs(datadir)
            newfilename = os.path.join(datadir, uniqname)
            # check database for an existing record with this digest
            cur = mysql.connection.cursor()
            cur.execute('SELECT * FROM dumps WHERE id = %s', (uniqid, ))
            rv = cur.fetchall()
            dump = rv[0] if rv else None
            _mtime = dateutil.parser.parse(mtime).timestamp()
            if dump is not None:
                app.logger.warning('%s (%s): Record %s already exists',
                                   addr, user, uniqname)
                if dump['a'] == 0:
                    # presumably 'a' flags whether the file is archived;
                    # re-add the file when it is not — TODO confirm.
                    add_file(addr, user, uniqname, filename, newfilename)
                app.logger.warning('%s (%s): Archive %s, skipping...',
                                   addr, user, uniqname)
                if _mtime != dump['m']:
                    app.logger.warning(
                        '%s (%s): Record %s has mtime: %s, but file has mtime: %s',
                        addr, user, uniqname, dump['m'], _mtime)
            else:
                add_file(addr, user, uniqname, filename, newfilename)
                cur.execute(
                    'INSERT INTO `dumps` (`id`, `crc`, `ut`, `utu`, `m`, `as`, `s`, `u`) VALUES (%s, %s, %s, %s, %s, %s, %s, %s)',
                    (uniqid, realid, int(
                        H.updateTime_ut), int(H.updateTimeUrgently_ut),
                     int(_mtime), size, asize, int(time.time())))
                mysql.connection.commit()
                app.logger.info("%s (%s): %s (%s) record was added",
                                addr, user, uniqid, realid)
            # Remove the uploaded zip and its scratch directory.
            if os.path.exists(filename):
                os.unlink(filename)
            if os.path.exists(tempdir):
                os.rmdir(tempdir)
        else:
            app.logger.error("%s (%s): Not zip file", addr, user)
            abort(500)
        return '''
<!doctype html>
<title>OK</title>
<h1>OK %s updateTime=%s updateTimeUrgently=%s mtime=%s</h1>
''' % (uniqname, int(
            H.updateTime_ut), int(H.updateTimeUrgently_ut), int(_mtime))
    else:
        app.logger.error("%s (%s): Bogus filename %s", addr, user, file.filename)
        abort(500)
    # NOTE(review): the source appears truncated here — the body and the
    # closing quotes of this triple-quoted response (likely an HTML upload
    # form) are missing from the file as seen.
    return '''
def upload_handler():
    """Flask endpoint: accept a raw XML dump upload, verify and register it.

    Expects a ``file`` part containing ``dump.xml``.  The XML is schema-
    validated with xmllint, SAX-parsed for its update time, SHA-256
    digested twice (whole file and with the ``_rb``/``_re`` delimited
    region stripped), and recorded in the ``dumps`` MySQL table.  Any
    failure cleans up the scratch files and aborts with 500.

    NOTE(review): statement nesting below was reconstructed from a
    whitespace-mangled source — confirm against the original layout.
    """
    addr = get_ip(request)
    acheck = get_auth(addr, request, ACCESS_UPLOAD)
    user = acheck['nick']
    # check if the post request has the file part
    if 'file' not in request.files:
        app.logger.warning('%s (%s): No file part', addr, user)
        abort(500)
    file = request.files['file']
    # if user does not select file, browser also
    # submit a empty part without filename
    if file.filename == '':
        app.logger.warning('%s (%s): No selected file', addr, user)
        abort(500)
    if file and allowed_file(file.filename):
        H = RegHandler()
        hd = hashlib.sha256()   # digest of the content with the marked region stripped
        hda = hashlib.sha256()  # digest of the full dump.xml
        size = 0
        uniqname = ""
        # Per-upload scratch directory under UPLOAD_FOLDER.
        tempdir = os.path.join(app.config['UPLOAD_FOLDER'], str(uuid.uuid4()))
        os.mkdir(tempdir)
        # New filename harcoded - dump.xml
        filename = os.path.join(tempdir, 'dump.xml')
        file.save(filename)
        # Check zip archive
        try:
            # SOME CHECK???!!!
            # Validate against the XSD via external xmllint.
            args = [
                'xmllint', '--noout', '--schema', '/srv/dumpby/dump.xsd',
                filename
            ]
            rcode = subprocess.call(args)
            if rcode == 0:
                app.logger.info("%s (%s): check passed", addr, user)
            else:
                app.logger.error("%s (%s): xmllint execute error %d",
                                 addr, user, rcode)
                # Bare `raise` with no active exception raises
                # RuntimeError, which the bare except below catches —
                # effectively a jump to the error path.
                raise
            # parse xml (get updatetime)
            parser = xml.sax.make_parser()
            parser.setFeature(xml.sax.handler.feature_namespaces, 0)
            parser.setContentHandler(H)
            parser.parse(filename)
            # get hash sha256
            # Streaming two-digest pass: hda hashes everything; hd hashes
            # the content with the _rb-prefixed / _re-suffixed region
            # removed (fl is the state: 0 = before start marker,
            # 1 = inside, 2 = done).
            with open(filename, 'rb') as fh:
                s = b''
                p = b''
                fl = 0
                for block in iter(lambda: fh.read(BLOCK_SIZE), b''):
                    hda.update(block)
                    if fl == 0:
                        s += block
                        if _rb.search(s):
                            hd.update(_rb.sub(b"", s))
                            fl = 1
                    elif fl == 1:
                        s = p + block
                        if _re.search(s):
                            hd.update(_re.sub(b"", s))
                            fl = 2
                        else:
                            hd.update(p)
                    p = block
        except:
            # Bare except: any validation/parsing failure is logged,
            # scratch files are removed, and the request gets a 500.
            app.logger.error("%s (%s): Check error %s",
                             addr, user, sys.exc_info()[1])
            if os.path.exists(filename):
                os.unlink(filename)
            if os.path.exists(tempdir):
                os.rmdir(tempdir)
            abort(500)
        uniqid = hda.hexdigest()
        realid = hd.hexdigest()
        uniqname = uniqid + '.xml'
        # Shard storage by the first two byte-pairs of the digest.
        datadir = os.path.join(app.config['DATA_FOLDER'],
                               uniqname[0:2], uniqname[2:4])
        if not os.path.exists(datadir):
            os.makedirs(datadir)
        newfilename = os.path.join(datadir, uniqname)
        # check database for an existing record with this digest
        cur = mysql.connection.cursor()
        cur.execute('SELECT * FROM dumps WHERE id = %s', (uniqid, ))
        rv = cur.fetchall()
        dump = rv[0] if rv else None
        if dump is not None:
            app.logger.warning('%s (%s): Record %s already exists',
                               addr, user, uniqname)
            if dump['a'] == 0:
                # presumably 'a' flags whether the file is archived;
                # re-add the file when it is not — TODO confirm.
                add_file(addr, user, uniqname, filename, newfilename)
            app.logger.warning('%s (%s): Archive %s, skipping...',
                               addr, user, uniqname)
        else:
            add_file(addr, user, uniqname, filename, newfilename)
            cur.execute(
                'INSERT INTO `dumps` (`id`, `crc`, `ut`, `s`, `u`) VALUES (%s, %s, %s, %s, %s)',
                (uniqid, realid, int(H.updateTime_ut), size, int(time.time())))
            mysql.connection.commit()
            app.logger.info("%s (%s): %s (%s) record was added",
                            addr, user, uniqid, realid)
        # Remove the uploaded file and its scratch directory.
        if os.path.exists(filename):
            os.unlink(filename)
        if os.path.exists(tempdir):
            os.rmdir(tempdir)
        return '''
<!doctype html>
<title>OK</title>
<h1>OK %s updateTime=%s</h1>
''' % (uniqname, int(H.updateTime_ut))
    else:
        app.logger.error("%s (%s): Bogus filename %s", addr, user, file.filename)
        abort(500)
    # NOTE(review): the source appears truncated here — the body and the
    # closing quotes of this triple-quoted response (likely an HTML upload
    # form) are missing from the file as seen.
    return '''
# Called when an element ends.
def endElement(self, tag):
    """SAX end-of-element hook: persist a finished <node> to the DB.

    When a ``node`` element closes and the accumulated ``self.node`` is
    flagged ``saveToDB == 1``, inserts one row into ``osm_nodes`` and one
    row per collected tag into ``osm_tags``.  Always resets ``self.node``
    afterwards.
    """
    if tag == "node":
        if self.node and self.node.saveToDB == 1:
            # Node contains tourism tag with the right values, save to DB.
            try:
                # Insert the node itself, then one row per tag keyed on
                # the node's auto-generated id.
                cursor = myDB.executeQuery(
                    "INSERT INTO osm_nodes (lat, lon) VALUES (%s, %s)",
                    (self.node.lat, self.node.lon))
                nodeID = cursor.lastrowid
                # BUG FIX: the loop variable used to be named `tag`,
                # shadowing the element-name parameter; the old error
                # path then did `print tag.k, tag.v, e` (Python 2
                # syntax), which raised AttributeError whenever the
                # node INSERT itself failed (tag was still the string
                # "node").  Renamed the loop variable and made the
                # error report safe for Python 3.
                for node_tag in self.node.tags:
                    myDB.executeQuery(
                        "INSERT INTO osm_tags (node_id, k, v) VALUES (%s, %s, %s)",
                        (nodeID, node_tag.k, node_tag.v))
            except Exception as e:
                print("Failed to save node to DB:", e)
        self.node = ""


if __name__ == "__main__":
    # create an XMLReader
    parser = xml.sax.make_parser()
    # turn off namespaces
    parser.setFeature(xml.sax.handler.feature_namespaces, 0)
    # override the default ContextHandler
    Handler = XMLHandler()
    parser.setContentHandler(Handler)
    parser.parse("openStreetMapData.xml")