Example #1
    def parse(self):
        parser = sax.make_parser()
        parser.setContentHandler(self.handler)
        parser.setFeature(sax.handler.feature_namespaces, 1)
        parser.parse(self.xcri_file)
        #buffered_data = self.xcri_file.read(self.buffer_size)
        #while buffered_data:
        #    parser.feed(buffered_data)
        #    buffered_data = self.xcri_file.read(self.buffer_size)
        #parser.close()

        # transformations
        for p in self.handler.presentations:
            try:
                p['provider_title'] = p['provider_title'][0]
                p['course_title'] = p['course_title'][0]
                p['course_identifier'] = self._get_identifier(p['course_identifier'])
                p['course_description'] = ''.join(p['course_description'])
                presentation_id = self._get_identifier(p['presentation_identifier'])
                if not presentation_id:
                    # Presentation identifier is the main ID for a document
                    # if there is no ID, we do not want to import it
                    raise Exception("Presentation with no ID")
                p['presentation_identifier'] = presentation_id
                if 'presentation_start' in p:
                    p['presentation_start'] = self._date_to_solr_format(p['presentation_start'][0])
                if 'presentation_end' in p:
                    p['presentation_end'] = self._date_to_solr_format(p['presentation_end'][0])
                if 'presentation_applyFrom' in p:
                    p['presentation_applyFrom'] = self._date_to_solr_format(p['presentation_applyFrom'][0])
                if 'presentation_applyUntil' in p:
                    p['presentation_applyUntil'] = self._date_to_solr_format(p['presentation_applyUntil'][0])
                if 'presentation_bookingEndpoint' in p:
                    p['presentation_bookingEndpoint'] = p['presentation_bookingEndpoint'][0]
                if 'presentation_memberApplyTo' in p:
                    p['presentation_memberApplyTo'] = p['presentation_memberApplyTo'][0]
                if 'presentation_attendanceMode' in p:
                    p['presentation_attendanceMode'] = p['presentation_attendanceMode'][0]
                if 'presentation_attendancePattern' in p:
                    p['presentation_attendancePattern'] = p['presentation_attendancePattern'][0]
                if 'presentation_venue_identifier' in p:
                    # we're only interested in the OxPoints ID at the moment
                    oxpoints = self._get_identifier(p['presentation_venue_identifier'],
                        uri_base="http://oxpoints.oucs.ox.ac.uk/id/")
                    if oxpoints:
                        p['presentation_venue_identifier'] = 'oxpoints:{id}'.format(id=oxpoints)
                    else:
                        del p['presentation_venue_identifier']

                p['course_subject'] = [subject for subject in p['course_subject'] if subject not in self.ignore_subjects]

                self.presentations.append(p)
            except Exception as e:
                logger.warning("Couldn't transform presentation", exc_info=True,
                    extra={'presentation': p})
Example #2
def parse_file(input_file, output_file):
    # Create an XMLReader
    parser = xml.sax.make_parser()
    # Turn off namespaces
    parser.setFeature(xml.sax.handler.feature_namespaces, 0)

    # Override the default ContentHandler
    Handler = TemplatesAndModulesHandler(input_file, output_file)
    parser.setContentHandler(Handler)

    parser.parse(input_file)
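
The handler objects wired into these parsers (self.handler above, TemplatesAndModulesHandler here) are defined elsewhere in their projects. Below is a minimal sketch of the kind of ContentHandler they stand for; the class name and the <title> element it collects are illustrative assumptions, not taken from the original code.

import xml.sax

class TitleCollector(xml.sax.ContentHandler):
    # Accumulates the text content of every <title> element it sees.
    def __init__(self):
        super().__init__()
        self.titles = []
        self._buffer = None

    def startElement(self, name, attrs):
        if name == "title":
            self._buffer = []

    def characters(self, content):
        # characters() may be called several times per element, so buffer the pieces
        if self._buffer is not None:
            self._buffer.append(content)

    def endElement(self, name):
        if name == "title" and self._buffer is not None:
            self.titles.append("".join(self._buffer))
            self._buffer = None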
Example #3
def make_parser(handler):
	"""
	Convenience function to construct a document parser with namespaces
	enabled and validation disabled.  Document validation is a nice
	feature, but enabling validation can require the LIGO LW DTD to be
	downloaded from the LDAS document server if the DTD is not included
	inline in the XML.  This requires a working connection to the
	internet and the server to be up.
	"""
	parser = sax.make_parser()
	parser.setContentHandler(handler)
	parser.setFeature(sax.handler.feature_namespaces, True)
	parser.setFeature(sax.handler.feature_validation, False)
	parser.setFeature(sax.handler.feature_external_ges, False)
	return parser
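
A brief usage sketch for the helper above, reusing the illustrative TitleCollector handler from earlier; document.xml is a placeholder file name.

handler = TitleCollector()      # any xml.sax.ContentHandler subclass works
parser = make_parser(handler)
parser.parse("document.xml")    # validation and external entities remain disabled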
Example #5
def upload_handler():
    addr = get_ip(request)
    acheck = get_auth(addr, request, ACCESS_UPLOAD)
    user = acheck['nick']
    # check if the post request has the file part
    if 'file' not in request.files:
        app.logger.warning('%s (%s): No file part', addr, user)
        abort(500)
    file = request.files['file']
    # if the user does not select a file, the browser may also
    # submit an empty part without a filename
    if file.filename == '':
        app.logger.warning('%s (%s): No selected file', addr, user)
        abort(500)
    if file and allowed_file(file.filename):
        H = RegHandler()
        hd = hashlib.sha256()
        hda = hashlib.sha256()
        mtime = ""
        size = 0
        uniqname = ""
        tempdir = os.path.join(app.config['UPLOAD_FOLDER'], str(uuid.uuid4()))
        os.mkdir(tempdir)
        # New filename hardcoded - dump.zip
        filename = os.path.join(tempdir, 'dump.zip')
        file.save(filename)
        asize = os.path.getsize(filename)
        # Check zip archive
        if zipfile.is_zipfile(filename):
            with zipfile.ZipFile(filename) as myzip:
                try:
                    # check that dump.xml and dump.xml.sig are present
                    info1 = myzip.getinfo('dump.xml')
                    mtime = "%4d-%02d-%02dT%02d:%02d:%02d+03:00" % info1.date_time  # fixed time zone
                    size = info1.file_size
                    info2 = myzip.getinfo('dump.xml.sig')
                    # extract dump.xml and dump.xml.sig
                    myxml = myzip.extract('dump.xml', tempdir)
                    mysig = myzip.extract('dump.xml.sig', tempdir)
                    # check signature
                    tempfile = os.path.join(tempdir, 'dump.xml.temp')
                    args = [
                        'openssl', 'smime', '-verify', '-engine', 'gost',
                        '-in', mysig, '-noverify', '-inform', 'DER',
                        '-content', myxml, '-out', tempfile
                    ]
                    if subprocess.call(args) == 0:
                        parser = xml.sax.make_parser()
                        parser.setFeature(xml.sax.handler.feature_namespaces,
                                          0)
                        parser.setContentHandler(H)
                        parser.parse(myxml)
                    else:
                        app.logger.error("%s (%s): OpenSSL execute error %s",
                                         addr, user,
                                         sys.exc_info()[1])
                        abort(500)
                    # sha256 hashes: hda covers the whole file, hd skips the
                    # leading/trailing parts matched by _rb and _re
                    with open(myxml, 'rb') as fh:
                        s = b''
                        p = b''
                        fl = 0
                        for block in iter(lambda: fh.read(BLOCK_SIZE), b''):
                            hda.update(block)
                            if fl == 0:
                                s += block
                                if _rb.match(s):
                                    hd.update(_rb.sub(b"", s))
                                    fl = 1
                            elif fl == 1:
                                s = p + block
                                if _re.search(s):
                                    hd.update(_re.sub(b"", s))
                                    fl = 2
                                else:
                                    hd.update(p)
                                    p = block
                    # clean up temporary files
                    if os.path.exists(tempfile):
                        os.unlink(tempfile)
                    if os.path.exists(myxml):
                        os.unlink(myxml)
                    if os.path.exists(mysig):
                        os.unlink(mysig)
                except Exception:
                    app.logger.error("%s (%s): Check error %s", addr, user,
                                     sys.exc_info()[1])
                    abort(500)
            uniqid = hda.hexdigest()
            realid = hd.hexdigest()
            uniqname = uniqid + '.zip'
            datadir = os.path.join(app.config['DATA_FOLDER'], uniqname[0:2],
                                   uniqname[2:4])
            if not os.path.exists(datadir):
                os.makedirs(datadir)
            newfilename = os.path.join(datadir, uniqname)
            # check database
            cur = mysql.connection.cursor()
            cur.execute('SELECT * FROM dumps WHERE id = %s', (uniqid, ))
            rv = cur.fetchall()
            dump = rv[0] if rv else None
            _mtime = dateutil.parser.parse(mtime).timestamp()
            if dump is not None:
                app.logger.warning('%s (%s): Record %s already exists', addr,
                                   user, uniqname)
                if dump['a'] == 0:
                    add_file(addr, user, uniqname, filename, newfilename)
                app.logger.warning('%s (%s): Archive %s, skipping...', addr,
                                   user, uniqname)
                if _mtime != dump['m']:
                    app.logger.warning(
                        '%s (%s): Record %s has mtime: %s, but file has mtime: %s',
                        addr, user, uniqname, dump['m'], _mtime)
            else:
                add_file(addr, user, uniqname, filename, newfilename)
                cur.execute(
                    'INSERT INTO `dumps` (`id`, `crc`, `ut`, `utu`, `m`, `as`, `s`, `u`) VALUES (%s, %s, %s, %s, %s, %s, %s, %s)',
                    (uniqid, realid, int(
                        H.updateTime_ut), int(H.updateTimeUrgently_ut),
                     int(_mtime), size, asize, int(time.time())))
                mysql.connection.commit()
                app.logger.info("%s (%s): %s (%s) record was added", addr,
                                user, uniqid, realid)
            if os.path.exists(filename):
                os.unlink(filename)
            if os.path.exists(tempdir):
                os.rmdir(tempdir)
        else:
            app.logger.error("%s (%s): Not zip file", addr, user)
            abort(500)
        return '''
                <!doctype html>
                <title>OK</title>
                <h1>OK %s updateTime=%s updateTimeUrgently=%s mtime=%s</h1>
                ''' % (uniqname, int(
            H.updateTime_ut), int(H.updateTimeUrgently_ut), int(_mtime))
    else:
        app.logger.error("%s (%s): Bogus filename %s", addr, user,
                         file.filename)
        abort(500)
Example #6
def upload_handler():
    addr = get_ip(request)
    acheck = get_auth(addr, request, ACCESS_UPLOAD)
    user = acheck['nick']
    # check if the post request has the file part
    if 'file' not in request.files:
        app.logger.warning('%s (%s): No file part', addr, user)
        abort(500)
    file = request.files['file']
    # if the user does not select a file, the browser may also
    # submit an empty part without a filename
    if file.filename == '':
        app.logger.warning('%s (%s): No selected file', addr, user)
        abort(500)
    if file and allowed_file(file.filename):
        H = RegHandler()
        hd = hashlib.sha256()
        hda = hashlib.sha256()
        size = 0
        uniqname = ""
        tempdir = os.path.join(app.config['UPLOAD_FOLDER'], str(uuid.uuid4()))
        os.mkdir(tempdir)
        # New filename hardcoded - dump.xml
        filename = os.path.join(tempdir, 'dump.xml')
        file.save(filename)
        # Validate the uploaded XML
        try:
            # validate against the XSD schema using xmllint
            args = [
                'xmllint', '--noout', '--schema', '/srv/dumpby/dump.xsd',
                filename
            ]
            rcode = subprocess.call(args)
            if rcode == 0:
                app.logger.info("%s (%s): check passed", addr, user)
            else:
                app.logger.error("%s (%s): xmllint execute error %d", addr,
                                 user, rcode)
                raise RuntimeError("xmllint validation failed")
            # parse xml (get updatetime)
            parser = xml.sax.make_parser()
            parser.setFeature(xml.sax.handler.feature_namespaces, 0)
            parser.setContentHandler(H)
            parser.parse(filename)
            # sha256 hashes: hda covers the whole file, hd skips the
            # leading/trailing parts matched by _rb and _re
            with open(filename, 'rb') as fh:
                s = b''
                p = b''
                fl = 0
                for block in iter(lambda: fh.read(BLOCK_SIZE), b''):
                    hda.update(block)
                    if fl == 0:
                        s += block
                        if _rb.search(s):
                            hd.update(_rb.sub(b"", s))
                            fl = 1
                    elif fl == 1:
                        s = p + block
                        if _re.search(s):
                            hd.update(_re.sub(b"", s))
                            fl = 2
                        else:
                            hd.update(p)
                            p = block
        except Exception:
            app.logger.error("%s (%s): Check error %s", addr, user,
                             sys.exc_info()[1])
            if os.path.exists(filename):
                os.unlink(filename)
            if os.path.exists(tempdir):
                os.rmdir(tempdir)
            abort(500)
        uniqid = hda.hexdigest()
        realid = hd.hexdigest()
        uniqname = uniqid + '.xml'
        datadir = os.path.join(app.config['DATA_FOLDER'], uniqname[0:2],
                               uniqname[2:4])
        if not os.path.exists(datadir):
            os.makedirs(datadir)
        newfilename = os.path.join(datadir, uniqname)
        # check database
        cur = mysql.connection.cursor()
        cur.execute('SELECT * FROM dumps WHERE id = %s', (uniqid, ))
        rv = cur.fetchall()
        dump = rv[0] if rv else None
        if dump is not None:
            app.logger.warning('%s (%s): Record %s already exists', addr, user,
                               uniqname)
            if dump['a'] == 0:
                add_file(addr, user, uniqname, filename, newfilename)
            app.logger.warning('%s (%s): Archive %s, skipping...', addr, user,
                               uniqname)
        else:
            add_file(addr, user, uniqname, filename, newfilename)
            cur.execute(
                'INSERT INTO `dumps` (`id`, `crc`, `ut`, `s`, `u`) VALUES (%s, %s, %s, %s, %s)',
                (uniqid, realid, int(H.updateTime_ut), size, int(time.time())))
            mysql.connection.commit()
            app.logger.info("%s (%s): %s (%s) record was added", addr, user,
                            uniqid, realid)
        if os.path.exists(filename):
            os.unlink(filename)
        if os.path.exists(tempdir):
            os.rmdir(tempdir)
        return '''
                <!doctype html>
                <title>OK</title>
                <h1>OK %s updateTime=%s</h1>
                ''' % (uniqname, int(H.updateTime_ut))
    else:
        app.logger.error("%s (%s): Bogus filename %s", addr, user,
                         file.filename)
        abort(500)
Example #7
    # Called when an element ends
    def endElement(self, tag):
        if tag == "node":
            if self.node and self.node.saveToDB == 1:
                #print "End of node. Node contains tourism tag with the right values, save to DB!"
                # save self.node class to DB
                try:
                    # Do a bunch of insert statements into the db schema
                    cursor = myDB.executeQuery(
                        "INSERT INTO osm_nodes (lat, lon) VALUES (%s, %s)",
                        (self.node.lat, self.node.lon))
                    nodeID = cursor.lastrowid
                    for tag in self.node.tags:
                        myDB.executeQuery(
                            "INSERT INTO osm_tags (node_id, k, v) VALUES (%s, %s, %s)",
                            (nodeID, tag.k, tag.v))
                except Exception as e:
                    print(tag.k, tag.v, e)
            self.node = ""


if (__name__ == "__main__"):
    # create an XMLReader
    parser = xml.sax.make_parser()
    # turn off namespaces
    parser.setFeature(xml.sax.handler.feature_namespaces, 0)
    # override the default ContentHandler
    Handler = XMLHandler()
    parser.setContentHandler(Handler)
    parser.parse("openStreetMapData.xml")
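
The XMLHandler instantiated in the __main__ block is not part of this listing. Below is a minimal sketch of what its startElement side might look like, based on the attributes and comments used in endElement above; the class name XMLHandlerSketch, the Node and Tag containers, and the tourism check are illustrative assumptions, not the original implementation.

import xml.sax

class Node:
    # Illustrative container for one OSM node and its tags.
    def __init__(self, lat, lon):
        self.lat = lat
        self.lon = lon
        self.tags = []
        self.saveToDB = 0

class Tag:
    # Illustrative container for one k/v tag element.
    def __init__(self, k, v):
        self.k = k
        self.v = v

class XMLHandlerSketch(xml.sax.ContentHandler):
    def __init__(self):
        super().__init__()
        self.node = ""

    # Called when an element starts
    def startElement(self, tag, attributes):
        if tag == "node":
            self.node = Node(attributes["lat"], attributes["lon"])
        elif tag == "tag" and self.node:
            t = Tag(attributes["k"], attributes["v"])
            self.node.tags.append(t)
            # mark nodes carrying a tourism tag, as implied by the comment in endElement
            if t.k == "tourism":
                self.node.saveToDB = 1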