def consumer_queue(proc_id, queue):
    """Worker loop: drain batches from ``queue`` and store them as Validations.

    Every item taken from the queue is iterated; each element is wrapped in a
    ``Validation`` (via ``create``) and passed to a ``Loader`` built from the
    module-level ``conf``. The string ``'STOP'`` is the shutdown sentinel: it
    is put back on the queue before this worker leaves the loop (presumably
    so other consumers sharing the queue also see it -- confirm with caller).

    :param proc_id: identifier of this worker. Kept for interface
        compatibility; it is no longer forwarded to ``queue.get``.
    :param queue: queue exposing ``get(block, timeout)`` and ``put``.
        NOTE(review): assumes stdlib ``Queue`` / ``multiprocessing.Queue``
        semantics, which the ``Empty`` handler suggests.
    """
    loader = Loader(conf)
    try:
        while True:
            try:
                # The first positional argument of get() is `block`, not an
                # id. Passing proc_id made worker 0 poll without blocking and
                # spin on Empty; block explicitly with a 1 second timeout.
                consumer_data = queue.get(True, 1)
            except Empty:
                # nothing arrived within the timeout; poll again
                continue

            if consumer_data == 'STOP':
                queue.put('STOP')
                break

            for data in consumer_data:
                v = Validation()
                v.create(data)
                loader.load(v)
    finally:
        # release the loader even when a batch raises
        loader.close()
def main():
    """Validate a paged range of stored responses and persist the results.

    Command-line options:
      --connection/-c  required (only checked for presence; the engine URL
                       itself is read from the 'connection' key of
                       ``big_rds.conf``)
      --start/-s       first row offset
      --end/-e         offset to stop before
      --interval/-i    page size: rows fetched per query and per commit

    For every response on a page that has no validations yet, the cleaned
    content is written to a temporary .xml file, the StdInParse command is run
    on it with a 120 second limit, and a ``Validation`` is built from the
    command's error output. Each page is committed as one batch; a failed
    commit is rolled back and reported, and processing moves to the next page.
    """
    op = OptionParser()
    op.add_option('--connection', '-c')
    op.add_option('--start', '-s')
    op.add_option('--end', '-e')
    op.add_option('--interval', '-i')
    options, arguments = op.parse_args()

    if not options.connection:
        op.error('No RDS Connection provided')

    try:
        LIMIT = int(options.interval)
        s = int(options.start)
        e = int(options.end)
    except (TypeError, ValueError) as ex:
        # TypeError: option missing (None); ValueError: not an integer
        op.error('Invalid pagination integer: {0}'.format(ex))

    if LIMIT <= 0:
        # a zero step makes xrange raise; a negative one makes no sense here
        op.error('Invalid pagination integer: interval must be positive')

    with open('big_rds.conf', 'r') as f:
        conf = js.loads(f.read())

    # our connection
    engine = sqla.create_engine(conf.get('connection'))
    Session = sessionmaker()
    Session.configure(bind=engine)
    session = Session()

    cmd = "StdInParse -v=always -n -s -f < %s"

    try:
        for OFFSET in xrange(s, e, LIMIT):
            appends = []
            # ids tracked separately so a failed commit can be reported
            # without touching the (possibly expired) Validation objects
            pending_ids = []

            # `x is not None` is evaluated by Python and yields a constant;
            # isnot(None) renders the intended SQL "IS NOT NULL".
            # LIMIT/OFFSET paging needs a stable order, hence order_by.
            page = session.query(Response).filter(
                and_(Response.schemas.isnot(None), Response.schemas != '{}')
            ).order_by(Response.id).limit(LIMIT).offset(OFFSET).all()

            for response in page:
                if response.validations:
                    # already validated on an earlier run
                    continue

                response_id = response.id
                cleaned_content = response.cleaned_content

                handle, name = tempfile.mkstemp(suffix='.xml')
                try:
                    try:
                        write(handle, cleaned_content)
                    finally:
                        # never leak the descriptor, even if write fails
                        close(handle)

                    # stderr = validate_in_memory(cleaned_content)
                    tc = TimedCmd(cmd % name)
                    try:
                        status, output, error = tc.run(120)
                    except Exception:
                        print("%s %s" % ('failed validation: ', response_id))
                        continue
                finally:
                    unlink(name)

                validated_data = {
                    "response_id": response_id,
                    "valid": 'Error at' not in error,
                    "validated_on": datetime.now().isoformat()
                }
                if error:
                    # messages are separated by blank lines
                    validated_data["errors"] = [
                        chunk.strip()
                        for chunk in error.split('\n\n') if chunk
                    ]

                v = Validation()
                v.create(validated_data)
                appends.append(v)
                pending_ids.append(response_id)

            try:
                session.add_all(appends)
                session.commit()
            except Exception as ex:
                # roll back first so reporting can never leave the session
                # stuck in a failed transaction
                session.rollback()
                print(ex)
                print(pending_ids)
                print('')
    finally:
        session.close()
def main():
    """Validate the responses whose ids are listed in one or more files.

    Command-line options:
      --connection/-c  required (only checked for presence; the engine URL
                       itself is read from the 'connection' key of
                       ``big_rds.conf``)
      --files/-f       required, comma-delimited list of files holding one
                       response id per line

    For each id the response is fetched; if it has no validations yet, its
    cleaned content is written to a temporary .xml file and the StdInParse
    command is run on it with a 60 second limit. A failed run is recorded as
    an invalid result rather than skipped. Each ``Validation`` is committed on
    its own; a failed commit is rolled back and reported.
    """
    op = OptionParser()
    op.add_option("--connection", "-c")
    op.add_option("--files", "-f")  # comma-delim list, one response id/line
    options, arguments = op.parse_args()

    if not options.connection:
        op.error("No RDS Connection provided")
    if not options.files:
        op.error("No file list")

    with open("big_rds.conf", "r") as f:
        conf = js.loads(f.read())

    # our connection
    engine = sqla.create_engine(conf.get("connection"))
    Session = sessionmaker()
    Session.configure(bind=engine)
    session = Session()

    cmd = "StdInParse -v=always -n -s -f < %s"

    try:
        for f in options.files.split(","):
            f = f.strip()
            if not f:
                # tolerate a trailing comma or doubled separators
                continue

            with open(f, "r") as g:
                # a raw line is never falsy (it still holds its newline), so
                # test the stripped text to really skip blank lines
                data = [int(a.strip()) for a in g if a.strip()]

            for d in data:
                response = session.query(Response).filter(Response.id == d).first()

                if response is None:
                    # the id list may reference rows that do not exist
                    print("no response found for id: {0}".format(d))
                    continue

                if response.validations:
                    # already validated on an earlier run
                    continue

                response_id = response.id
                cleaned_content = response.cleaned_content

                handle, name = tempfile.mkstemp(suffix=".xml")
                try:
                    try:
                        write(handle, cleaned_content)
                    finally:
                        # never leak the descriptor, even if write fails
                        close(handle)

                    # stderr = validate_in_memory(cleaned_content)
                    tc = TimedCmd(cmd % name)
                    try:
                        status, output, error = tc.run(60)
                    except Exception:
                        print("%s %s" % ("failed validation: ", response_id))
                        # record the failure as an invalid result
                        error = "Error at validation CLI: timeout error"
                finally:
                    unlink(name)

                validated_data = {
                    "response_id": response_id,
                    "valid": "Error at" not in error,
                    "validated_on": datetime.now().isoformat(),
                }
                if error:
                    # messages are separated by blank lines
                    validated_data["errors"] = [
                        chunk.strip() for chunk in error.split("\n\n") if chunk
                    ]

                v = Validation()
                v.create(validated_data)

                try:
                    session.add(v)
                    session.commit()
                except Exception as ex:
                    # roll back first so the session is usable for the next id
                    session.rollback()
                    print(ex)
                    print(d)
                    print('')
    finally:
        session.close()
# Example #4
# 0
    conf = json.loads(f.read())
# reader supplies the stored responses; loader stores the Validation records
reader = ResponseReader(conf)

loader = Loader(conf)

# Validate every response and store the outcome.
# The set is fetched in pages of 25 because of RAM limits.
for i in xrange(0, 668110, 25):
    print('QUERYING {0}:{1}'.format(i, 25))
    for response in reader.read('', limit=25, offset=i):
        print(response.source_url)

        # validator output is scanned for the 'Error at' marker below
        xml = response.cleaned_content
        stderr = validate_in_memory(xml)

        data = dict(
            response_id=response.id,
            valid='Error at' not in stderr,
            validated_on=datetime.now()
        )
        if stderr:
            # one entry per blank-line-separated chunk of output
            data["errors"] = [s.strip() for s in stderr.split('\n\n')]
            print('\t{0}'.format(stderr[:100]))

        # a failure to build or load one record is printed, not fatal
        try:
            v = Validation()
            v.create(data)
            loader.load(v)
        except Exception as ex:
            print(ex)
loader = Loader(conf)

# Walk the full response set, validate each item and store the result.
# Paginated (25 per query) to keep memory use down.
for i in xrange(0, 668110, 25):
    print('QUERYING {0}:{1}'.format(i, 25))
    for response in reader.read('', limit=25, offset=i):
        print(response.source_url)

        xml = response.cleaned_content
        stderr = validate_in_memory(xml)

        # assemble the record field by field
        data = {"response_id": response.id}
        data["valid"] = 'Error at' not in stderr
        data["validated_on"] = datetime.now()

        if stderr:
            # split the output on blank lines into separate error entries
            data["errors"] = [s.strip() for s in stderr.split('\n\n')]
            print('\t{0}'.format(stderr[:100]))

        # report, but do not stop on, a record that cannot be stored
        try:
            v = Validation()
            v.create(data)
            loader.load(v)
        except Exception as ex:
            print(ex)