def consumer_queue(proc_id, queue):
    """Consume batches from *queue* and load each item as a Validation.

    Each queue item is either the sentinel string ``'STOP'`` or an iterable
    of data objects.  Every data object is wrapped in a ``Validation`` (via
    ``Validation.create``) and handed to a ``Loader`` built from the
    module-level ``conf``.

    When ``'STOP'`` is received it is put back on the queue before this
    worker exits, so the other consumers reading the same queue see it too.

    :param proc_id: identifier of the worker; kept for interface
        compatibility, not used by the loop itself.
    :param queue: queue object exposing ``get(block, timeout)`` and ``put``.
    """
    loader = Loader(conf)
    try:
        while True:
            try:
                # Block for up to one second.  The first positional argument
                # of Queue.get is ``block``; passing ``proc_id`` there made
                # worker 0 poll without blocking and spin on Empty.
                consumer_data = queue.get(True, 1)
            except Empty:
                # nothing available yet; keep waiting
                continue

            if consumer_data == 'STOP':
                # re-post the sentinel for the remaining consumers
                queue.put('STOP')
                break

            for data in consumer_data:
                v = Validation()
                v.create(data)
                loader.load(v)
    finally:
        # release the loader even if a load fails part-way through
        loader.close()
def main():
    """Validate stored XML responses page by page and persist the results.

    Command-line options:
      --connection/-c  must be supplied.  NOTE(review): its value is never
                       read; the engine URL is taken from ``big_rds.conf``
                       -- confirm which one is intended.
      --start/-s       first row offset (integer).
      --end/-e         exclusive end offset (integer).
      --interval/-i    page size, also used as the offset step (positive
                       integer).

    For every page of ``Response`` rows whose ``schemas`` is neither NULL nor
    ``'{}'``, each response without existing validations has its
    ``cleaned_content`` written to a temporary ``.xml`` file, which is fed to
    the ``StdInParse`` command through ``TimedCmd`` (limit argument 120).
    The command's error output decides validity (``'Error at'`` marks an
    invalid document).  The resulting ``Validation`` objects are committed
    once per page; a failed commit is rolled back and reported.

    Output uses single-argument ``print(...)``, which behaves the same under
    the Python 2 print statement.
    """
    op = OptionParser()
    op.add_option('--connection', '-c')
    op.add_option('--start', '-s')
    op.add_option('--end', '-e')
    op.add_option('--interval', '-i')
    options, arguments = op.parse_args()

    if not options.connection:
        op.error('No RDS Connection provided')

    try:
        page_size = int(options.interval)
        start = int(options.start)
        end = int(options.end)
        if page_size <= 0:
            # a zero step would make xrange() raise, a negative one would
            # silently process nothing
            raise ValueError('interval must be a positive integer')
    except (TypeError, ValueError) as ex:
        # TypeError: option missing (None); ValueError: not an integer
        op.error('Invalid pagination integer: {0}'.format(ex))

    with open('big_rds.conf', 'r') as f:
        conf = js.loads(f.read())

    # our connection
    engine = sqla.create_engine(conf.get('connection'))
    Session = sessionmaker()
    Session.configure(bind=engine)
    session = Session()

    cmd = "StdInParse -v=always -n -s -f < %s"

    try:
        for offset in xrange(start, end, page_size):
            # ``Response.schemas is not None`` is evaluated by Python (always
            # True) and never reaches SQL; ``isnot(None)`` emits IS NOT NULL.
            # LIMIT/OFFSET paging needs a stable ORDER BY so that pages
            # neither overlap nor skip rows.
            page = session.query(Response).filter(
                and_(Response.schemas.isnot(None), Response.schemas != '{}')
            ).order_by(Response.id).limit(page_size).offset(offset).all()

            appends = []
            pending_ids = []  # response ids in this page's batch, for reporting
            for response in page:
                if response.validations:
                    # already validated
                    continue

                response_id = response.id

                handle, name = tempfile.mkstemp(suffix='.xml')
                try:
                    try:
                        write(handle, response.cleaned_content)
                    finally:
                        # never leak the descriptor, even if the write fails
                        close(handle)

                    # stderr = validate_in_memory(cleaned_content)
                    try:
                        status, output, error = TimedCmd(cmd % name).run(120)
                    except Exception:
                        print('{0} {1}'.format(
                            'failed validation: ', response_id))
                        continue
                finally:
                    # the temp file is only needed while the command runs
                    unlink(name)

                validated_data = {
                    "response_id": response_id,
                    "valid": 'Error at' not in error,
                    "validated_on": datetime.now().isoformat()
                }
                if error:
                    # error output is split on blank lines, one entry each
                    validated_data.update({
                        "errors": [
                            v.strip() for v in error.split('\n\n') if v
                        ]
                    })

                v = Validation()
                v.create(validated_data)
                appends.append(v)
                pending_ids.append(response_id)

            try:
                session.add_all(appends)
                session.commit()
            except Exception as ex:
                # roll back first so the session is usable for the next page
                # even if reporting itself fails
                session.rollback()
                print(ex)
                print(pending_ids)
                print('')
    finally:
        session.close()
def main():
    """Validate the responses whose ids are listed in one or more files.

    Command-line options:
      --connection/-c  must be supplied.  NOTE(review): its value is never
                       read; the engine URL is taken from ``big_rds.conf``
                       -- confirm which one is intended.
      --files/-f       comma-delimited list of file paths, each file holding
                       one response id per line.

    For every listed id the matching ``Response`` is fetched; if it has no
    validations yet, its ``cleaned_content`` is written to a temporary
    ``.xml`` file and fed to the ``StdInParse`` command through ``TimedCmd``
    (limit argument 60).  A failing command run is recorded as an invalid
    validation rather than skipped.  Each ``Validation`` is committed on its
    own; a failed commit is rolled back and reported.

    Output uses single-argument ``print(...)``, which behaves the same under
    the Python 2 print statement.
    """
    op = OptionParser()
    op.add_option("--connection", "-c")
    # comma-delim list, one response id/line
    op.add_option("--files", "-f")
    options, arguments = op.parse_args()

    if not options.connection:
        op.error("No RDS Connection provided")
    if not options.files:
        op.error("No file list")

    with open("big_rds.conf", "r") as f:
        conf = js.loads(f.read())

    # our connection
    engine = sqla.create_engine(conf.get("connection"))
    Session = sessionmaker()
    Session.configure(bind=engine)
    session = Session()

    cmd = "StdInParse -v=always -n -s -f < %s"

    # tolerate spaces around the commas and empty entries ("a.txt, b.txt,")
    paths = [p.strip() for p in options.files.split(",") if p.strip()]

    try:
        for path in paths:
            with open(path, "r") as g:
                # Lines keep their newline, so test the stripped text:
                # a blank line would otherwise reach int('') and crash.
                data = [int(line.strip()) for line in g if line.strip()]

            for d in data:
                response = session.query(Response).filter(
                    Response.id == d
                ).first()
                if response is None:
                    # id from the file has no matching row; nothing to do
                    print("no response found: {0}".format(d))
                    continue
                if response.validations:
                    # already validated
                    continue

                response_id = response.id

                handle, name = tempfile.mkstemp(suffix=".xml")
                try:
                    try:
                        write(handle, response.cleaned_content)
                    finally:
                        # never leak the descriptor, even if the write fails
                        close(handle)

                    # stderr = validate_in_memory(cleaned_content)
                    try:
                        status, output, error = TimedCmd(cmd % name).run(60)
                    except Exception:
                        print("{0} {1}".format(
                            "failed validation: ", response_id))
                        # keep going: the failure itself is stored as an
                        # invalid result ("Error at" marks it invalid below)
                        error = "Error at validation CLI: timeout error"
                finally:
                    # the temp file is only needed while the command runs
                    unlink(name)

                validated_data = {
                    "response_id": response_id,
                    "valid": "Error at" not in error,
                    "validated_on": datetime.now().isoformat(),
                }
                if error:
                    # error output is split on blank lines, one entry each
                    validated_data.update({
                        "errors": [
                            v.strip() for v in error.split("\n\n") if v
                        ]
                    })

                v = Validation()
                v.create(validated_data)

                try:
                    session.add(v)
                    session.commit()
                except Exception as ex:
                    # roll back first so the session stays usable
                    session.rollback()
                    print(ex)
                    print(d)
                    print("")
    finally:
        session.close()
# Load the configuration and build the reader/loader pair from it.
# NOTE(review): ``f`` must already be an open, readable config file handle
# at this point; it is bound outside the code visible here -- confirm.
conf = json.loads(f.read())
reader = ResponseReader(conf)
loader = Loader(conf)

# Rows fetched per query; also the step between successive offsets.
PAGE_SIZE = 25
# Exclusive upper bound of the offsets queried.
# NOTE(review): presumably the number of stored responses -- confirm.
MAX_OFFSET = 668110

# get the set, validate, store outputs
# but need to paginate because of ram issues
for i in xrange(0, MAX_OFFSET, PAGE_SIZE):
    print('QUERYING {0}:{1}'.format(i, PAGE_SIZE))

    for response in reader.read('', limit=PAGE_SIZE, offset=i):
        print(response.source_url)

        stderr = validate_in_memory(response.cleaned_content)

        data = {
            "response_id": response.id,
            # the validator reports problems as "Error at ..." text
            "valid": 'Error at' not in stderr,
            "validated_on": datetime.now()
        }
        if stderr:
            # One entry per blank-line-separated message; drop the empty
            # pieces left by leading/trailing blank lines so they are not
            # stored as errors.
            data.update({
                "errors": [
                    s.strip() for s in stderr.split('\n\n') if s.strip()
                ]
            })
            # short preview of the validator output
            print('\t{0}'.format(stderr[:100]))

        try:
            v = Validation()
            v.create(data)
            loader.load(v)
        except Exception as ex:
            # best effort: report the failure and move on to the next row
            print(ex)
# Loader that stores the Validation objects built below.
# NOTE(review): ``conf`` and ``reader`` are bound outside the code visible
# here -- confirm they are set up before this point.
loader = Loader(conf)

# Rows fetched per query; also the step between successive offsets.
PAGE_SIZE = 25
# Exclusive upper bound of the offsets queried.
# NOTE(review): presumably the number of stored responses -- confirm.
MAX_OFFSET = 668110

# get the set, validate, store outputs
# but need to paginate because of ram issues
for i in xrange(0, MAX_OFFSET, PAGE_SIZE):
    print('QUERYING {0}:{1}'.format(i, PAGE_SIZE))

    for response in reader.read('', limit=PAGE_SIZE, offset=i):
        print(response.source_url)

        xml = response.cleaned_content
        stderr = validate_in_memory(xml)

        data = {
            "response_id": response.id,
            # the validator reports problems as "Error at ..." text
            "valid": 'Error at' not in stderr,
            "validated_on": datetime.now()
        }
        if stderr:
            # Split the output on blank lines and keep only the non-empty
            # messages; leading/trailing blank lines would otherwise be
            # stored as empty error entries.
            messages = [s.strip() for s in stderr.split('\n\n')]
            data.update({"errors": [m for m in messages if m]})
            # short preview of the validator output
            print('\t{0}'.format(stderr[:100]))

        try:
            v = Validation()
            v.create(data)
            loader.load(v)
        except Exception as ex:
            # best effort: report the failure and continue with the next row
            print(ex)