def consumer_queue(proc_id, queue):
    """Drain batches from ``queue`` and load one Validation per record.

    Polls ``queue`` with a one-second timeout until the ``'STOP'`` sentinel
    is received. The sentinel is put back on the queue before returning
    (presumably so other consumers sharing the queue also see it).

    Args:
        proc_id: identifier of this consumer. Kept for interface
            compatibility; the loop does not need it.
        queue: object whose ``get(block, timeout)`` raises ``Empty`` on
            timeout. Each item is either the string ``'STOP'`` or an
            iterable of records passed to ``Validation.create``.
    """
    # NOTE(review): relies on a module-level ``conf`` existing by the time
    # this function is called - confirm against the caller.
    loader = Loader(conf)
    try:
        while True:
            try:
                # Block for up to one second. ``proc_id`` used to be passed
                # in the ``block`` position, so a worker with id 0 never
                # blocked and spun on Empty.
                # NOTE(review): assumes ``queue`` follows the standard
                # Queue.get(block, timeout) signature.
                consumer_data = queue.get(True, 1)
            except Empty:
                # nothing arrived within the timeout; poll again
                continue

            if consumer_data == 'STOP':
                queue.put('STOP')
                break

            for data in consumer_data:
                v = Validation()
                v.create(data)
                loader.load(v)
    finally:
        # close the loader even when a record fails to load
        loader.close()
    '/home/ubuntu/semantics_pipeline/pipeline_tests/identified/*.json')

with open('local_rds.conf', 'r') as f:
    conf = json.loads(f.read())
loader = Loader(conf)

for i, f in enumerate(files[100:]):
    with open(f, 'r') as g:
        data = json.loads(g.read())

    fname = f.split('/')[-1].split('_')[0]
    fmt = data.get('response_datatype', 'unknown')
    identity = data.get('identity', [])

    try:
        r = Response()
        r.create(data)
        r_id = loader.load(r)
    except Exception as ex:
        print fname, ex
        continue

    if identity and r_id:
        try:
            d = Identity()
            data.update({"response_id": r_id})
            d.create(data)
            loader.load(d)
        except:
            continue
# Example #3
# 0
    conf = json.loads(f.read())
reader = ResponseReader(conf)

loader = Loader(conf)

# get the set, validate, store outputs
# but need to paginate because of ram issues
for i in xrange(0, 668110, 25):
    print 'QUERYING {0}:{1}'.format(i, 25)
    for response in reader.read('', limit=25, offset=i):
        print response.source_url

        xml = response.cleaned_content
        stderr = validate_in_memory(xml)

        data = {
            "response_id": response.id,
            "valid": 'Error at' not in stderr,
            "validated_on": datetime.now()
        }
        if stderr:
            data.update({"errors": [s.strip() for s in stderr.split('\n\n')]})
            print '\t{0}'.format(stderr[:100])

        try:
            v = Validation()
            v.create(data)
            loader.load(v)
        except Exception as ex:
            print ex
from mpp.models import Response
import traceback

"""

"""
# set up the connection
with open("local_rds.conf", "r") as f:
    conf = json.loads(f.read())
loader = Loader(conf)

with open("responses_with_federal_schemas.txt", "r") as f:
    responses = [g.strip() for g in f.readlines() if g]

for i, response in enumerate(responses):
    if i % 5000 == 0:
        print "finished: ", i

    with open(response, "r") as f:
        data = json.loads(f.read())

    r = Response()
    r.create(data)

    try:
        loader.load(r)
    except Exception as ex:
        print response
        print "\t", ex
        traceback.print_exc()
loader = Loader(conf)

# get the set, validate, store outputs
# but need to paginate because of ram issues
for i in xrange(0, 668110, 25):
    print 'QUERYING {0}:{1}'.format(i, 25)
    for response in reader.read('', limit=25, offset=i):
        print response.source_url

        xml = response.cleaned_content
        stderr = validate_in_memory(xml)

        data = {
            "response_id": response.id,
            "valid": 'Error at' not in stderr,
            "validated_on": datetime.now()
        }
        if stderr:
            data.update({
                "errors": [s.strip() for s in stderr.split('\n\n')]
            })
            print '\t{0}'.format(stderr[:100])

        try:
            v = Validation()
            v.create(data)
            loader.load(v)
        except Exception as ex:
            print ex
from mpp.loaders import Loader
from mpp.models import Response
import traceback
'''

'''
# set up the connection
with open('local_rds.conf', 'r') as f:
    conf = json.loads(f.read())
loader = Loader(conf)

with open('responses_with_federal_schemas.txt', 'r') as f:
    responses = [g.strip() for g in f.readlines() if g]

for i, response in enumerate(responses):
    if i % 5000 == 0:
        print 'finished: ', i

    with open(response, 'r') as f:
        data = json.loads(f.read())

    r = Response()
    r.create(data)

    try:
        loader.load(r)
    except Exception as ex:
        print response
        print '\t', ex
        traceback.print_exc()