Code Example #1
import json
from Queue import Empty

from mpp.loaders import Loader
from mpp.models import Validation

# connection settings shared by every worker process
with open('local_rds.conf', 'r') as f:
    conf = json.loads(f.read())


def consumer_queue(proc_id, queue):
    # each worker builds its own Loader from the shared conf
    loader = Loader(conf)
    while True:
        try:
            # block for up to a second waiting for the next batch
            consumer_data = queue.get(True, 1)
            if consumer_data == 'STOP':
                # put the sentinel back so sibling workers also stop
                queue.put('STOP')
                break

            for data in consumer_data:
                v = Validation()
                v.create(data)
                loader.load(v)
        except Empty:
            pass
    loader.close()
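For context, a minimal sketch of how consumer_queue might be driven: a parent process fills a multiprocessing.Queue with batches of record dicts, appends the 'STOP' sentinel the worker looks for, and starts a few consumer processes. The batch contents and the worker count below are illustrative assumptions, not taken from the original scripts.

from multiprocessing import Process, Queue

if __name__ == '__main__':
    queue = Queue()

    # illustrative batches; in practice these would be lists of
    # validation dicts produced elsewhere in the pipeline
    batches = [[{'response_id': 1}], [{'response_id': 2}]]
    for batch in batches:
        queue.put(batch)
    # sentinel: the first worker to see it puts it back and exits,
    # so every worker eventually shuts down
    queue.put('STOP')

    workers = [Process(target=consumer_queue, args=(n, queue))
               for n in range(4)]
    for w in workers:
        w.start()
    for w in workers:
        w.join()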
Code Example #2
import json
import glob
import os
from mpp.loaders import Loader
from mpp.models import Response, Identity
from sqlalchemy.exc import IntegrityError


files = glob.glob(
    '/home/ubuntu/semantics_pipeline/pipeline_tests/identified/*.json')

with open('local_rds.conf', 'r') as f:
    conf = json.loads(f.read())
loader = Loader(conf)

# skip the first 100 files in the directory listing
for i, f in enumerate(files[100:]):
    with open(f, 'r') as g:
        data = json.loads(g.read())

    # pull the file identifier, datatype and identity block out of the harvested json
    fname = f.split('/')[-1].split('_')[0]
    fmt = data.get('response_datatype', 'unknown')
    identity = data.get('identity', [])

    try:
        # build the Response model and push it to the database
        r = Response()
        r.create(data)
        r_id = loader.load(r)
    except Exception as ex:
        print fname, ex
        continue
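The snippet breaks off after the Response insert. The otherwise unused Identity and IntegrityError imports, together with the identity list pulled above, suggest the identity records were stored next; the fragment below is only a hedged sketch of that step, sitting inside the same loop, and it assumes Identity follows the create()/load() pattern that Response and Validation use. The merged dict is an illustrative guess at the payload.

    for ident in identity:
        try:
            record = Identity()
            # assumed payload: the identity dict tagged with the parent response id
            record.create(dict(ident, response_id=r_id))
            loader.load(record)
        except IntegrityError:
            # duplicate identity row; note it and move on
            print fname, fmt, 'integrity error on identity'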
Code Example #3
import json
import traceback
from datetime import datetime

from mpp.readers import ResponseReader
from mpp.loaders import Loader
from mpp.models import Validation
from mpp.utils import validate_in_memory
'''
geared to validate just the federal responses
from the wee little metadata ec2, reading
and writing to rds
'''
# set up the connection
with open('local_rds.conf', 'r') as f:
    conf = json.loads(f.read())
reader = ResponseReader(conf)

loader = Loader(conf)

# get the set, validate, store outputs
# but need to paginate because of ram issues
for i in xrange(0, 668110, 25):
    print 'QUERYING {0}:{1}'.format(i, 25)
    for response in reader.read('', limit=25, offset=i):
        print response.source_url

        xml = response.cleaned_content
        stderr = validate_in_memory(xml)

        data = {
            "response_id": response.id,
            "valid": 'Error at' not in stderr,
            "validated_on": datetime.now()
        }

        # store the validation result, logging any failure and moving on
        try:
            v = Validation()
            v.create(data)
            loader.load(v)
        except Exception:
            traceback.print_exc()
Code Example #4
#!/anaconda/bin/python

import json
from mpp.loaders import Loader
from mpp.models import Response
import traceback

"""

"""
# set up the connection
with open("local_rds.conf", "r") as f:
    conf = json.loads(f.read())
loader = Loader(conf)

with open("responses_with_federal_schemas.txt", "r") as f:
    responses = [g.strip() for g in f.readlines() if g]

for i, response in enumerate(responses):
    if i % 5000 == 0:
        print "finished: ", i

    with open(response, "r") as f:
        data = json.loads(f.read())

    r = Response()
    r.create(data)

    try:
        loader.load(r)
    except Exception as ex:
        # log the failing file and keep going
        print response, ex
        traceback.print_exc()
Code Example #5
import json
import traceback
from datetime import datetime

from mpp.readers import ResponseReader
from mpp.loaders import Loader
from mpp.models import Validation
from mpp.utils import validate_in_memory

'''
geared to validate just the federal responses
from the wee little metadata ec2, reading
and writing to rds
'''
# set up the connection
with open('local_rds.conf', 'r') as f:
    conf = json.loads(f.read())
reader = ResponseReader(conf)

loader = Loader(conf)

# get the set, validate, store outputs
# but need to paginate because of ram issues
for i in xrange(0, 668110, 25):
    print 'QUERYING {0}:{1}'.format(i, 25)
    for response in reader.read('', limit=25, offset=i):
        print response.source_url

        xml = response.cleaned_content
        stderr = validate_in_memory(xml)

        data = {
            "response_id": response.id,
            "valid": 'Error at' not in stderr,
            "validated_on": datetime.now()
        }

        # store the validation result, logging any failure and moving on
        try:
            v = Validation()
            v.create(data)
            loader.load(v)
        except Exception:
            traceback.print_exc()
Code Example #6
#!/anaconda/bin/python

import json
from mpp.loaders import Loader
from mpp.models import Response
import traceback
'''
load each response file listed in responses_with_federal_schemas.txt into rds
'''
# set up the connection
with open('local_rds.conf', 'r') as f:
    conf = json.loads(f.read())
loader = Loader(conf)

with open('responses_with_federal_schemas.txt', 'r') as f:
    # one file path per line; drop blank lines
    responses = [g.strip() for g in f if g.strip()]

for i, response in enumerate(responses):
    if i % 5000 == 0:
        print 'finished: ', i

    with open(response, 'r') as f:
        data = json.loads(f.read())

    r = Response()
    r.create(data)

    try:
        loader.load(r)
    except Exception as ex:
        # log the failing file and keep going
        print response, ex
        traceback.print_exc()