Esempio n. 1
0
def get_invalid_matches(manifest, input):
    """Collect the normalized samples whose output disagrees with the expectation.

    A fresh ``Normalizer`` is loaded from ``manifest``, the samples are read
    from ``input`` with ``read_input`` and run through ``normalize_samples``
    (non-verbose).

    Args:
        manifest: Value handed to ``Normalizer.read_manifest``.
        input: Value handed to ``read_input``.

    Returns:
        A list with every result for which ``result['expected']`` differs
        from ``result['output']``, in the order ``normalize_samples``
        produced them.
    """
    norm = Normalizer()
    norm.read_manifest(manifest)

    results = normalize_samples(norm, read_input(input), verbose=False)

    mismatches = []
    for result in results:
        if result['expected'] != result['output']:
            mismatches.append(result)
    return mismatches
Esempio n. 2
0
def check_data(manifest, sample):
    """Assert that no row of ``sample`` is an unexplained bad match.

    A ``Normalizer`` is loaded from ``manifest``; every line of ``sample``
    after the first one is split on commas and its first two fields
    (section name, row name) are passed to ``normalizer.normalize_raw``.

    According to the original author's note, ``normalize_raw`` returns
    ``[(section_id, row_id, bool), number_of_matches, section_ids_seen,
    row_ids_seen]``, e.g. ``[(None, None, False), 3, set([170, 11, 118]),
    set([7])]``.

    A row is counted as a bad match when the flag is false, at most one
    match was found, and the row name contains no ``-`` (i.e. the failure
    is explained neither by multiple entries nor by a row range).

    Args:
        manifest: Value handed to ``Normalizer.read_manifest``.
        sample: Path of the sample file to check.

    Raises:
        AssertionError: If at least one bad match was found.
        IndexError: If a non-blank line has fewer than two fields.
    """
    normalizer = Normalizer()
    normalizer.read_manifest(manifest)
    with open(sample, 'r') as f:
        sample_file = f.read().strip()

    # Same guard as before: a file with at most one character is not checked.
    if len(sample_file) <= 1:
        return

    # Initialised once, *before* the loop, so it accumulates over all rows
    # (it used to be reset for every row, so only the last row was asserted)
    # and is defined even when there is nothing after the first line.
    count = 0
    # The first line is skipped -- presumably a header row.
    for line in sample_file.split('\n')[1:]:
        line = line.strip()
        if not line:
            # Blank lines carry no data; skipping them avoids an IndexError.
            continue
        elements = line.split(',')
        # Only the first two fields are used: section name, then row name.
        section_name = elements[0]
        row_name = elements[1]
        data = normalizer.normalize_raw(section_name, row_name)
        # We are looking for matches that result in `False` but are not
        # caused by the database returning multiple entries or by a
        # row_name passed as a range.  The flag is compared with `==`
        # exactly as before, so a None flag is still not counted.
        if data[0][2] == False and data[1] <= 1 and '-' not in row_name:
            count += 1
            # Single-argument print: same text under Python 2, and valid
            # syntax under Python 3.
            print('%s %s %s %s' % (data, section_name, row_name,
                                   row_name.isdigit()))
    # Two spaces reproduce the output of the former `print 'BAD MATCH: ', count`.
    print('BAD MATCH:  %s' % count)
    assert count == 0
Esempio n. 3
0
        description='grader for seatgeek SectionNormalization code test')
    parser.add_argument('--manifest',
                        default=None,
                        help='path to manifest file')
    parser.add_argument('--input', default=None, help='path to input file')
    parser.add_argument('--section',
                        default=None,
                        help='section input (for testing)')
    parser.add_argument('--row', default=None, help='row input (for testing)')

    args = parser.parse_args()

    assert args.manifest

    normalizer = Normalizer()
    normalizer.read_manifest(args.manifest)

    if args.section and args.row:
        section_id, row_id, valid = normalizer.normalize(
            args.section, args.row)
        print """
        Input:
            [section] {}\t[row] {}
        Output:
            [section_id] {}\t[row_id] {}
        Valid?:
            {}
        """.format(args.section, args.row, section_id, row_id, valid)

    elif args.input:
        samples = read_input(args.input)
Esempio n. 4
0
import csv
from normalizer import Normalizer

# Build the normalizer from the Citi Field manifest.  To run against the
# Dodger Stadium data instead, load
# '../../manifests/dodgerstadium_sections.csv' here and read
# '../../samples/dodgertest.csv' below.
normalizer = Normalizer()
normalizer.read_manifest('../../manifests/citifield_sections.csv')

# Columns as consumed below: section, row, section_id, row_id, valid.
with open('../../samples/metstest.csv') as sample_file:
    rows = list(csv.reader(sample_file))

# Every row is converted first (the first one included); the first entry of
# each list is discarded afterwards.
inp = [{'section': row[0], 'row': row[1]} for row in rows]
correct = [
    {
        'section_id': row[2],
        'row_id': row[3],
        # Only the exact text 'True' (surrounding whitespace ignored) is valid.
        'valid': row[4].strip() == 'True'
    }
    for row in rows
]

# Drop the first row of the sample file -- presumably the header -- from both.
inp = inp[1:]
correct = correct[1:]
assert len(inp) == len(correct)