Esempio n. 1
0
def check_data(manifest, sample):
    normalizer = Normalizer()
    normalizer.read_manifest(manifest)
    with open(sample, 'r') as f:
        sample_file = f.read().strip()
        if len(sample_file) > 1:
            for line in sample_file.split('\n')[1:]:
                # line in the manifest: section_id,section_name,row_id,row_name
                line = line.strip()
                elements = line.split(',')
                section_name = elements[0]
                row_name = elements[1]
                # [(section_id, row_id, bool), number of matches, different section ids we have seen, different row_id we have seen]
                # data example: [(None, None, False), 3, set([170, 11, 118]), set([7])]
                data = normalizer.normalize_raw(section_name, row_name)
                # We are looking for matches that result in `False` but are not caused by
                # database returning multiple entries or row_name value passed as a range
                count = 0
                if data[0][2] == False and data[1] <= 1 and '-' not in row_name:
                    count += 1
                    print data, section_name, row_name, row_name.isdigit()
            print 'BAD MATCH: ', count
            assert count == 0