for k, v in word_gts.iteritems():

        base_name = k
        split_base_name = base_name.split('-')

        folder_and_base = "{}/{}-{}/{}".format(split_base_name[0], split_base_name[0], split_base_name[1], base_name)
        image_file = "{}.png".format(folder_and_base)

        compare_line = "-".join(base_name.split("-")[:3])
        if not compare_line in in_set:
            continue

        data_set.append({
            "gt": v['gt'],
            "image_path": image_file,
            "err": v['err']
        })
    return data_set

def get_gt(in_set):
    """Build the word-level data set restricted to ``in_set``.

    Loads the mapping with ``prep_iam_writer_map.get_mapping('xml')``, keeps
    only its third element (the word ground truths) and passes that, together
    with ``in_set``, to ``prep_data_set``. Whatever ``prep_data_set`` returns
    is returned unchanged.
    """
    # The first two items of the mapping are not needed here.
    _authors, _line_gts, word_level_gts = prep_iam_writer_map.get_mapping('xml')
    return prep_data_set(word_level_gts, in_set)


if __name__ == "__main__":
    # Script entry point: build the entries for the training split and report
    # how many were produced.

    # load_set.load() yields four splits; only the first one is used here.
    training_set, val1_set, val2_set, test_set = load_set.load()
    data_set = get_gt(training_set)
    # A single parenthesised argument prints the same text on Python 2 and 3,
    # whereas the bare `print x` statement is a SyntaxError on Python 3.
    print(len(data_set))
# Esempio n. 2  (Italian: "Example no. 2")
# 0
import generate_gt_from_txt_w
import generate_gt_from_xml_w
import load_set

class hashabledict(dict):
  """A ``dict`` that can be hashed, e.g. to be stored in a ``set``.

  Hash and equality are both derived from the tuple of ``(key, value)``
  pairs taken in sorted key order, so two instances with the same items
  compare equal and hash alike regardless of insertion order.

  Keys must be mutually sortable and values must be hashable. Do not
  mutate an instance after it has been placed in a set or used as a key,
  because its hash would change.
  """

  def __key(self):
    """Return the items as a tuple of ``(key, value)`` in sorted key order."""
    return tuple((k, self[k]) for k in sorted(self))

  def __hash__(self):
    """Hash the canonical item tuple."""
    return hash(self.__key())

  def __eq__(self, other):
    """Compare by canonical item tuple against another ``hashabledict``.

    For any other type ``NotImplemented`` is returned so Python falls back
    to the other operand's comparison (plain ``dict`` equality, or identity)
    instead of raising ``AttributeError`` on the missing ``__key`` method.
    """
    if not isinstance(other, hashabledict):
      return NotImplemented
    return self.__key() == other.__key()

if __name__ == "__main__":
    # Cross-check the two generators: for every split returned by
    # load_set.load(), compare the entries produced from the XML source with
    # those produced from the TXT source.
    sets = load_set.load()

    for s in sets:
        xml_data_set = generate_gt_from_xml_w.get_gt(s)
        txt_data_set = generate_gt_from_txt_w.get_gt(s)

        # Each entry is wrapped in hashabledict so it can be a set member,
        # which allows the two results to be compared with set operations.
        xml_set = {hashabledict(x) for x in xml_data_set}
        txt_set = {hashabledict(x) for x in txt_data_set}

        # Single-argument print(...) emits the same text on Python 2 and 3;
        # the bare `print x, y` statement is a SyntaxError on Python 3.
        print(len(xml_set))
        print(len(txt_set))
        print("Sym Diff (should be zero): {}".format(len(xml_set ^ txt_set)))