Beispiel #1
0
            # Import one HIT from the current XML element: read its block id and
            # language pair, then build (and, unless this is a dry run, save) a
            # HIT object.  Failures while building/saving are printed and counted
            # in _errors instead of aborting the import.
            # NOTE(review): _child is presumably an ElementTree element -- confirm
            # against the enclosing loop.
            block_id = _child.attrib["block-id"]
            language_pair = '{0}2{1}'.format(_child.attrib["source-language"],
                                             _child.attrib["target-language"])

            # Hotfix potentially wrong ISO codes;  we are using ISO-639-3.
            # The replacement runs on the combined "<src>2<tgt>" string, so it
            # fixes both the source and the target code in one pass.
            iso_639_2_to_3_mapping = {'cze': 'ces', 'fre': 'fra', 'ger': 'deu'}
            for part2_code, part3_code in iso_639_2_to_3_mapping.items():
                language_pair = language_pair.replace(part2_code, part3_code)

            try:
                # Every element counts towards the total, even if it fails below.
                _total += 1
                _hit_xml = tostring(_child, encoding="utf-8").decode('utf-8')

                # Construct the HIT in both modes so that a dry run exercises
                # the same constructor call as a real import.
                h = HIT(block_id=block_id,
                        hit_xml=_hit_xml,
                        language_pair=language_pair,
                        mturk_only=args.mturk_only)

                if not args.dry_run_enabled:
                    # A plain save() is used rather than get_or_create(), so
                    # exact duplicates are stored.  Per the original note this
                    # is allowed for WMT13 to measure intra-annotator agreement.
                    h.save()

            # Deliberately broad: one bad HIT must not stop the whole import.
            # pylint: disable-msg=W0703
            except Exception as msg:
                # "except ... as" and print(...) with a single argument behave
                # the same on Python 2.6+ and Python 3.
                print(msg)
                _errors += 1