Esempio n. 1
0
class KVGXmlDictionaryReader(_XmlBase):

    def __init__(self):
        self._charcol = CharacterCollection()

    def get_character_collection(self):
        return self._charcol

    def _start_element(self, name, attrs):
        self._tag = name

        if self._first_tag:
            self._first_tag = False
            if self._tag != "kanjis":
                raise ValueError, "The very first tag should be <kanjis>"

        if self._tag == "kanji":
            self._writing = Writing()
            self._utf8 = attrs["midashi"].encode("UTF-8")

        if self._tag == "stroke":
            self._stroke = Stroke()
            if attrs.has_key("path"):
                self._stroke_svg = attrs["path"].encode("UTF-8")
                try:
                  svg_parser = SVG_Parser(self._stroke_svg) 
                  svg_parser.parse()
                  self._stroke.append_points(svg_parser.get_points())
                except:
                  sys.stderr.write("Something went wrong in this character: " + self._utf8 + "\n")
            else:
                print "Missing path in <stroke> element: " + self._utf8
		
            
    def _end_element(self, name):
        if name == "kanji":
            char = Character()
            char.set_utf8(self._utf8)
            char.set_writing(self._writing)
            self._charcol.add_set(self._utf8)
            self._charcol.append_character(self._utf8, char)
            for s in ["_tag", "_stroke"]:
                if s in self.__dict__:
                    del self.__dict__[s]

        if name == "stroke":
            self._writing.append_stroke(self._stroke)
            self._stroke = None

        self._tag = None

    def _char_data(self, data):
        if self._tag == "utf8":
            self._utf8 = data.encode("UTF-8")
        elif self._tag == "width":
            self._writing.set_width(int(data))
        elif self._tag == "height":
            self._writing.set_height(int(data))
Esempio n. 2
0
class KVGXmlDictionaryReader(_XmlBase):
    def __init__(self):
        self._charcol = CharacterCollection()

    def get_character_collection(self):
        return self._charcol

    def _start_element(self, name, attrs):
        self._tag = name

        if self._first_tag:
            self._first_tag = False
            if self._tag != "kanjivg":
                raise ValueError, "The very first tag should be <kanjivg>"

        if self._tag == "kanji":
            self._writing = Writing()
            self._utf8 = unichr(int(attrs["id"].split('_')[1],
                                    16)).encode("UTF-8")

        if self._tag == "path":
            self._stroke = Stroke()
            if attrs.has_key("d"):
                self._stroke_svg = attrs["d"].encode("UTF-8")
                svg_parser = SVG_Parser(self._stroke_svg)
                svg_parser.parse()
                self._stroke.append_points(svg_parser.get_points())
            else:
                sys.stderr.write("Missing data in <path> element: " +
                                 self._utf8 + "\n")

    def _end_element(self, name):
        if name == "kanji":
            char = Character()
            char.set_utf8(self._utf8)
            char.set_writing(self._writing)
            self._charcol.add_set(self._utf8)
            self._charcol.append_character(self._utf8, char)
            for s in ["_tag", "_stroke"]:
                if s in self.__dict__:
                    del self.__dict__[s]

        if name == "path":
            self._writing.append_stroke(self._stroke)
            self._stroke = None

        self._tag = None

    def _char_data(self, data):
        if self._tag == "utf8":
            self._utf8 = data.encode("UTF-8")
        elif self._tag == "width":
            self._writing.set_width(int(data))
        elif self._tag == "height":
            self._writing.set_height(int(data))
Esempio n. 3
0
class KVGXmlDictionaryReader(_XmlBase):
    def __init__(self):
        self._charcol = CharacterCollection()

    def get_character_collection(self):
        return self._charcol

    def _start_element(self, name, attrs):
        self._tag = name

        if self._first_tag:
            self._first_tag = False
            if self._tag != "kanjivg":
                raise ValueError, "The very first tag should be <kanjivg>"

        if self._tag == "kanji":
            self._writing = Writing()
            self._utf8 = unichr(int(attrs["id"].split("_")[1], 16)).encode("UTF-8")

        if self._tag == "path":
            self._stroke = Stroke()
            if attrs.has_key("d"):
                self._stroke_svg = attrs["d"].encode("UTF-8")
                svg_parser = SVG_Parser(self._stroke_svg)
                svg_parser.parse()
                self._stroke.append_points(svg_parser.get_points())
            else:
                sys.stderr.write("Missing data in <path> element: " + self._utf8 + "\n")

    def _end_element(self, name):
        if name == "kanji":
            char = Character()
            char.set_utf8(self._utf8)
            char.set_writing(self._writing)
            self._charcol.add_set(self._utf8)
            self._charcol.append_character(self._utf8, char)
            for s in ["_tag", "_stroke"]:
                if s in self.__dict__:
                    del self.__dict__[s]

        if name == "path":
            self._writing.append_stroke(self._stroke)
            self._stroke = None

        self._tag = None

    def _char_data(self, data):
        if self._tag == "utf8":
            self._utf8 = data.encode("UTF-8")
        elif self._tag == "width":
            self._writing.set_width(int(data))
        elif self._tag == "height":
            self._writing.set_height(int(data))
Esempio n. 4
0
	def get_character_collection(self):
		charcol = CharacterCollection()

		# group characters with the same label into sets
		sets = {}
		for i in range(len(self._labels)):
			# Create Character
			writing = Writing()
			if self.height and self.width:
				writing.set_height(self.height)
				writing.set_width(self.width)

			for delin_range in self._delineations[i]:
				if delin_range.start_comp == (delin_range.end_comp - 1):
					stroke_points = self._strokes[delin_range.start_comp][delin_range.start_point:delin_range.end_point]
					writing.append_stroke(Stroke.from_list(stroke_points))
				else:
					# add first stroke to writing
					start_stroke_points = self._strokes[delin_range.start_comp][delin_range.start_point:-1]
					if len(start_stroke_points) > 0:
						writing.append_stroke(Stroke.from_list(start_stroke_points))

					# add last stroke to writing
					end_stroke_points = self._strokes[delin_range.end_comp - 1][0:delin_range.end_point]
					if len(end_stroke_points) > 0:
						writing.append_stroke(Stroke.from_list(end_stroke_points))

					# add the remaining strokes to writing
					for stroke in self._strokes[delin_range.start_comp + 1:delin_range.end_comp - 1]:
						writing.append_stroke(stroke)

			character = Character()
			character.set_writing(writing)

			utf8 = self._labels[i]
			character.set_utf8(utf8)

			sets[utf8] = sets.get(utf8, []) + [character]

		charcol.add_sets(sets.keys())

		for set_name, characters in sets.items():
			charcol.append_characters(set_name, characters)

		return charcol
Esempio n. 5
0
class TomoeXmlDictionaryReader(_XmlBase):
    def __init__(self):
        self._charcol = CharacterCollection()

    def get_character_collection(self):
        return self._charcol

    def _start_element(self, name, attrs):
        self._tag = name

        if self._first_tag:
            self._first_tag = False
            if self._tag != "dictionary":
                raise ValueError, "The very first tag should be <dictionary>"

        if self._tag == "character":
            self._writing = Writing()

        if self._tag == "stroke":
            self._stroke = Stroke()

        elif self._tag == "point":
            point = Point()

            for key in ("x", "y", "pressure", "xtilt", "ytilt", "timestamp"):
                if attrs.has_key(key):
                    value = attrs[key].encode("UTF-8")
                    if key in ("pressure", "xtilt", "ytilt"):
                        value = float(value)
                    else:
                        value = int(float(value))
                else:
                    value = None

                setattr(point, key, value)

            self._stroke.append_point(point)

    def _end_element(self, name):
        if name == "character":
            char = Character()
            char.set_utf8(self._utf8)
            char.set_writing(self._writing)
            self._charcol.add_set(self._utf8)
            self._charcol.append_character(self._utf8, char)

            for s in ["_tag", "_stroke"]:
                if s in self.__dict__:
                    del self.__dict__[s]

        if name == "stroke":
            self._writing.append_stroke(self._stroke)
            self._stroke = None

        self._tag = None

    def _char_data(self, data):
        if self._tag == "utf8":
            self._utf8 = data.encode("UTF-8")
        elif self._tag == "width":
            self._writing.set_width(int(data))
        elif self._tag == "height":
            self._writing.set_height(int(data))
Esempio n. 6
0
class TomoeXmlDictionaryReader(_XmlBase):

    def __init__(self):
        self._charcol = CharacterCollection()

    def get_character_collection(self):
        return self._charcol

    def _start_element(self, name, attrs):
        self._tag = name

        if self._first_tag:
            self._first_tag = False
            if self._tag != "dictionary":
                raise ValueError, "The very first tag should be <dictionary>"

        if self._tag == "character":
            self._writing = Writing()

        if self._tag == "stroke":
            self._stroke = Stroke()
            
        elif self._tag == "point":
            point = Point()

            for key in ("x", "y", "pressure", "xtilt", "ytilt", "timestamp"):
                if attrs.has_key(key):
                    value = attrs[key].encode("UTF-8")
                    if key in ("pressure", "xtilt", "ytilt"):
                        value = float(value)
                    else:
                        value = int(float(value))
                else:
                    value = None

                setattr(point, key, value)

            self._stroke.append_point(point)

    def _end_element(self, name):
        if name == "character":
            char = Character()
            char.set_utf8(self._utf8)
            char.set_writing(self._writing)
            self._charcol.add_set(self._utf8)
            self._charcol.append_character(self._utf8, char)

            for s in ["_tag", "_stroke"]:
                if s in self.__dict__:
                    del self.__dict__[s]

        if name == "stroke":
            self._writing.append_stroke(self._stroke)
            self._stroke = None

        self._tag = None

    def _char_data(self, data):
        if self._tag == "utf8":
            self._utf8 = data.encode("UTF-8")
        elif self._tag == "width":
            self._writing.set_width(int(data))
        elif self._tag == "height":
            self._writing.set_height(int(data))