Exemple #1
0
 def startElement(self, name, attrs):
     """Record that element ``name`` has been opened.

     Unknown element names are either fatal or, when
     ``self.ignore_missing_tags`` is true, remembered once in
     ``self.unknown_tags``. A ``label`` start tag creates a fresh
     ``model.Label`` unless the ``sublabels`` flag is currently set.

     Args:
         name: Element name reported by the SAX parser.
         attrs: Attributes of the element (not used by this branch).

     Raises:
         SystemExit: With status 1 when ``name`` is unknown and unknown
             tags are not being ignored.
     """
     if name not in self.inElement:
         if not self.ignore_missing_tags:
             # Parenthesised single-argument print is valid both as the
             # Python 2 statement and as the Python 3 function.
             print("Error: Unknown Label element '%s'." % name)
             # Exit with a failing status so the caller can detect the abort.
             sys.exit(1)
         if name not in self.unknown_tags:
             self.unknown_tags.append(name)
     self.inElement[name] = True
     # While the 'sublabels' flag is set, a <label> start tag must not
     # replace the label that is currently being assembled.
     if name == 'label' and not self.inElement['sublabels']:
         self.label = model.Label()
Exemple #2
0
	def startElement(self, name, attrs):
		"""Record that element ``name`` has been opened and handle its attributes.

		Unknown element names are either fatal or, when
		``self.ignore_missing_tags`` is true, remembered once in
		``self.unknown_tags``. A ``label`` start tag creates a fresh
		``model.Label`` unless the ``sublabels`` flag is currently set. An
		``image`` start tag is turned into a ``model.ImageInfo`` appended to
		``self.label.images``.

		Args:
			name: Element name reported by the SAX parser.
			attrs: Mapping-like attributes of the element.

		Raises:
			SystemExit: With status 1 when ``name`` is unknown (and unknown
				tags are not ignored) or when an ``image`` element does not
				carry exactly the five expected attributes.
		"""
		if name not in self.inElement:
			if not self.ignore_missing_tags:
				print("Error: Unknown Label element '%s'." % name)
				# Exit with a failing status so the caller can detect the abort.
				sys.exit(1)
			if name not in self.unknown_tags:
				self.unknown_tags.append(name)
		self.inElement[name] = True
		if name == 'label':
			# While the 'sublabels' flag is set, a <label> start tag must not
			# replace the label that is currently being assembled.
			if not self.inElement['sublabels']:
				self.label = model.Label()
		elif name == "image":
			fields = ("height", "type", "uri", "uri150", "width")
			# Validate before reading: otherwise a missing attribute raises
			# KeyError and the diagnostic below is never printed.
			if len(attrs) != 5 or any(f not in attrs for f in fields):
				print("ATTR ERROR")
				print(attrs)
				sys.exit(1)
			newImage = model.ImageInfo()
			# NOTE(review): each XML attribute is stored under its own name,
			# so 'type' lands on newImage.type -- confirm ImageInfo expects that.
			for f in fields:
				setattr(newImage, f, attrs[f])
			self.label.images.append(newImage)
 def startElement(self, name, attrs):
     """Record that element ``name`` has been opened and handle its attributes.

     Unknown element names are either fatal or, when
     ``self.ignore_missing_tags`` is true, remembered once in
     ``self.unknown_tags``. A ``label`` start tag creates a fresh
     ``model.Label`` unless the ``sublabels`` flag is currently set. An
     ``image`` start tag is turned into a ``model.ImageInfo`` appended to
     ``self.label.images``.

     Args:
         name: Element name reported by the SAX parser.
         attrs: Mapping-like attributes of the element.

     Raises:
         SystemExit: With status 1 when ``name`` is unknown (and unknown
             tags are not ignored) or when an ``image`` element does not
             carry exactly the five expected attributes.
     """
     if name not in self.inElement:
         if not self.ignore_missing_tags:
             # Parenthesised single-argument print is valid both as the
             # Python 2 statement and as the Python 3 function.
             print("Error: Unknown Label element '%s'." % name)
             # Exit with a failing status so the caller can detect the abort.
             sys.exit(1)
         if name not in self.unknown_tags:
             self.unknown_tags.append(name)
     self.inElement[name] = True
     if name == 'label':
         # While the 'sublabels' flag is set, a <label> start tag must not
         # replace the label that is currently being assembled.
         if not self.inElement['sublabels']:
             self.label = model.Label()
     elif name == "image":
         expected = ("height", "type", "uri", "uri150", "width")
         # Validate before reading: otherwise a missing attribute raises
         # KeyError and the diagnostic below is never printed.
         if len(attrs) != 5 or any(key not in attrs for key in expected):
             print("ATTR ERROR")
             print(attrs)
             sys.exit(1)
         newImage = model.ImageInfo()
         newImage.height = attrs["height"]
         # The XML attribute is called 'type'; the model field is imageType.
         newImage.imageType = attrs["type"]
         newImage.uri = attrs["uri"]
         newImage.uri150 = attrs["uri150"]
         newImage.width = attrs["width"]
         self.label.images.append(newImage)
class LabelHandler(xml.sax.handler.ContentHandler):
    """SAX content handler that assembles ``model.Label`` objects.

    Text content is accumulated in ``buffer`` and assigned to the current
    label when the matching end tag arrives. Each completed top-level
    ``label`` element is passed to ``exporter.storeLabel``.
    """

    # Template of the known element names. Every instance takes its own copy
    # in __init__, so flags set by one handler never leak into another.
    inElement = {
        'id': False,
        'label': False,
        'labels': False,
        'data_quality': False,
        'contactinfo': False,
        'image': False,
        'images': False,
        'name': False,
        'profile': False,
        'parentLabel': False,
        'sublabels': False,
        'urls': False,
        'url': False,
    }
    # Class-level placeholders only; the working values are per-instance and
    # are assigned in __init__ (no model object is built at import time).
    label = None
    buffer = ''
    unknown_tags = []

    def __init__(self, exporter, stop_after=0, ignore_missing_tags=False):
        """Create a handler with its own parsing state.

        Args:
            exporter: Object whose ``storeLabel(label)`` and ``finish()``
                methods are called by this handler.
            stop_after: When greater than zero, parsing is aborted once the
                label counter reaches this value.
            ignore_missing_tags: When true, unknown element names are
                collected in ``unknown_tags`` instead of being fatal.
        """
        # Explicit base call (not super()) also works if the base class is
        # an old-style class.
        xml.sax.handler.ContentHandler.__init__(self)
        self.exporter = exporter
        self.stop_after = stop_after
        self.ignore_missing_tags = ignore_missing_tags
        # Per-instance state: the class-level dict/list were previously
        # mutated through ``self`` and therefore shared by all handlers.
        self.inElement = dict(self.inElement)
        self.unknown_tags = []
        self.buffer = ''
        self.label = model.Label()

    def startElement(self, name, attrs):
        """Record that element ``name`` has been opened and handle its attributes.

        Raises:
            SystemExit: With status 1 when ``name`` is unknown (and unknown
                tags are not ignored) or when an ``image`` element does not
                carry exactly the five expected attributes.
        """
        if name not in self.inElement:
            if not self.ignore_missing_tags:
                # Parenthesised single-argument print is valid both as the
                # Python 2 statement and as the Python 3 function.
                print("Error: Unknown Label element '%s'." % name)
                # Exit with a failing status so the caller can detect it.
                sys.exit(1)
            if name not in self.unknown_tags:
                self.unknown_tags.append(name)
        self.inElement[name] = True
        if name == 'label':
            # While the 'sublabels' flag is set, a <label> start tag must
            # not replace the label that is currently being assembled.
            if not self.inElement['sublabels']:
                self.label = model.Label()
        elif name == "image":
            expected = ("height", "type", "uri", "uri150", "width")
            # Validate before reading: otherwise a missing attribute raises
            # KeyError and the diagnostic below is never printed.
            if len(attrs) != 5 or any(key not in attrs for key in expected):
                print("ATTR ERROR")
                print(attrs)
                sys.exit(1)
            newImage = model.ImageInfo()
            newImage.height = attrs["height"]
            # The XML attribute is called 'type'; the model field is imageType.
            newImage.imageType = attrs["type"]
            newImage.uri = attrs["uri"]
            newImage.uri150 = attrs["uri150"]
            newImage.width = attrs["width"]
            self.label.images.append(newImage)

    def characters(self, data):
        """Append a chunk of character data to the pending text buffer."""
        self.buffer += data

    def endDocument(self):
        """Tell the exporter that no more labels will follow."""
        self.exporter.finish()

    def endElement(self, name):
        """Store the buffered text for element ``name`` and close the element.

        Raises:
            model.ParserStopError: When ``stop_after`` is positive and the
                label counter has reached it.
        """
        # Module-level counter; assumed to be defined elsewhere in this
        # module -- TODO confirm.
        global labelCounter

        self.buffer = self.buffer.strip()
        if name == 'id':
            self.label.id = int(self.buffer)
        elif name in ('name', 'contactinfo', 'data_quality', 'profile',
                      'parentLabel'):
            # These element names match the Label attribute names one-to-one;
            # empty text leaves the attribute untouched.
            if self.buffer:
                setattr(self.label, name, self.buffer)
        elif name == 'url':
            if self.buffer:
                self.label.urls.append(self.buffer)
        elif name == "label":
            if self.inElement['sublabels']:
                # A <label> closed while the 'sublabels' flag is set
                # contributes only its text to the current label.
                if self.buffer:
                    self.label.sublabels.append(self.buffer)
            else:
                self.exporter.storeLabel(self.label)

                labelCounter += 1
                if self.stop_after > 0 and labelCounter >= self.stop_after:
                    self.endDocument()
                    if self.ignore_missing_tags and self.unknown_tags:
                        print('Encountered some unknown Label tags: %s' % (
                            self.unknown_tags,))
                    raise model.ParserStopError(labelCounter)

        self.inElement[name] = False
        self.buffer = ''
Exemple #5
0
class LabelHandler(xml.sax.handler.ContentHandler):
	"""SAX content handler that assembles ``model.Label`` objects.

	Text content is accumulated in ``buffer`` and assigned to the current
	label when the matching end tag arrives. Each completed top-level
	``label`` element is passed to ``exporter.storeLabel``.
	"""

	# Template of the known element names. Every instance takes its own copy
	# in __init__, so flags set by one handler never leak into another.
	inElement = {
		'id': False,
		'label': False,
		'labels': False,
		'data_quality': False,
		'contactinfo': False,
		'image': False,
		'images': False,
		'name': False,
		'profile': False,
		'parentLabel': False,
		'sublabels': False,
		'urls': False,
		'url': False,
	}
	# Class-level placeholders only; the working values are per-instance and
	# are assigned in __init__ (no model object is built at import time).
	label = None
	buffer = ''
	unknown_tags = []

	def __init__(self, exporter, stop_after=0, ignore_missing_tags=False):
		"""Create a handler with its own parsing state.

		Args:
			exporter: Object whose ``storeLabel(label)`` and ``finish()``
				methods are called by this handler.
			stop_after: When greater than zero, parsing is aborted once
				that many labels have been stored.
			ignore_missing_tags: When true, unknown element names are
				collected in ``unknown_tags`` instead of being fatal.
		"""
		xml.sax.handler.ContentHandler.__init__(self)
		self.exporter = exporter
		self.stop_after = stop_after
		self.ignore_missing_tags = ignore_missing_tags
		self.labelCounter = 0
		# Per-instance state: the class-level dict/list were previously
		# mutated through ``self`` and therefore shared by all handlers.
		self.inElement = dict(self.inElement)
		self.unknown_tags = []
		self.buffer = ''
		self.label = model.Label()

	def startElement(self, name, attrs):
		"""Record that element ``name`` has been opened and handle its attributes.

		Raises:
			SystemExit: With status 1 when ``name`` is unknown (and unknown
				tags are not ignored) or when an ``image`` element does not
				carry exactly the five expected attributes.
		"""
		if name not in self.inElement:
			if not self.ignore_missing_tags:
				print("Error: Unknown Label element '%s'." % name)
				# Exit with a failing status so the caller can detect it.
				sys.exit(1)
			if name not in self.unknown_tags:
				self.unknown_tags.append(name)
		self.inElement[name] = True
		if name == 'label':
			# While the 'sublabels' flag is set, a <label> start tag must
			# not replace the label that is currently being assembled.
			if not self.inElement['sublabels']:
				self.label = model.Label()
		elif name == "image":
			fields = ("height", "type", "uri", "uri150", "width")
			# Validate before reading: otherwise a missing attribute raises
			# KeyError and the diagnostic below is never printed.
			if len(attrs) != 5 or any(f not in attrs for f in fields):
				print("ATTR ERROR")
				print(attrs)
				sys.exit(1)
			newImage = model.ImageInfo()
			# NOTE(review): each XML attribute is stored under its own name,
			# so 'type' lands on newImage.type -- confirm ImageInfo expects that.
			for f in fields:
				setattr(newImage, f, attrs[f])
			self.label.images.append(newImage)

	def characters(self, data):
		"""Append a chunk of character data to the pending text buffer."""
		self.buffer += data

	def endDocument(self):
		"""Tell the exporter that no more labels will follow."""
		self.exporter.finish()

	def endElement(self, name):
		"""Store the buffered text for element ``name`` and close the element.

		Raises:
			model.ParserStopError: When ``stop_after`` is positive and the
				number of stored labels has reached it.
		"""
		self.buffer = self.buffer.strip()
		if name == 'id':
			self.label.id = int(self.buffer)
		elif name in ('name', 'contactinfo', 'data_quality', 'profile', 'parentLabel'):
			# These element names match the Label attribute names one-to-one;
			# empty text leaves the attribute untouched.
			if self.buffer:
				setattr(self.label, name, self.buffer)
		elif name == 'url':
			if self.buffer:
				self.label.urls.append(self.buffer)
		elif name == "label":
			if self.inElement['sublabels']:
				# A <label> closed while the 'sublabels' flag is set
				# contributes only its text to the current label.
				if self.buffer:
					self.label.sublabels.append(self.buffer)
			else:
				self.exporter.storeLabel(self.label)

				self.labelCounter += 1
				if self.stop_after > 0 and self.labelCounter >= self.stop_after:
					self.endDocument()
					if self.ignore_missing_tags and self.unknown_tags:
						print('Encountered some unknown Label tags: %s' % (self.unknown_tags,))
					raise model.ParserStopError(self.labelCounter)

		self.inElement[name] = False
		self.buffer = ''