예제 #1
0
 def __init__(self, stream=sys.stdout):
     """
     Set up the converter and open the XML document on `stream`.

     In order, the writer is asked to start the document, the header
     comment is added via _add_comment() and the root "output" element
     is opened.
     """
     # Label bookkeeping does not depend on the writer, so set it up first.
     self._labels = {}
     self.stream = stream
     writer = SAXContentWriter(stream)
     self.xmlparser = writer
     writer.startDocument()
     # _add_comment() writes through self.xmlparser, assigned just above.
     self._add_comment()
     writer.startElement("output")
 def __init__(self, stream=sys.stdout):
     """
     Prepare the SAX writer for `stream` and emit the document opening.

     The opening consists of startDocument(), the generated header
     comment and the start of the root "output" element, in that order.
     """
     self._labels = {}  # no entity has been labelled yet
     self.stream = stream
     sax_writer = SAXContentWriter(stream)
     self.xmlparser = sax_writer
     sax_writer.startDocument()
     # The header comment is written through self.xmlparser.
     self._add_comment()
     sax_writer.startElement("output")
class Model2XMLConverter(object):
    """
    Convert the hierarchical object model of the RRS database (module
    rrslib.db.model) to the RRS-XML format.

    Attributes which the model declares with the type _UnknownType are
    omitted from the output.

    The constructor already opens the document (startDocument, the header
    comment and the root "output" element) and convert() ends it, so an
    instance is presumably meant for a single convert() call -- TODO confirm.
    """

    def __init__(self, stream=sys.stdout):
        """
        Create the SAX writer on `stream` and open the XML document.

        The default stream is whatever sys.stdout was when the class was
        defined, because default values are evaluated only once.
        """
        self.stream = stream
        self.xmlparser = SAXContentWriter(stream)
        self.xmlparser.startDocument()
        self._add_comment()
        self.xmlparser.startElement("output")
        # Maps every entity already written to the label emitted for it.
        self._labels = {}

    def _add_comment(self):
        """Write the header comment (origin, format, generation time)."""
        c = "\nOrigin: This XML was generated by Model2XMLConverter\n"
        c += "Format: $RRS-XML$\n"
        c += "Time:   $%s$\n" % time.ctime()
        self.xmlparser.comment(c)

    def _resolve_obj_name(self, obj):
        """
        Return the XML tag name for `obj`.

        The name is the class name without its first three characters
        (presumably the "RRS" prefix of the model classes), lower-cased.
        """
        # __name__ gives the bare class name directly, no need to parse
        # the textual representation of the class.
        return obj.__class__.__name__[3:].lower()

    def _get_obj_attrs(self, obj):
        """
        Collect the XML attributes of the element describing `obj`.

        Only "id", "credibility" and "module" are considered, and only when
        obj.isset() reports them; "module" is written as "source-module".
        Values are converted with str().
        """
        a = {}
        for attr in ("id", "credibility", "module"):
            if obj.isset(attr):
                xmlattr = "source-module" if attr == "module" else attr
                a[xmlattr] = str(obj.get(attr))
        return a

    def _get_role(self, attrstr):
        """
        Split an attribute name into a (tagname, role) tuple.

        - "publisher" maps to ('organization', 'publisher').
        - A name without underscore, or a name listed in db_tables, is
          returned unchanged with role None.
        - Otherwise the last underscore-separated part is tried as the role
          (the rest must be in db_tables), then the first part.
        - If neither split yields a name from db_tables, the attribute is
          returned unchanged with role None.
        """
        if attrstr == "publisher":
            return ('organization', attrstr)
        if "_" not in attrstr or attrstr in db_tables:
            return (attrstr, None)
        parts = attrstr.split("_")
        # <table>_<role>
        without_last = "_".join(parts[:-1])
        if without_last in db_tables:
            return (without_last, parts[-1])
        # <role>_<table>
        without_first = "_".join(parts[1:])
        if without_first in db_tables:
            return (without_first, parts[0])
        # it was not a role, just an attribute with an underscore
        return (attrstr, None)

    def _convert_raw_attribute(self, obj, attrstr):
        """
        Write a plain attribute of `obj` as an empty element.

        The element carries the value in its "value" attribute (booleans
        become 'yes'/'no') and, when _get_role() finds one, a "role"
        attribute. Nothing is written when obj.isset(attrstr) is false.
        """
        if not obj.isset(attrstr):
            return
        if isinstance(obj, _RRSDbEntityRelationship):
            attr = obj.get_attribute(attrstr)
        else:
            attr = obj.get(attrstr)
        tagname, role = self._get_role(attrstr)
        if isinstance(attr, bool):
            val = 'yes' if attr else 'no'
        else:
            val = attr
        d = {"value": self._unicode(val)}
        if role is not None:
            d['role'] = role
        self.xmlparser.startElement(tagname, d)
        self.xmlparser.endElement()

    def _hash(self):
        """
        Return a random label of five hexadecimal digits which is not used
        by any entity in self._labels yet.
        """
        while 1:
            # randint() includes both bounds, so 0..15 is exactly one hex
            # digit; "%x" keeps the zero digit (stripping "0x" with lstrip
            # would also strip a zero value away).
            h = "".join(["%x" % random.randint(0, 15) for _ in range(5)])
            if h in self._labels.values():
                continue
            return h

    def _unicode(self, string):
        """
        Return `string` as a unicode object.

        Unicode objects are returned as they are, anything else is passed
        through str() and decoded as UTF-8.
        """
        if isinstance(string, unicode):
            return string
        return unicode(str(string), encoding='utf-8')

    def _convert_relationships(self, attrstr, relationships):
        """
        Write the entities reachable through a list of relationship objects
        (abstraction of N:N junction tables).

        Every attribute of a relationship which is not None is attached, as
        a string, to the element of each entity of that relationship.
        """
        _, role = self._get_role(attrstr)
        for item in relationships:
            raw_rel_attrs = {}
            for entattrstr in item.__dict__:
                if entattrstr in ("__types__", "_parent", "_entities"):
                    continue
                entattr = getattr(item, entattrstr)
                if entattr is not None:
                    raw_rel_attrs[entattrstr] = str(entattr)
            # iterate over all entities of relation (i.e Publication and Project)
            for ent in item._entities:
                self._conversion(ent, role=role,
                                 add_relation_attributes=raw_rel_attrs)

    def _conversion(self, obj, role=None, add_relation_attributes=None):
        """
        Recursively write `obj` and everything it references.

        The first occurrence of an entity gets a "label" attribute and its
        full content; every later occurrence is written as an empty element
        with a "ref" attribute holding that label.

        @param obj: the entity to write, an instance of _RRSDatabaseEntity
        @param role: value of the "role" attribute, or None for no role
        @param add_relation_attributes: mapping of additional attributes
            for the element, or None
        @raise RRSXMLConverterError: if obj is not a _RRSDatabaseEntity
        """
        if not isinstance(obj, _RRSDatabaseEntity):
            raise RRSXMLConverterError("Error while conversion model->rrs-xml. "
                                       "Failed to convert %s" % str(type(obj)))
        name = self._unicode(self._resolve_obj_name(obj))
        attrs = self._get_obj_attrs(obj)
        if role is not None:
            attrs['role'] = role
        if add_relation_attributes:
            for k in add_relation_attributes:
                attrs[k] = self._unicode(add_relation_attributes[k])
        # already written: create only a reference to the label
        if obj in self._labels:
            attrs['ref'] = self._labels[obj]
            self.xmlparser.startElement(name, attrs)
            self.xmlparser.endElement()
            return
        hsh = self._hash()
        self._labels[obj] = hsh
        attrs['label'] = hsh
        self.xmlparser.startElement(name, attrs)
        for attrstr in obj.__dict__:
            # these are either internal or already written as XML attributes
            if attrstr in ("__types__", "id", "credibility", "module", "lev",
                           "_table_name"):
                continue
            # pass all unknown attributes
            if attrstr in obj.__types__ and obj.__types__[attrstr] == _UnknownType:
                continue
            attr = getattr(obj, attrstr)
            if type(attr) is list:
                # list of relationship objects
                self._convert_relationships(attrstr, attr)
            elif isinstance(attr, _RRSDatabaseEntity):
                _, subrole = self._get_role(attrstr)
                self._conversion(attr, role=subrole)
            else:
                self._convert_raw_attribute(obj, attrstr)
        self.xmlparser.endElement()

    def convert(self, *args):
        """
        Converts hierarchical object model of RRS database to rrs-xml format in
        order of RRS-XML specification on
        https://merlin.fit.vutbr.cz/nlp-wiki/index.php/Rrs_moduleapi

        Every argument is either one entity or a list/tuple of entities.
        compile() is called on each entity before it is written. Finally
        the document is ended on the writer.
        """
        self._labels = {}  # clean labels
        for obj in args:
            if isinstance(obj, (list, tuple)):
                for o in obj:
                    o.compile()
                    self._conversion(o)
            else:
                obj.compile()
                self._conversion(obj)
        self.xmlparser.endDocument()
예제 #4
0
class Model2XMLConverter(object):
    """
    Convert the hierarchical object model of the RRS database (module
    rrslib.db.model) to the RRS-XML format.

    Attributes which the model declares with the type _UnknownType are
    omitted from the output.

    The constructor already opens the document (startDocument, the header
    comment and the root "output" element) and convert() ends it, so an
    instance is presumably meant for a single convert() call -- TODO confirm.
    """

    def __init__(self, stream=sys.stdout):
        """
        Create the SAX writer on `stream` and open the XML document.

        The default stream is whatever sys.stdout was when the class was
        defined, because default values are evaluated only once.
        """
        self.stream = stream
        self.xmlparser = SAXContentWriter(stream)
        self.xmlparser.startDocument()
        self._add_comment()
        self.xmlparser.startElement("output")
        # Maps every entity already written to the label emitted for it.
        self._labels = {}

    def _add_comment(self):
        """Write the header comment (origin, format, generation time)."""
        c = "\nOrigin: This XML was generated by Model2XMLConverter\n"
        c += "Format: $RRS-XML$\n"
        c += "Time:   $%s$\n" % time.ctime()
        self.xmlparser.comment(c)

    def _resolve_obj_name(self, obj):
        """
        Return the XML tag name for `obj`.

        The name is the class name without its first three characters
        (presumably the "RRS" prefix of the model classes), lower-cased.
        """
        # __name__ gives the bare class name directly, no need to parse
        # the textual representation of the class.
        return obj.__class__.__name__[3:].lower()

    def _get_obj_attrs(self, obj):
        """
        Collect the XML attributes of the element describing `obj`.

        Only "id", "credibility" and "module" are considered, and only when
        obj.isset() reports them; "module" is written as "source-module".
        Values are converted with str().
        """
        a = {}
        for attr in ("id", "credibility", "module"):
            if obj.isset(attr):
                xmlattr = "source-module" if attr == "module" else attr
                a[xmlattr] = str(obj.get(attr))
        return a

    def _get_role(self, attrstr):
        """
        Split an attribute name into a (tagname, role) tuple.

        - "publisher" maps to ('organization', 'publisher').
        - A name without underscore, or a name listed in db_tables, is
          returned unchanged with role None.
        - Otherwise the last underscore-separated part is tried as the role
          (the rest must be in db_tables), then the first part.
        - If neither split yields a name from db_tables, the attribute is
          returned unchanged with role None.
        """
        if attrstr == "publisher":
            return ('organization', attrstr)
        if "_" not in attrstr or attrstr in db_tables:
            return (attrstr, None)
        parts = attrstr.split("_")
        # <table>_<role>
        without_last = "_".join(parts[:-1])
        if without_last in db_tables:
            return (without_last, parts[-1])
        # <role>_<table>
        without_first = "_".join(parts[1:])
        if without_first in db_tables:
            return (without_first, parts[0])
        # it was not a role, just an attribute with an underscore
        return (attrstr, None)

    def _convert_raw_attribute(self, obj, attrstr):
        """
        Write a plain attribute of `obj` as an empty element.

        The element carries the value in its "value" attribute (booleans
        become 'yes'/'no') and, when _get_role() finds one, a "role"
        attribute. Nothing is written when obj.isset(attrstr) is false.
        """
        if not obj.isset(attrstr):
            return
        if isinstance(obj, _RRSDbEntityRelationship):
            attr = obj.get_attribute(attrstr)
        else:
            attr = obj.get(attrstr)
        tagname, role = self._get_role(attrstr)
        if isinstance(attr, bool):
            val = 'yes' if attr else 'no'
        else:
            val = attr
        d = {"value": self._unicode(val)}
        if role is not None:
            d['role'] = role
        self.xmlparser.startElement(tagname, d)
        self.xmlparser.endElement()

    def _hash(self):
        """
        Return a random label of five hexadecimal digits which is not used
        by any entity in self._labels yet.
        """
        while 1:
            # randint() includes both bounds, so 0..15 is exactly one hex
            # digit; "%x" keeps the zero digit (stripping "0x" with lstrip
            # would also strip a zero value away).
            h = "".join(["%x" % random.randint(0, 15) for _ in range(5)])
            if h in self._labels.values():
                continue
            return h

    def _unicode(self, string):
        """
        Return `string` as a unicode object.

        Unicode objects are returned as they are, anything else is passed
        through str() and decoded as UTF-8.
        """
        if isinstance(string, unicode):
            return string
        return unicode(str(string), encoding='utf-8')

    def _convert_relationships(self, attrstr, relationships):
        """
        Write the entities reachable through a list of relationship objects
        (abstraction of N:N junction tables).

        Every attribute of a relationship which is not None is attached, as
        a string, to the element of each entity of that relationship.
        """
        _, role = self._get_role(attrstr)
        for item in relationships:
            raw_rel_attrs = {}
            for entattrstr in item.__dict__:
                if entattrstr in ("__types__", "_parent", "_entities"):
                    continue
                entattr = getattr(item, entattrstr)
                if entattr is not None:
                    raw_rel_attrs[entattrstr] = str(entattr)
            # iterate over all entities of relation (i.e Publication and Project)
            for ent in item._entities:
                self._conversion(ent, role=role,
                                 add_relation_attributes=raw_rel_attrs)

    def _conversion(self, obj, role=None, add_relation_attributes=None):
        """
        Recursively write `obj` and everything it references.

        The first occurrence of an entity gets a "label" attribute and its
        full content; every later occurrence is written as an empty element
        with a "ref" attribute holding that label.

        @param obj: the entity to write, an instance of _RRSDatabaseEntity
        @param role: value of the "role" attribute, or None for no role
        @param add_relation_attributes: mapping of additional attributes
            for the element, or None
        @raise RRSXMLConverterError: if obj is not a _RRSDatabaseEntity
        """
        if not isinstance(obj, _RRSDatabaseEntity):
            raise RRSXMLConverterError("Error while conversion model->rrs-xml. "
                                       "Failed to convert %s" % str(type(obj)))
        name = self._unicode(self._resolve_obj_name(obj))
        attrs = self._get_obj_attrs(obj)
        if role is not None:
            attrs['role'] = role
        if add_relation_attributes:
            for k in add_relation_attributes:
                attrs[k] = self._unicode(add_relation_attributes[k])
        # already written: create only a reference to the label
        if obj in self._labels:
            attrs['ref'] = self._labels[obj]
            self.xmlparser.startElement(name, attrs)
            self.xmlparser.endElement()
            return
        hsh = self._hash()
        self._labels[obj] = hsh
        attrs['label'] = hsh
        self.xmlparser.startElement(name, attrs)
        for attrstr in obj.__dict__:
            # these are either internal or already written as XML attributes
            if attrstr in ("__types__", "id", "credibility", "module", "lev",
                           "_table_name"):
                continue
            # pass all unknown attributes
            if attrstr in obj.__types__ and obj.__types__[attrstr] == _UnknownType:
                continue
            attr = getattr(obj, attrstr)
            if type(attr) is list:
                # list of relationship objects
                self._convert_relationships(attrstr, attr)
            elif isinstance(attr, _RRSDatabaseEntity):
                _, subrole = self._get_role(attrstr)
                self._conversion(attr, role=subrole)
            else:
                self._convert_raw_attribute(obj, attrstr)
        self.xmlparser.endElement()

    def convert(self, *args):
        """
        Converts hierarchical object model of RRS database to rrs-xml format in
        order of RRS-XML specification on
        https://merlin.fit.vutbr.cz/nlp-wiki/index.php/Rrs_moduleapi

        Every argument is either one entity or a list/tuple of entities.
        compile() is called on each entity before it is written. Finally
        the document is ended on the writer.
        """
        self._labels = {}  # clean labels
        for obj in args:
            if isinstance(obj, (list, tuple)):
                for o in obj:
                    o.compile()
                    self._conversion(o)
            else:
                obj.compile()
                self._conversion(obj)
        self.xmlparser.endDocument()