Esempio n. 1
0
#!/usr/bin/python
#coding:utf8
# Created:  2013-11-11
#

import sys
sys.path.append('..')
from Page import Page
from lxml import etree

# Build a sample Page and populate each field with a placeholder value.
page = Page()
page.title = u'title'
page.ns = u'0'
page.id = u'10'
page.text = u'text'
page.redirect = u'redirect'

# Print the page through each of its export methods.  Every call prints a
# single expression, so the parenthesised form produces the same output under
# Python 2 and is also valid syntax under Python 3.
print(page.toTuple())
print(page.toDict())
# NOTE(review): noNone=True presumably drops fields whose value is None --
# confirm against Page.toDict.
print(page.toDict(noNone=True))
# toXML() is expected to return an lxml element; tostring() serialises it.
print(etree.tostring(page.toXML()))


Esempio n. 2
0
    def getPageData(self,page):
        """Collect the requested fields of one page into a Page object.

        page -- iterable of element-like children; each child must expose
                ``tag`` and ``text`` and, for redirect / revision children,
                ``get`` / ``find``.

        A field is read when it is listed in ``self.fields`` or when
        ``self.select`` holds selectors for it.  Selectors are called with
        ``data.toTuple()`` right after their field has been stored, so they
        see every field collected up to that point.

        Returns the populated Page, or None as soon as one selector rejects
        the page.
        """
        data = Page()
        for elem in page:
            tag = elem.tag
            name = self._fieldForTag(tag)
            if name is None:
                if tag == self.tags.upload:
                    break
                continue
            if not self._loadField(data, name, elem):
                return None
            if name == u'text':
                # Only the first revision is ever used, so stop here even when
                # its text was neither requested nor filtered; there is no
                # point in walking the remaining revisions of the page.
                break
        return data

    def _fieldForTag(self, tag):
        """Return the Page field name fed by ``tag``, or None if there is none."""
        tags = self.tags
        # Checked in a fixed order; the first matching tag wins.
        if tag == tags.title:
            return u'title'
        if tag == tags.ns:
            return u'ns'
        if tag == tags.id:
            return u'id'
        if tag == tags.redirect:
            return u'redirect'
        if tag == tags.revision:
            # A revision element only contributes its text.
            return u'text'
        return None

    def _extractValue(self, name, elem):
        """Read the raw value of field ``name`` out of ``elem``."""
        if name == u'redirect':
            # The redirect target is stored in the element's title attribute.
            return elem.get(u'title')
        if name == u'text':
            text = elem.find(self.tags.text).text
            return text if self.parse_text is None else self.parse_text(text)
        return elem.text

    def _loadField(self, data, name, elem):
        """Store field ``name`` on ``data`` and run its selectors.

        Returns False when a selector rejects the page, True otherwise.
        """
        selectors = getattr(self.select, name)
        if selectors is None:
            # No filtering on this field: read it only if it was requested.
            if name in self.fields:
                setattr(data, name, self._extractValue(name, elem))
            return True
        # The selectors need the value even when it is not part of the output.
        setattr(data, name, self._extractValue(name, elem))
        if not all(select(data.toTuple()) for select in selectors):
            return False
        if name not in self.fields:
            # The value was needed for filtering only; do not expose it.
            setattr(data, name, None)
        return True