Exemple #1
0
def XPathTF(etl, data):
    """Run ``etl.XPath`` against the HTML held in ``data[etl.Column]``.

    This is a generator with two modes, selected by ``etl.IsManyData``:

    * truthy -- yield one item per matched node.  Each item is whatever
      ``extends.MergeQuery(ext, data, etl.NewColumn)`` returns, where
      ``ext`` carries the keys ``'Text'``, ``'HTML'`` and ``'OHTML'``
      (the latter two hold the same serialized markup).
    * falsy -- write the first match into ``data[etl.NewColumn]`` and
      yield ``data`` exactly once.

    Args:
        etl: Object exposing ``IsManyData``, ``Column``, ``XPath`` and
            ``NewColumn``.
        data: Mapping that holds the HTML source under ``etl.Column``.
            It is mutated in place in single-value mode.

    Yields:
        Merged results (many mode) or ``data`` itself (single mode).
    """
    from lxml import etree

    # Parsing and querying are identical for both modes, so do them once.
    tree = spider.GetHtmlTree(data[etl.Column])
    nodes = tree.xpath(etl.XPath)

    if etl.IsManyData:
        # NOTE(review): etree.tostring() needs element nodes, so this mode
        # presumably expects an element-selecting XPath -- confirm.
        for node in nodes:
            text = spider.getnodetext(node)
            html = etree.tostring(node).decode('utf-8')
            ext = {'Text': text, 'HTML': html, 'OHTML': html}
            yield extends.MergeQuery(ext, data, etl.NewColumn)
        return

    if nodes:
        first = nodes[0]
        # Attribute and text() queries return plain strings, which have no
        # ``.text`` attribute; fall back to the string value in that case.
        data[etl.NewColumn] = first.text if hasattr(first, 'text') else str(first)
    # With no match the row is passed through untouched instead of
    # aborting the generator with an IndexError.
    yield data
Exemple #2
0
def XPathTF(etl, data):
    """Run ``etl.XPath`` against the HTML held in ``data[etl.Column]``.

    This is a generator with two modes, selected by ``etl.IsManyData``:

    * truthy -- yield one item per matched node.  Each item is whatever
      ``extends.MergeQuery(ext, data, etl.NewColumn)`` returns, where
      ``ext`` carries the keys ``'Text'``, ``'HTML'`` and ``'OHTML'``
      (the latter two hold the same serialized markup).
    * falsy -- write the first match into ``data[etl.NewColumn]`` and
      yield ``data`` exactly once.

    Args:
        etl: Object exposing ``IsManyData``, ``Column``, ``XPath`` and
            ``NewColumn``.
        data: Mapping that holds the HTML source under ``etl.Column``.
            It is mutated in place in single-value mode.

    Yields:
        Merged results (many mode) or ``data`` itself (single mode).
    """
    from lxml import etree

    # Parsing and querying are identical for both modes, so do them once.
    tree = spider.GetHtmlTree(data[etl.Column])
    nodes = tree.xpath(etl.XPath)

    if etl.IsManyData:
        # NOTE(review): etree.tostring() needs element nodes, so this mode
        # presumably expects an element-selecting XPath -- confirm.
        for node in nodes:
            text = spider.getnodetext(node)
            html = etree.tostring(node).decode('utf-8')
            ext = {'Text': text, 'HTML': html, 'OHTML': html}
            yield extends.MergeQuery(ext, data, etl.NewColumn)
        return

    if nodes:
        first = nodes[0]
        # Attribute and text() queries return plain strings, which have no
        # ``.text`` attribute; fall back to the string value in that case.
        data[etl.NewColumn] = first.text if hasattr(first, 'text') else str(first)
    # With no match the row is passed through untouched instead of
    # aborting the generator with an IndexError.
    yield data
Exemple #3
0
 def transform(self, data):
     """Run ``self.XPath`` against the HTML held in ``data[self.Column]``.

     This is a generator with two modes, selected by ``self.IsManyData``:

     * truthy -- yield one item per matched node.  Each item is whatever
       ``extends.MergeQuery(ext, data, self.NewColumn)`` returns, where
       ``ext`` carries the keys ``'Text'``, ``'HTML'`` and ``'OHTML'``
       (the latter two hold the same serialized markup).
     * falsy -- pass the first match to ``setValue(data, self, value)``
       and yield ``data`` exactly once.

     Args:
         data: Mapping that holds the HTML source under ``self.Column``.

     Yields:
         Merged results (many mode) or ``data`` itself (single mode).
     """
     from lxml import etree

     # Parsing and querying are identical for both modes, so do them once.
     tree = spider.GetHtmlTree(data[self.Column])
     nodes = tree.xpath(self.XPath)

     if self.IsManyData:
         # NOTE(review): etree.tostring() needs element nodes, so this mode
         # presumably expects an element-selecting XPath -- confirm.
         for node in nodes:
             text = spider.getnodetext(node)
             html = etree.tostring(node).decode('utf-8')
             ext = {'Text': text, 'HTML': html, 'OHTML': html}
             yield extends.MergeQuery(ext, data, self.NewColumn)
         return

     if nodes:
         first = nodes[0]
         # Elements expose ``.text``; attribute and text() queries return
         # plain strings, so those are stored via their string value.
         value = first.text if hasattr(first, 'text') else str(first)
         setValue(data, self, value)
     # With no match the row is passed through untouched instead of
     # aborting the generator with an IndexError.
     yield data
Exemple #4
0
 def transform(self, data):
     """Run ``self.XPath`` against the HTML held in ``data[self.Column]``.

     This is a generator with two modes, selected by ``self.IsManyData``:

     * truthy -- yield one item per matched node.  Each item is whatever
       ``extends.MergeQuery(ext, data, self.NewColumn)`` returns, where
       ``ext`` carries the keys ``'Text'``, ``'HTML'`` and ``'OHTML'``
       (the latter two hold the same serialized markup).
     * falsy -- pass the first match to ``setValue(data, self, value)``
       and yield ``data`` exactly once.

     Args:
         data: Mapping that holds the HTML source under ``self.Column``.

     Yields:
         Merged results (many mode) or ``data`` itself (single mode).
     """
     from lxml import etree

     # Parsing and querying are identical for both modes, so do them once.
     tree = spider.GetHtmlTree(data[self.Column])
     nodes = tree.xpath(self.XPath)

     if self.IsManyData:
         # NOTE(review): etree.tostring() needs element nodes, so this mode
         # presumably expects an element-selecting XPath -- confirm.
         for node in nodes:
             text = spider.getnodetext(node)
             html = etree.tostring(node).decode('utf-8')
             ext = {'Text': text, 'HTML': html, 'OHTML': html}
             yield extends.MergeQuery(ext, data, self.NewColumn)
         return

     if nodes:
         first = nodes[0]
         # Elements expose ``.text``; attribute and text() queries return
         # plain strings, so those are stored via their string value.
         value = first.text if hasattr(first, 'text') else str(first)
         setValue(data, self, value)
     # With no match the row is passed through untouched instead of
     # aborting the generator with an IndexError.
     yield data