class CNYES: link_url = 'http://news.cnyes.com/Ajax.aspx?Module=GetRollNews' def __init__(self): self.web = WEB() def fetchListDOM(self, datum): the_page = self.web.getRawData(self.link_url, datum) # 將網頁轉成結構化資料 parser = etree.XMLParser() root = etree.parse(StringIO.StringIO(the_page), parser) # 抓指定位置的連結 return root.xpath('.//Table1') def fetchContent(self, url): the_page = self.web.getRawData(url) parser = etree.HTMLParser() root = etree.parse(StringIO.StringIO(the_page), parser) contents = root.xpath('//*[@id="newsText"]//text()') content = [] for c in contents: part = c.strip() if part != "": content.append(part) info = root.xpath("//*[contains(@class, 'info')]")[0].text.strip() return {"content": content, "info": info} def fetchNews(self, data, target_dt, limit=0): rows = self.fetchListDOM(data) rows_cnt = len(rows) i = 0 news = [] for row in rows: try: datum = {} datum['Title'] = row.xpath('.//NEWSTITLE')[0].text.strip() datum['Link'] = 'http://news.cnyes.com' + row.xpath( './/SNewsSavePath')[0].text.strip() datum['ClassCN'] = row.xpath('.//ClassCName')[0].text.strip() datum['ClassEN'] = row.xpath('.//CLASSENAME')[0].text.strip() #datum['NewsTime'] = t.strftime("%Y-%m-%d ")+row.xpath('.//NewsTime')[0].text.strip() datum['NewsTime'] = datetime.datetime.strptime( target_dt.strftime("%Y-%m-%d ") + row.xpath('.//NewsTime')[0].text.strip(), "%Y-%m-%d %H:%M:%S") #datum['CreateDate'] = row.xpath('.//CreateDate')[0].text.strip().replace("T"," ").replace("+08:00","") datum['CreateDate'] = datetime.datetime.strptime( row.xpath('.//CreateDate')[0].text.strip().replace( "T", " ").replace("+08:00", ""), "%Y-%m-%d %H:%M:%S") news.append(datum) except AttributeError: print 'there exists a None object ' if limit > 0 and i >= limit: break return news
class CNYES: link_url = 'http://news.cnyes.com/Ajax.aspx?Module=GetRollNews' def __init__(self): self.web = WEB() def fetchListDOM(self,datum): the_page = self.web.getRawData(self.link_url,datum) # 將網頁轉成結構化資料 parser = etree.XMLParser() root = etree.parse(StringIO.StringIO(the_page),parser) # 抓指定位置的連結 return root.xpath('.//Table1') def fetchContent(self,url): the_page = self.web.getRawData(url) parser = etree.HTMLParser() root = etree.parse(StringIO.StringIO(the_page),parser) contents = root.xpath('//*[@id="newsText"]//text()') content = [] for c in contents: part = c.strip() if part != "": content.append(part) info = root.xpath("//*[contains(@class, 'info')]")[0].text.strip() return {"content":content,"info":info} def fetchNews(self,data,limit=0): rows = self.fetchListDOM(data) rows_cnt = len(rows) i = 0 news = [] for row in rows: #計數器 i = i+1 print "\r[%d/%d] (%.2f%%)"%(i,rows_cnt,float(i)/rows_cnt*100.0), datum ={} datum['Title'] = row.xpath('.//NEWSTITLE')[0].text.strip() datum['Link'] = 'http://news.cnyes.com'+row.xpath('.//SNewsSavePath')[0].text.strip() datum['ClassCN'] = row.xpath('.//ClassCName')[0].text.strip() datum['ClassEN'] = row.xpath('.//CLASSENAME')[0].text.strip() #datum['NewsTime'] = t.strftime("%Y-%m-%d ")+row.xpath('.//NewsTime')[0].text.strip() datum['NewsTime'] = datetime.datetime.strptime(t.strftime("%Y-%m-%d ")+row.xpath('.//NewsTime')[0].text.strip(),"%Y-%m-%d %H:%M:%S") #datum['CreateDate'] = row.xpath('.//CreateDate')[0].text.strip().replace("T"," ").replace("+08:00","") datum['CreateDate'] = datetime.datetime.strptime(row.xpath('.//CreateDate')[0].text.strip().replace("T"," ").replace("+08:00",""),"%Y-%m-%d %H:%M:%S") try: d = self.fetchContent(datum['Link']) datum['Content'],datum["Info"] = d["content"],d["info"] except: print "Error at getContent" news.append(datum) if limit>0 and i >=limit: break return news
def __init__(self, init_link):
    """Remember the crawler's entry URL and set up the HTTP helper."""
    self.init_link = init_link
    # Project-local HTTP helper shared by this object's fetch methods.
    self.web = WEB()
def __init__(self):
    # Create the HTTP helper used by this object's fetch methods.
    # NOTE(review): `WEB` is project-local and opaque here -- presumably a
    # request wrapper; confirm against its definition.
    self.web = WEB()
def __init__(self, init_link, diaster_type):
    """Store the entry URL and disaster category; create the HTTP helper.

    NOTE(review): `diaster_type` looks like a typo for `disaster_type`,
    but the parameter and attribute names may be read by external code,
    so they are kept as-is -- confirm before renaming.
    """
    self.diaster_type = diaster_type
    self.init_link = init_link
    # Project-local HTTP helper shared by this object's fetch methods.
    self.web = WEB()