def page_parser(self, url, page_content):
     """Print the paper id and path of every paper link found in a page.

     Every match of ``self.PAPER_PATH_RE`` in ``page_content`` is searched
     with ``self.PAPER_ID_RE``; for each path that yields an id, the line
     ``<id> <path>`` is written to stdout.

     ``url`` is accepted but not used by this method.

     A connection is obtained from ``helpers.mssqlconn()`` and is always
     closed before returning, even if scanning the page raises.  The
     database write itself is currently disabled (see the note in the
     loop), so the connection is not otherwise used.
     """
     con = helpers.mssqlconn()
     try:
         for path in self.PAPER_PATH_RE.findall(page_content):
             id_match = self.PAPER_ID_RE.search(path)
             if id_match is None:
                 # No paper id can be extracted from this path; skip it
                 # explicitly instead of hiding the failure behind a
                 # blanket ``except Exception: pass``.
                 continue
             paper_id = id_match.group(1)
             # NOTE: the insert is disabled and ids are only printed.  When
             # re-enabling it, add explicit handling for DB errors here.
             #con.execute_non_query(self.SUB_SQL, (int(paper_id), path))
             # Single-argument print with %-formatting gives the same
             # "<id> <path>" output as the Python 2 print statement.
             print('%s %s' % (paper_id, path))
     finally:
         con.close()
 def content_parser(self, url, page_content):
     """Extract one paper's fields from its page and insert them.

     The paper id is taken from ``url`` via ``self.PAPER_ID_RE`` (group 1,
     converted to ``int``).  The remaining fields are read from
     ``page_content`` with these selectors:

     * title       -- text of ``h1``
     * author      -- text of ``.author a``
     * abstract    -- text of ``.abstrack``
     * keywords    -- text of ``.keywords a``
     * paper_class -- ``value`` attribute of ``#wxClass``

     Any field that comes back empty is stored as ``'-----'``;
     ``download_url`` is always ``'-----'``.

     The row is inserted with ``self.INSERT_SQL``.  On success the paper id
     is printed; if the insert raises, ``not <paper_id>`` is printed and
     the error is not propagated.  The connection obtained from
     ``helpers.mssqlconn()`` is closed on every path.

     Raises:
         AttributeError: if ``self.PAPER_ID_RE.search(url)`` finds no match
             (``.group`` is then called on ``None``).
         ValueError: if the captured group cannot be converted by ``int``.
     """
     con = helpers.mssqlconn()
     try:
         missing = '-----'  # placeholder stored for any empty field
         p = pq(page_content)
         paper_id = int(self.PAPER_ID_RE.search(url).group(1))
         title = p('h1').text() or missing
         author = p('.author a').text() or missing
         # NOTE(review): '.abstrack' is kept exactly as in the original;
         # presumably it matches the class name used by the site -- confirm.
         abstract = p('.abstrack').text() or missing
         keywords = p('.keywords a').text() or missing
         paper_class = p('#wxClass').attr.value or missing
         download_url = missing
         try:
             con.execute_non_query(self.INSERT_SQL,
                                   (paper_id, title, author, abstract,
                                    keywords, paper_class, download_url))
             print(paper_id)
         except Exception:
             # Best-effort insert: report the failed id and carry on.
             print('not %s' % paper_id)
     finally:
         # The original never closed this connection.
         con.close()