def get_qsyk_and_insert(self, docid):
    """Download the full article identified by *docid* and insert it into MySQL.

    Parameters
    ----------
    docid : dict
        Article descriptor carrying the article id under ``'docid'`` and
        the cover image URL under ``'cover_img'``.

    Articles already present in the database (``self.db_has_exist``) are
    skipped, as are failed downloads and empty payloads.
    """
    cover_img = MySQLdb.escape_string(docid['cover_img'])
    docid = docid['docid']
    if self.db_has_exist(docid):
        return
    url = "http://c.3g.163.com/nc/article/%s/full.html" % str(docid)
    data = utils.download_page(url, True)
    if not data:
        return
    data = data[docid]
    if not data:
        return
    ptime = data['ptime']
    today = ptime.split(' ')[0]
    imgs = data['img']
    body = data['body'].encode('utf-8')
    # Strip spaces and normalize ASCII/full-width parentheses in the title.
    title = data['title'].replace(' ', '').replace('(', '-').replace(
        '(', '-').replace(')', '').replace(')', '')
    # Swap each in-body image placeholder for a real <img> tag.
    for img in imgs:
        body = body.replace(img['ref'],
                            "<img src=\"" + img['src'] + "\"/><hr>")
    # Double '%' so a later %%-style formatting pass (presumably inside
    # utils.insert_mysql or the DB driver) does not misread literal
    # percent signs -- TODO confirm against utils.insert_mysql.
    body = body.replace('%', '%%')
    body = MySQLdb.escape_string(body)
    # BUGFIX: the title was interpolated unescaped, so a single quote in
    # an article title broke the statement (and opened SQL injection);
    # escape it like body/cover_img.  NOTE(review): the remaining fields
    # are still string-interpolated; a parameterized query would be the
    # proper fix, but utils.insert_mysql only accepts a raw SQL string.
    title = MySQLdb.escape_string(title)
    sql = ("insert into wangyi(item_type, title, url, docid, cover_img, "
           "ptime, today, body) values('%s', '%s', '%s', '%s', '%s', "
           "'%s', '%s', '%s')") % (self._item_type, title, url, docid,
                                   cover_img, ptime, today, body)
    utils.insert_mysql(sql)
def get_qsyk_and_insert(self, docid):
    """Fetch one article by its descriptor dict and store it in MySQL.

    ``docid`` is a dict holding the article id under ``'docid'`` and the
    cover image URL under ``'cover_img'``.  Articles already recorded in
    the database are skipped.
    """
    escaped_cover = MySQLdb.escape_string(docid['cover_img'])
    article_id = docid['docid']
    if self.db_has_exist(article_id):
        return
    article_url = "http://c.3g.163.com/nc/article/%s/full.html" % str(article_id)
    page = utils.download_page(article_url, True)
    if page:
        detail = page[article_id]
        if detail:
            publish_time = detail['ptime']
            day = publish_time.split(' ')[0]
            raw_title = detail['title']
            # Drop spaces and normalize ASCII/full-width parentheses,
            # in the same order as sequential .replace() calls.
            for old, new in ((' ', ''), ('(', '-'), ('(', '-'),
                             (')', ''), (')', '')):
                raw_title = raw_title.replace(old, new)
            html_body = detail['body'].encode('utf-8')
            # Swap each image placeholder for an <img> tag.
            for image in detail['img']:
                tag = "<img src=\"" + image['src'] + "\"/><hr>"
                html_body = html_body.replace(image['ref'], tag)
            # Double '%' before escaping, then escape for MySQL.
            html_body = MySQLdb.escape_string(html_body.replace('%', '%%'))
            insert_stmt = (
                "insert into wangyi(item_type, title, url, docid, cover_img,"
                " ptime, today, body) values('%s', '%s', '%s', '%s', '%s',"
                " '%s', '%s', '%s')"
            ) % (self._item_type, raw_title, article_url, article_id,
                 escaped_cover, publish_time, day, html_body)
            utils.insert_mysql(insert_stmt)
def get_docid_from_json(self):
    """Fetch the article-list JSON for the configured start/end range and
    extract the docid of every daily "QingSongYiKe" entry in it.

    Downloads ``self._list_url + "<start>-<end>.html"``, parses the
    response as JSON, stores the list found under ``self._list_docid``
    in ``self._data``, and delegates to ``self.extract_docid()``.  Does
    nothing when the download fails or the expected key is absent.
    """
    url = self._list_url + str(self._start) + "-" + str(self._end) + ".html"
    self._data = utils.download_page(url)
    if self._data:
        self._data = json.loads(self._data)
        # BUGFIX: dict.has_key() is Python-2 only and was removed in
        # Python 3; the ``in`` operator is behavior-identical on both.
        if self._list_docid in self._data:
            self._data = self._data[self._list_docid]
            self.extract_docid()
def get_docid_from_json(self):
    """Download the list page for the configured start/end slice and pull
    out each daily entry's docid via ``extract_docid``.
    """
    list_page = "%s%s-%s.html" % (self._list_url, str(self._start),
                                  str(self._end))
    self._data = utils.download_page(list_page)
    if not self._data:
        return
    self._data = json.loads(self._data)
    if not self._data.has_key(self._list_docid):
        return
    self._data = self._data[self._list_docid]
    self.extract_docid()