def main():
    """Refresh the 3000 most recent articles from the acfun XML API.

    Selects ``ac_article_number`` for the 3000 rows of ``ac_article`` with
    the latest ``ac_article_date`` and requests each article's XML record
    from ``www.acfun.tv``.  When the download helper returns the sentinel
    string ``"ERROR"`` the article is handed to ``mark_removed``; otherwise
    the parsed fields are handed to ``update_article``.  Keywords are only
    read for the first 599 rows; later rows get an empty keyword string.

    The module-level ``cursor`` and ``conn`` are closed when the run ends,
    including when it is aborted by an exception.
    """
    # NOTE(review): an older comment here described selecting only articles
    # with ac_article_number % 7 == 0 (1/7 of the articles per day).  The
    # query below has no such filter.
    sql = (
        "SELECT ac_article_number "
        "FROM `ac_article` "
        "ORDER BY ac_article_date DESC LIMIT 3000 "
    )
    # The query string previously contained the currency-sign character
    # (U+00A4) followed by "t=yes", which is what "&current=yes" turns into
    # when "&curren" is decoded as an HTML entity.  The literal ampersand
    # form is restored here -- confirm against the API if in doubt.
    link = '/api/?type=xml&current=yes&charset=utf8&id='
    fields = ('arctitle', 'typeid', 'memberID', 'description', 'click', 'stow')

    try:
        cursor.execute(sql)
        for position, row in enumerate(cursor.fetchall(), 1):
            number = row[0]
            retfile = ac_core.gethtmlfile('www.acfun.tv', link + str(number))
            if retfile == "ERROR":
                # The article may have been removed from the site.
                mark_removed(number)
                continue
            try:
                info = parseString(retfile)
            except Exception:
                # Skip unparsable responses, but let KeyboardInterrupt and
                # SystemExit propagate so the run can still be stopped.
                print(str(number) + " xml broken")
                continue
            keywords = getText(info, 'keywords') if position < 600 else ''
            values = tuple(getText(info, name) for name in fields)
            update_article(number, values + (keywords,))
    finally:
        # Release the database handles even if a fetch or update failed.
        cursor.close()
        conn.close()
    print("\ndone")
def main():
    """Re-fetch and repair articles whose author information is missing.

    Selects ``ac_article_id`` and ``ac_article_link`` for every
    ``ac_article`` row that has an empty ``ac_author_name`` and an
    ``ac_author_id`` of 0.  For each row the link is printed, the page
    ``/v/<link>/`` is downloaded from ``www.acfun.tv`` and the result is
    passed to ``fix_article`` together with the article id.

    The module-level ``cursor`` and ``conn`` are closed when the run ends,
    including when it is aborted by an exception.
    """
    sql = (
        "SELECT ac_article_id, ac_article_link FROM `ac_article` "
        "WHERE ac_author_name = '' and ac_author_id = 0"
    )
    try:
        cursor.execute(sql)
        for article_id, article_link in cursor.fetchall():
            print(article_link)
            # NOTE(review): the download result is passed on unchecked;
            # confirm fix_article copes with a failed download.
            retfile = ac_core.gethtmlfile(
                'www.acfun.tv', '/v/' + article_link + '/')
            fix_article(retfile, article_id)
    finally:
        # Release the database handles even if a download or fix failed.
        cursor.close()
        conn.close()
    print("\ndone")
def main():
    """Refresh every article from the acfun XML API, oldest first.

    Selects ``ac_article_number`` for all rows of ``ac_article`` ordered by
    ascending ``ac_article_date`` and requests each article's XML record
    from ``www.acfun.tv``.  When the download helper returns the sentinel
    string ``"ERROR"`` the article is handed to ``mark_removed``; otherwise
    the parsed fields are handed to ``update_article``.  Keywords are only
    read for the first 599 rows; later rows get an empty keyword string.

    The module-level ``cursor`` and ``conn`` are closed when the run ends,
    including when it is aborted by an exception.
    """
    # NOTE(review): an older comment here described selecting only articles
    # with ac_article_number % 7 == 0 (1/7 of the articles per day).  The
    # query below has no such filter.
    sql = (
        "SELECT ac_article_number "
        "FROM `ac_article` "
        "ORDER BY ac_article_date ASC"
    )
    # The query string previously contained the currency-sign character
    # (U+00A4) followed by "t=yes", which is what "&current=yes" turns into
    # when "&curren" is decoded as an HTML entity.  The literal ampersand
    # form is restored here -- confirm against the API if in doubt.
    link = "/api/?type=xml&current=yes&charset=utf8&id="
    fields = ("arctitle", "typeid", "memberID", "description", "click", "stow")

    try:
        cursor.execute(sql)
        for position, row in enumerate(cursor.fetchall(), 1):
            number = row[0]
            retfile = ac_core.gethtmlfile("www.acfun.tv", link + str(number))
            if retfile == "ERROR":
                # The article may have been removed from the site.
                mark_removed(number)
                continue
            try:
                info = parseString(retfile)
            except Exception:
                # Skip unparsable responses, but let KeyboardInterrupt and
                # SystemExit propagate so the run can still be stopped.
                print(str(number) + " xml broken")
                continue
            keywords = getText(info, "keywords") if position < 600 else ""
            values = tuple(getText(info, name) for name in fields)
            update_article(number, values + (keywords,))
    finally:
        # Release the database handles even if a fetch or update failed.
        cursor.close()
        conn.close()
    print("\ndone")
def main():
    """Crawl the acfun list pages of every known category.

    For each category id in a fixed tuple, pages ``1`` to ``end - 1`` of
    ``/plus/list.php`` are downloaded from ``www.acfun.tv``, decoded as
    GB18030 and passed to ``process_text`` together with the category id.

    ``end`` defaults to 120.  When exactly one command-line argument is
    given it is treated as an integer category id: ``end`` becomes the
    number of stored articles in that category divided by 15 (rounded
    down) plus 10, and the function returns without crawling when that
    quotient is 0.  The computed ``end`` applies to all categories.

    The module-level ``cursor`` and ``conn`` are closed on every exit path.

    Raises:
        ValueError: if the command-line argument is not an integer.
    """
    categories = ('1', '8', '9', '10', '13', '14')
    end = 120
    try:
        if len(sys.argv) == 2:
            # Coerce to int before interpolating so arbitrary text from the
            # command line can never become part of the SQL statement.
            category_id = int(sys.argv[1])
            sql = ("SELECT COUNT(*) AS count FROM ac_article "
                   "WHERE ac_article_category = %d" % category_id)
            cursor.execute(sql)
            row = cursor.fetchone()
            # Floor division keeps `end` an integer for range() below;
            # presumably a list page holds 15 articles -- TODO confirm.
            end = row[0] // 15
            if end == 0:
                return
            end += 10

        for cate in categories:
            link = '/plus/list.php?typeid=' + cate + '&PageNo='
            for page in range(1, end):
                retfile = ac_core.gethtmlfile('www.acfun.tv', link + str(page))
                process_text(retfile.decode('GB18030'), cate)
    finally:
        # Runs for the early return above and for exceptions as well.
        cursor.close()
        conn.close()
    print("\ndone")