def _flatten_url(url):
    """Return *url* with every '/' and ':' removed (the on-disk file-name form)."""
    return url.replace("/", "").replace(":", "")


def _copy_into(src, dest_dir):
    """Copy file *src* into directory *dest_dir*; log instead of raising on failure."""
    import logging
    import shutil

    try:
        shutil.copy(src, dest_dir)
    except OSError as err:
        # Best effort, like the shell `cp` this replaces: one missing file
        # must not abort the whole export.
        logging.getLogger(__name__).warning(
            "could not copy %s to %s: %s", src, dest_dir, err)


def move_article_and_image():
    """
    Copy the stored files of selected articles into temporary directories.

    The selected articles are every ``article_id`` found in
    ``t_article_status`` plus every ``t_article`` row whose ``origin`` is
    "中国政法大学". For each of them:

    * the article file and its ``_abstract`` companion are copied from
      ``/home/jfqiao/wechat_articles/`` to ``/home/jfqiao/tmp_article/``;
    * unless the image URL starts with ``http(s)://mmbiz.qpic``, the image
      file and its ``.txt`` companion are copied from
      ``/data/who_focus/image/`` to ``/home/jfqiao/tmp_image/``.

    File names are derived from the URL by removing ``/`` and ``:`` (and, for
    images, dropping everything from the first ``?``). Copy failures are
    logged and skipped.

    :return: None
    """
    article_id_sql = "SELECT DISTINCT article_id from t_article_status"
    target_school_article_sql = "SELECT id FROM t_article WHERE origin = \"中国政法大学\""
    # `or []` is defensive: whether select_datas can return None is not
    # visible from this function.
    status_rows = DBUtil.select_datas(article_id_sql) or []
    school_rows = DBUtil.select_datas(target_school_article_sql) or []

    # The two queries expose the id under different column names
    # ("article_id" vs "id"), so normalise to plain ids before looping.
    candidate_ids = [row["article_id"] for row in status_rows]
    candidate_ids.extend(row["id"] for row in school_rows)

    # Drop duplicates while keeping first-seen order; an article can appear
    # in both result sets.
    seen = set()
    article_ids = []
    for article_id in candidate_ids:
        if article_id in seen:
            continue
        seen.add(article_id)
        article_ids.append(article_id)

    image_path = "/data/who_focus/image/"
    article_path = "/home/jfqiao/wechat_articles/"
    article_dest = "/home/jfqiao/tmp_article/"
    image_dest = "/home/jfqiao/tmp_image/"
    # Images hosted at these prefixes are skipped (not copied).
    remote_image_prefixes = ("https://mmbiz.qpic", "http://mmbiz.qpic")

    for article_id in article_ids:
        # The id comes straight from the database queries above, not from
        # user input, so it is interpolated as before.
        sql = "SELECT url, image_url FROM t_article WHERE id=%s" % article_id
        article = DBUtil.select_data(sql)
        if not article:
            # Referenced id has no matching t_article row.
            continue

        url = article["url"]
        if url is not None:
            article_file_name = _flatten_url(url)
            _copy_into(article_path + article_file_name, article_dest)
            _copy_into(article_path + article_file_name + "_abstract",
                       article_dest)

        image_url = article["image_url"]
        if image_url is None or image_url.startswith(remote_image_prefixes):
            continue
        # Strip the query string before turning the URL into a file name.
        image_file_name = _flatten_url(image_url.partition("?")[0])
        _copy_into(image_path + image_file_name, image_dest)
        _copy_into(image_path + image_file_name + ".txt", image_dest)
def get_article_info(date_str):
    """
    Fetch id, url and image_url of articles filtered by publish time.

    The query keeps rows of ``t_article`` whose ``publish_time`` is less than
    or equal to ``date_str``.
    NOTE(review): the comparison is ``<=`` (at or before the given time);
    confirm this is the intended direction for callers.

    :param date_str: time string interpolated into the query; expected
        format is yyyy-mm-dd HH:MM:SS.
    :return: the result of ``DBUtil.select_datas`` for the query, presumably
        a list of dicts keyed by column name (confirm against DBUtil).
    """
    query = 'SELECT id, url, image_url FROM t_article WHERE publish_time <= "%s"' % date_str
    return DBUtil.select_datas(query)