Ejemplo n.º 1
0
def _copy_best_effort(src, dest_dir):
    """Copy the file ``src`` into ``dest_dir`` without raising on failure.

    A failed copy (for example a missing source file) is logged as a
    warning and otherwise ignored, so one bad file does not abort the
    whole export.
    """
    import logging
    import shutil

    try:
        shutil.copy(src, dest_dir)
    except OSError as exc:
        logging.getLogger(__name__).warning(
            "could not copy %s to %s: %s", src, dest_dir, exc)


def move_article_and_image(
        article_path="/home/jfqiao/wechat_articles/",
        image_path="/data/who_focus/image/",
        article_dest="/home/jfqiao/tmp_article/",
        image_dest="/home/jfqiao/tmp_image/"):
    """Copy the stored files of selected articles into staging directories.

    Despite the name, files are copied, not moved. The selected articles
    are every ``article_id`` listed in ``t_article_status`` plus every
    ``t_article`` row whose ``origin`` is "中国政法大学". For each article
    the article file and its ``_abstract`` companion are copied to
    ``article_dest``; the cover image and its ``.txt`` companion are copied
    to ``image_dest`` unless the image URL points at ``mmbiz.qpic``.

    :param article_path: directory holding the saved article files.
    :param image_path: directory holding the saved image files.
    :param article_dest: directory receiving the article copies.
    :param image_dest: directory receiving the image copies.
    :return: None.
    """
    article_id_sql = "SELECT DISTINCT article_id from t_article_status"
    target_school_article_sql = (
        'SELECT id FROM t_article WHERE origin = "中国政法大学"')
    status_rows = DBUtil.select_datas(article_id_sql) or []
    school_rows = DBUtil.select_datas(target_school_article_sql) or []

    # The two queries expose the id under different column names; read each
    # with its own key instead of assuming "article_id" everywhere.
    candidate_ids = [row["article_id"] for row in status_rows]
    candidate_ids.extend(row["id"] for row in school_rows)

    seen_ids = set()
    for article_id in candidate_ids:
        # Skip NULL ids and articles already handled via the other query.
        if article_id is None or article_id in seen_ids:
            continue
        seen_ids.add(article_id)

        sql = "SELECT url, image_url FROM t_article WHERE id=%s" % article_id
        article = DBUtil.select_data(sql)
        if not article:
            continue

        article_url = article["url"]
        if article_url:
            # On-disk names are the URL with "/" and ":" stripped.
            article_file_name = article_url.replace("/", "").replace(":", "")
            article_file = os.path.join(article_path, article_file_name)
            _copy_best_effort(article_file, article_dest)
            _copy_best_effort(article_file + "_abstract", article_dest)

        image_url = article["image_url"]
        if not image_url:
            continue
        # mmbiz.qpic images are skipped (presumably not stored locally -
        # TODO confirm).
        if image_url.startswith(("https://mmbiz.qpic", "http://mmbiz.qpic")):
            continue

        # Drop the query string before deriving the on-disk image name.
        image_base = image_url.split("?", 1)[0]
        image_file_name = image_base.replace(":", "").replace("/", "")
        image_file = os.path.join(image_path, image_file_name)
        _copy_best_effort(image_file, image_dest)
        _copy_best_effort(image_file + ".txt", image_dest)
Ejemplo n.º 2
0
 def get_article_info(date_str):
     """
     Fetch id, url and image_url of articles published at or before a time.

     NOTE(review): the original docstring described this as selecting
     articles published *after* the given time, but the query compares with
     ``publish_time <=``; confirm which direction is intended.

     :param date_str: timestamp string, formatted as yyyy-mm-dd HH:MM:SS.
     :return: whatever ``DBUtil.select_datas`` returns for the query; per the
         original note, a list of dict rows indexed by column name.
     """
     query_template = (
         'SELECT id, url, image_url FROM t_article WHERE publish_time <= "%s"')
     return DBUtil.select_datas(query_template % date_str)