예제 #1
0
def get_comic_id_with_status():
    """Fetch the id of every ``Comic`` row whose ``status`` is 0.

    Returns:
        The rows produced by ``cursor.fetchall()``; each row carries a single
        column, the comic id.
    """
    find_comic_id_list = "select id from Comic where status = 0"
    conn = comic_hentai_data_source.get_conn()
    cursor = conn.cursor()
    try:
        cursor.execute(find_comic_id_list)
        return cursor.fetchall()
    finally:
        # Release the cursor even when the query fails. The connection is
        # left open on purpose: whether get_conn() hands out a handle shared
        # with callers cannot be determined from this function.
        cursor.close()
예제 #2
0
def get_classified_id_list():
    """Fetch the id of every row in the ``Classified`` table.

    Returns:
        The rows produced by ``cursor.fetchall()``; each row carries a single
        column, the classification id.
    """
    find_classified_id_list = "select id from Classified"
    conn = comic_hentai_data_source.get_conn()
    cursor = conn.cursor()
    try:
        cursor.execute(find_classified_id_list)
        return cursor.fetchall()
    finally:
        # Release the cursor even when the query fails. The connection is
        # left open on purpose: whether get_conn() hands out a handle shared
        # with callers cannot be determined from this function.
        cursor.close()
예제 #3
0
def insert_into_category():
    """Attach randomly chosen comics to every classification.

    For each id returned by ``get_classified_id_list()``, up to 20 comics are
    sampled from ``get_comic_id_with_status()`` and one ``Category`` row is
    inserted per (classification, comic) pair. All inserts are committed
    together at the end, and the connection is closed whether or not the run
    succeeds.
    """
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print("开始获取漫画和用户ID")
    conn = comic_hentai_data_source.get_conn()
    try:
        comic_id_list = get_comic_id_with_status()
        classified_id_list = get_classified_id_list()
        # random.sample raises ValueError when asked for more items than the
        # population holds, so cap the sample at the number of comics.
        sample_size = min(20, len(comic_id_list))
        # One cursor serves every insert instead of one cursor per row.
        cursor = conn.cursor()
        try:
            for classified_id in classified_id_list:
                print("开始添加分类ID为" + str(classified_id[0]) + "的漫画")
                comic_id_list_child = random.sample(comic_id_list, sample_size)
                print("分类漫画获取完成,开始添加")
                for comic_id in comic_id_list_child:
                    insert_sql = (
                        "insert into Category(classifiedId, targetId, targetType, created, updated, isDeleted, status)values ("
                        + str(classified_id[0]) + ", " + str(comic_id[0])
                        + ", 0, UNIX_TIMESTAMP(), UNIX_TIMESTAMP(), 0, 0)"
                    )
                    cursor.execute(insert_sql)
        finally:
            cursor.close()
        conn.commit()
    finally:
        conn.close()
def get_comic():
    """Send the id and title of every ``Comic`` row to an HTTP endpoint.

    Each row becomes a ``curl -XPUT`` call against
    ``http://db.hope6537.com:9200/comichentai/comic/<id>`` whose body is a
    JSON document with ``id`` and ``title`` keys. Every command line is
    printed before it is executed; the command's status and output are
    discarded.
    """
    conn = comic_hentai_data_source.get_conn()
    find_comic_simple_info = "select id, title from Comic"
    cursor = conn.cursor()
    try:
        cursor.execute(find_comic_simple_info)
        result = cursor.fetchall()
    finally:
        # The rows are already in memory, so the cursor can be released
        # before the (slow) upload loop starts.
        cursor.close()
    operator = "curl -XPUT "

    for r in result:
        # Named comic_id rather than id so the builtin is not shadowed.
        comic_id = str(r[0])
        # The payload is wrapped in single quotes on the command line, so a
        # single quote inside the title would end the shell argument early.
        title = str(r[1]).replace("'", "&#39")
        data = json.dumps({
            "id": comic_id,
            "title": title
        })
        url = "'http://db.hope6537.com:9200/comichentai/comic/" + comic_id + "'"
        tmp = operator + url + " -d '" + data + "'"
        print(tmp)
        commands.getstatusoutput(tmp)
예제 #5
0
def write_comic_data_to_db(comic_list):
    """Insert the comics in *comic_list* that are not yet recorded in ``Capture``.

    Args:
        comic_list: Sequence of mappings; each must provide a ``'comicId'``
            entry, which is matched against ``Capture.foreignId``.

    The ids already present in ``Capture`` are looked up first and handed to
    ``flush_comic_data`` together with *comic_list*; every comic that remains
    is written with ``insert_comic_data`` followed by ``insert_capture_data``.
    The connection is closed whether or not the run succeeds.
    """
    foreign_ids = [comic['comicId'] for comic in comic_list]
    conn = comic_hentai_data_source.get_conn()
    try:
        # An empty batch would produce the invalid SQL "in ()", so the
        # duplicate lookup only runs when there is something to look up.
        if foreign_ids:
            # Step 1: find the ids that are already stored. The ids are bound
            # as parameters instead of being spliced into the statement.
            # NOTE(review): "%s" is the placeholder style of the common MySQL
            # drivers - confirm against comic_hentai_data_source.
            placeholders = ",".join(["%s"] * len(foreign_ids))
            find_sql = "select foreignId from Capture where foreignId in (" + placeholders + ")"
            cursor = conn.cursor()
            try:
                cursor.execute(find_sql, tuple(foreign_ids))
                # Every row returned here is a duplicate to be excluded.
                values = cursor.fetchall()
            finally:
                cursor.close()
            # Drop the duplicates from the batch.
            comic_list = flush_comic_data(comic_list, values)
        print(now() + "经过数据去重后,要插入的漫画数量为" + str(len(comic_list)))
        # Step 2: write what is left.
        for comic in comic_list:
            comic_id = insert_comic_data(comic, conn)
            # Then record the comic in the Capture table.
            insert_capture_data(comic, comic_id, conn)
        print(now() + "完成写入")
    finally:
        conn.close()
def write_comic_data_to_db(comic_list):
    """Insert the comics in *comic_list* that are not yet recorded in ``Capture``.

    Args:
        comic_list: Sequence of mappings; each must provide a ``'comicId'``
            entry, which is matched against ``Capture.foreignId``.

    The ids already present in ``Capture`` are looked up first and handed to
    ``flush_comic_data`` together with *comic_list*; every comic that remains
    is written with ``insert_comic_data`` followed by ``insert_capture_data``.
    The connection is closed whether or not the run succeeds.
    """
    foreign_ids = [comic['comicId'] for comic in comic_list]
    conn = comic_hentai_data_source.get_conn()
    try:
        # An empty batch would produce the invalid SQL "in ()", so the
        # duplicate lookup only runs when there is something to look up.
        if foreign_ids:
            # Step 1: find the ids that are already stored. The ids are bound
            # as parameters instead of being spliced into the statement.
            # NOTE(review): "%s" is the placeholder style of the common MySQL
            # drivers - confirm against comic_hentai_data_source.
            placeholders = ",".join(["%s"] * len(foreign_ids))
            find_sql = "select foreignId from Capture where foreignId in (" + placeholders + ")"
            cursor = conn.cursor()
            try:
                cursor.execute(find_sql, tuple(foreign_ids))
                # Every row returned here is a duplicate to be excluded.
                values = cursor.fetchall()
            finally:
                cursor.close()
            # Drop the duplicates from the batch.
            comic_list = flush_comic_data(comic_list, values)
        print(now() + "经过数据去重后,要插入的漫画数量为" + str(len(comic_list)))
        # Step 2: write what is left.
        for comic in comic_list:
            comic_id = insert_comic_data(comic, conn)
            # Then record the comic in the Capture table.
            insert_capture_data(comic, comic_id, conn)
        print(now() + "完成写入")
    finally:
        conn.close()
예제 #7
0
def mysql_connect():
    """Call ``initAll`` for every table of the ``ComicHentai`` schema.

    The table names are read from ``information_schema.columns``;
    ``TestComic`` and ``TestUser`` are skipped. For each remaining table the
    column metadata rows (name, data type, nullability, comment) are fetched,
    the bookkeeping columns ``id``, ``created``, ``updated``, ``isDeleted``
    and ``status`` are dropped, and the rest is passed to
    ``initAll(table, columns)``. The cursor and connection are closed whether
    or not the run succeeds.
    """
    skipped_tables = ('TestComic', 'TestUser')
    skipped_columns = ('id', 'created', 'updated', 'isDeleted', 'status')

    conn = comic_hentai_data_source.get_conn()
    try:
        cursor = conn.cursor()
        try:
            # Collect every table currently defined in the schema.
            cursor.execute(
                "select distinct table_name from information_schema.columns where table_schema = 'ComicHentai' order by table_schema,table_name")
            tables = cursor.fetchall()
            print(tables)
            for row in tables:
                table = row[0]
                if table in skipped_tables:
                    continue
                # Restrict the lookup to the same schema as the table list;
                # filtering on table_name alone would also pick up columns of
                # a same-named table in any other schema on the server.
                cursor.execute(
                    "select column_name,data_type,is_nullable,column_comment from information_schema.columns where table_schema = 'ComicHentai' and table_name = '" + table + "' order by table_schema,table_name")
                values = cursor.fetchall()
                columns = [column for column in values
                           if column[0] not in skipped_columns]
                print(columns)
                initAll(table, columns)
        finally:
            cursor.close()
    finally:
        conn.close()
예제 #8
0
# encoding:utf-8
import json
import os
# 收集已经完成下载的漫画ID
import time
import comic_hentai_data_source
import oss2

# Module-level database connection, obtained as soon as this module is loaded.
conn = comic_hentai_data_source.get_conn()


def now():
    """Return the current local time formatted as ``[YYYY-MM-DD HH:MM:SS]``."""
    # Fractional seconds are dropped before converting to local time.
    whole_seconds = int(time.time())
    local_parts = time.localtime(whole_seconds)
    return time.strftime("[%Y-%m-%d %H:%M:%S]", local_parts)


# Report whether the module-level connection was obtained. Without a
# connection the script stops here; note that it exits with status 0.
if conn:
    print(now() + "数据库连接初始化成功")
else:
    print(now() + "数据库连接初始化失败,停止操作")
    exit(0)


# Collect the data that has already been downloaded
def collect_already_download_comic_id():
    """Scan ``./ComicData`` for ``.json`` entries and derive a comic id from each name.

    NOTE(review): only the beginning of this function is visible in this
    excerpt; how ``comic_id_list`` is filled and what is returned cannot be
    confirmed from here.
    """
    print(now() + "开始收集数据")
    comic_id_list = []
    for files in os.listdir("./ComicData"):
        # count() doubles as a truthiness test: any name containing ".json" passes
        if files.count(".json"):
            # Everything before the first ".json" is taken as the comic id
            comic_id = files.split(".json")[0]
            # Check whether this folder exists
# encoding:utf-8
import json
import os
# 收集已经完成下载的漫画ID
import time
import comic_hentai_data_source
import oss2

# Module-level database connection, obtained as soon as this module is loaded.
conn = comic_hentai_data_source.get_conn()


def now():
    """Return the current local time formatted as ``[YYYY-MM-DD HH:MM:SS]``."""
    # Fractional seconds are dropped before converting to local time.
    whole_seconds = int(time.time())
    local_parts = time.localtime(whole_seconds)
    return time.strftime("[%Y-%m-%d %H:%M:%S]", local_parts)


# Report whether the module-level connection was obtained. Without a
# connection the script stops here; note that it exits with status 0.
if conn:
    print(now() + "数据库连接初始化成功")
else:
    print(now() + "数据库连接初始化失败,停止操作")
    exit(0)


# 收集下载好的数据
def collect_already_download_comic_id():
    print(now() + "开始收集数据")
    comic_id_list = []
    for files in os.listdir("./ComicData"):
        if files.count(".json"):
            comic_id = files.split(".json")[0]
            # 查看下有没有这个文件夹
            if os.path.exists(os.getcwd() + "/ComicData/" + comic_id):