Ejemplo n.º 1
0
def main_center():
    stock_list_url = 'http://quote.eastmoney.com/stocklist.html'
    stock_info_url = 'https://gupiao.baidu.com/stock/'
    get_stock_list(stock_list_url, stock_info_url)
    # 筛选股票信息保存到数据库中
    print("待筛选的股票数量={}".format(len(STOCK_URL_LIST)))
    # 记录开始时间
    start_time = time.time()
    # 使用四个进程处理任务
    pool = Pool(4)
    pool.map(package_stock_info, STOCK_URL_LIST)
    pool.close()
    pool.join()
    # 列表去重
    new_stock_info_list = []
    for stock_info in STOCK_INFO_LIST:
        if stock_info not in new_stock_info_list:
            new_stock_info_list.append(stock_info)
    # 保存到DB
    # 实例化MySQL工具类并保存
    mysql_util_instance = mysql_util.MysqlUtil()
    mysql_util_instance.add_many(new_stock_info_list)
    mysql_util_instance.close()
    end_time = time.time()
    # 计算程序执行耗时
    total_time = end_time - start_time
    print("筛选股票信息完毕,共获取[{0}]只,耗时:{1:.2f}秒".format(len(new_stock_info_list), total_time))
Ejemplo n.º 2
0
def dump_to_db(formatdata):
    mysql_obj = mysql_util.MysqlUtil()
    inst_num = len(formatdata["question"])
    sql = "insert memdata (question, answer, type, last_modify_time, insert_time, familiar) values (%s,%s,%s,%s,%s,%s)"
    now = datetime.datetime.now()
    for i in range(inst_num):
        params = (formatdata["question"][i].strip(),
                  formatdata["answer"][i].strip(), type, now, now, 0)
        mysql_obj.execute(sql, params)
Ejemplo n.º 3
0
def single_down(apk_url_info, url, file_path):
    dbhandler = mysql_util.MysqlUtil()
    headers = {
               'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36',
               'Referer': apk_url_info[5],
               }
    try:
        start_time = time.localtime(time.time())
        r = requests.get(url, headers=headers, stream=True, timeout=(5, 15))
        with open(file_path, "wb") as fp:
            for chunk in r.iter_content(chunk_size=512):
                if chunk:
                    fp.write(chunk)
    except Exception:
        requestOK = False  # 失败重试
        for i in range(2):
            start_time = time.localtime(time.time())
            log_util.log_write_result("dlid = {}, 第{}次重试".format(apk_url_info[0], i+1))
            try:
                r = requests.get(url, headers=headers,
                                 stream=True, timeout=(10, 120))
                with open(file_path, "wb") as fp:
                    for chunk in r.iter_content(chunk_size=512):
                        if chunk:
                            fp.write(chunk)
                requestOK = True
                break
            except Exception:
                log_util.log_write("发生异常 = " + traceback.format_exc())

        if requestOK:
            # 下载成功
            end_time = time.localtime(time.time())
            sql = '''
                UPDATE sp_random_download_task_multiVer_copy phonedetect_service_emulator SET
                receive_size = %s, 
                begin_time=%s, 
                end_time = %s, 
                dl_status = %s,
                fullpath= %s  WHERE dl_id = %s ;

            '''
            dbhandler.update(sql, (os.stat(file_path).st_size, time.strftime(
                '%Y-%m-%d %H:%M:%S', start_time), time.strftime('%Y-%m-%d %H:%M:%S', end_time), 2, file_path, apk_url_info[0]))
            log_util.log_write_result("dlid = {}, {}下载完成".format(apk_url_info[0], file_path))    
        else:
            dbhandler.update(
                "update sp_random_download_task_multiVer_copy set dl_status = 4 where dl_id = %s and dl_status = 1", (apk_url_info[0],))
            log_util.log_write_result("dlid = {} 下载失败".format(apk_url_info[0]))
            try:
                os.remove(file_path)
            except Exception:
                log_util.log_write("发生异常 = " + traceback.format_exc())
Ejemplo n.º 4
0
 def __init__(self):
     web.header('Content-Type', 'text/html;charset=utf-8')
     self.mysql_util = mysql_util.MysqlUtil()
Ejemplo n.º 5
0
 def __init__(self):
     web.header('Content-Type', 'text/html;charset=utf-8')
     self.mysql_util = mysql_util.MysqlUtil()
     self.conf = json.load(open("conf/ebbinghaus.json"))
Ejemplo n.º 6
0
import os
import config
import log_util
import mysql_util
import downloader
import time
if __name__ == "__main__":
    # 创建下载目录
    if not os.path.exists(config.DOWNLOAD_PATH):
        os.makedirs(config.DOWNLOAD_PATH)
    sql = "select * from sp_random_download_task_multiVer_copy where dl_status = 0 limit 1"
    dbhandler = mysql_util.MysqlUtil()
    # 每次获取一条没有下载的数据
    cur_selected_item = dbhandler.fetchone(sql)
    while cur_selected_item:
        # 乐观锁
        # 当此更新语句成功影响行后才确认拿到下载链接
        res = dbhandler.update("update sp_random_download_task_multiVer_copy set dl_status = 1 where dl_id = %s and dl_status = 0" ,(cur_selected_item[0],))
        if not res:
            cur_selected_item = dbhandler.fetchone(sql)
            continue
        else:
            print("downloading dl_id = {}".format(cur_selected_item[0]))
            downloader.download_file(cur_selected_item, config.THREAD_NUM)
Ejemplo n.º 7
0
def download_file(apk_url_info, num_thread=5):
    global global_get_ok
    global_get_ok = True
    dbhandler = mysql_util.MysqlUtil()
    file_path = str(apk_url_info[0])+"-" + \
        apk_url_info[4]+apk_url_info[19]+apk_url_info[21]
    file_path = os.path.join(config.DOWNLOAD_PATH, file_path.strip() + '.apk')
    if os.path.exists(file_path):
        try:
            os.remove(file_path)
        except Exception:
            log_util.log_write("发生异常 = " + traceback.format_exc())


    r = requests.head(apk_url_info[3], allow_redirects=True)
    real_url = r.url
    if r.headers['Content-Type'] == 'application/vnd.android.package-archive':
        try:
            file_size = int(r.headers['content-length'])
        except:
            # 没有'content-length',无法多线程下载
            log_util.log_write_result("dlid = {} 采用普通下载".format(apk_url_info[0]))
            single_down(apk_url_info, real_url, file_path)
            return

        #  创建一个和要下载文件一样大小的文件

        fp = open(file_path, "wb")
        fp.truncate(file_size)
        fp.close()

        # 开始下载时间
        start_time = time.localtime(time.time())
        # 启动多线程写文件
        thread_list = []  # 线程存放列表

        part = file_size // num_thread
        for i in range(num_thread):
            start = part * i
            if i == num_thread - 1:   # 最后一块
                end = file_size
            else:
                end = start + part

            t = threading.Thread(target=Handler, kwargs={
                                 'start': start, 'end': end, 'url': real_url, 'filename': file_path, 'referer': apk_url_info[5], 'dlid':apk_url_info[0]})
            t.setDaemon(True)
            thread_list.append(t)
            t.start()

        # 等待所有线程下载完成
        for t in thread_list:
            t.join()

        if not global_get_ok:
            log_util.log_write_result("dlid = {} 下载失败".format(apk_url_info[0]))
            # 下载失败,清理下载的文件
            try:
                os.remove(file_path)
            except Exception:
                log_util.log_write("发生异常 = " + traceback.format_exc())
            dbhandler.update(
            "update sp_random_download_task_multiVer_copy set dl_status = 4 where dl_id = %s and dl_status = 1", (apk_url_info[0],))
            return
        # 结束下载时间
        end_time = time.localtime(time.time())
        sql = '''
            UPDATE sp_random_download_task_multiVer_copy phonedetect_service_emulator SET
            file_size = %s,
            receive_size = %s, 
            begin_time=%s, 
            end_time = %s, 
            dl_status = %s,
            fullpath= %s  WHERE dl_id = %s ;

         '''
        dbhandler.update(sql, (file_size, file_size, time.strftime('%Y-%m-%d %H:%M:%S', start_time),
                               time.strftime('%Y-%m-%d %H:%M:%S', end_time), 2, file_path, apk_url_info[0]))
        log_util.log_write_result("dlid = {}, {}下载完成".format(apk_url_info[0] ,file_path))
    else:
        # 更新数据库,为无效链接
        log_util.log_write_result("dlid = {} 为无效链接".format(apk_url_info[0]))
        dbhandler.update(
            "update sp_random_download_task_multiVer_copy set dl_status = 128 where dl_id = %s and dl_status = 1", (apk_url_info[0],))