Code example #1
def update_sfd_in_list(stock_id_list,
                       sfd_dir,
                       smd_dir,
                       days,
                       force_update=False):
    now = datetime.datetime.now()
    if 8 <= now.hour <= 15:
        return

    update_log_path = sfd_dir + "/update.log"
    if is_smd_need_update(update_log_path) or force_update:
        # Split the id list into chunks of at most 200 ids, one worker thread per chunk.
        size = len(stock_id_list)
        id_list_list = []
        if size > 200:
            for i in range(0, size, 200):
                id_list_list.append(stock_id_list[i:i + 200])
        else:
            id_list_list.append(stock_id_list)

        t_list = []
        for i, cur_id_list in enumerate(id_list_list):
            t = threading.Thread(target=t_update_sfd_in_list,
                                 args=(cur_id_list, sfd_dir, smd_dir, days))
            t_list.append(t)
            t.start()

        for t in t_list:
            while t.is_alive():
                tools.delay(10)

        communicate.master_close()

        with open(update_log_path, 'w') as update_log_file:
            update_log_file.write(datetime.datetime.now().strftime("%Y/%m/%d/%H"))
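For reference, a hedged driver sketch: it assumes this function is exported from the project's dataio module (as the call in code example #4 suggests), and the paths, stock-id source, and day count below are placeholders.

# Hypothetical driver for update_sfd_in_list; paths and parameters are illustrative only.
import dataio

stock_ids = dataio.get_stock_id_list("data/listed_sid.txt")   # assumed listed-id file
dataio.update_sfd_in_list(stock_ids,
                          sfd_dir="data/sfd",
                          smd_dir="data/smd",
                          days=365 * 4,
                          force_update=True)   # skips the update.log freshness check
# Note: the function still returns immediately between 08:00 and 15:00.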
Code example #2
def update_livedata_dict(stock_id_list, livedata_dict):
    now = datetime.datetime.now()
    if (7 * 60 + 30) < (now.hour * 60 + now.minute) < (8 * 60 + 30):
        tools.delay(300)
        return

    logger.logp("update_livedata : start")

    t_start = datetime.datetime.now()

    # Split the id list into chunks of at most 100 ids per live-data request.
    size = len(stock_id_list)
    id_list_list = []
    if size > 100:
        for i in range(0, size, 100):
            id_list_list.append(stock_id_list[i:i + 100])
    else:
        id_list_list.append(stock_id_list)

    for i, cur_id_list in enumerate(id_list_list):
        logger.logp("get live data {} / {}".format(i + 1, len(id_list_list)))
        livedata_list = crawler.get_livedata_list(cur_id_list)
        logger.logp("get live data OK")
        if livedata_list is not None:
            logger.logp("read live data list")
            read_livedata_list(livedata_list, livedata_dict)
            logger.logp("read live data list OK")

    t_end = datetime.datetime.now()
    logger.logp("update_livedata : Total time = {} s".format(
        (t_end - t_start).total_seconds()))
    logger.logp("update_livedata : Done")
Code example #3
File: crawler.py Project: ybcsie/stock_server
def get_month_data(year, month, stock_id):
    logger.logp("Get month data: {} {}".format(year, month))

    arg = "STOCK_DAY?response=json&date={}{:02d}01&stockNo={}".format(
        year, month, stock_id)
    url = "http://www.twse.com.tw/exchangeReport/" + arg

    tools.delay(5)  # delay

    from socket import timeout

    max_try = 3
    while True:
        logger.logp("Trying connection...")
        try:
            res = urllib.request.urlopen(url, timeout=10)
            logger.logp("OK")

        except timeout:
            logger.logp("Error: urllib -- timeout")
            tools.wait_retry(logger, 10)
            continue

        except Exception as e:
            logger.logp("Error: urllib")
            logger.logp(e)
            tools.wait_retry(logger, 30)
            continue

        logger.logp("Trying json decode...")
        data = ""
        try:
            data = json.loads(res.read().decode())
            if data["stat"] != "OK":
                if data["stat"] == "很抱歉,沒有符合條件的資料!":
                    return []
                logger.logp("data error: stat = {}".format(data["stat"]))

                tools.wait_retry(logger, 5)
                if max_try == 0:
                    return None

                max_try -= 1
                continue

        except Exception:
            logger.logp("Error: json \"{}\"".format(data))
            tools.wait_retry(logger, 5)
            continue

        # check content date
        if tools.check_smd_content_by_key(data["data"][0], year * 100 + month):
            return data["data"]

        else:
            logger.logp("error content: {} {}".format(year * 100 + month,
                                                      data["data"]))
            tools.wait_retry(logger, 5)
            continue
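A hedged usage sketch: the stock id and month are placeholders, and the return contract (a list of daily rows, an empty list when the exchange reports no matching data, None after repeated stat errors) follows from the code above.

# Hypothetical call to get_month_data; "2330" and 2020/01 are examples only.
import crawler

rows = crawler.get_month_data(2020, 1, "2330")
if rows is None:
    print("gave up after repeated stat errors")
elif not rows:
    print("no data for that month")
else:
    for row in rows:          # one row per trading day from the TWSE STOCK_DAY report
        print(row)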
Code example #4
def main_loop():
    logger = msgopt.Logger("main", print)
    global is_ready
    while True:
        # Once these flags are cleared during the day, the inner loop breaks at
        # 15:00 (updated) or 01:00 (dtd_updated) and the full update runs again.
        updated = True
        dtd_updated = True

        logger.logp("update_listed_list : start")
        dataio.update_listed_list(listed_sid_path)
        logger.logp("update_listed_list : done\n")

        dataio.update_all_dtd(dtd_dir, months)

        listed_id_list = dataio.get_stock_id_list(listed_sid_path)

        logger.logp("update_smd_in_list : start")
        force_update = False
        dataio.update_smd_in_list(
            listed_id_list, trade_data_dir, months, force_update)
        logger.logp("update_smd_in_list : done\n")

        dataio.update_sfd_in_list(
            listed_id_list, sfd_dir, trade_data_dir, 365 * 4, force_update)

        dataio.update_livedata_dict(listed_id_list, livedata_dict)

        is_ready = True

        while True:
            now = datetime.datetime.now()

            if now.hour == 15 and not updated:
                break

            if now.hour == 1 and not dtd_updated:
                break

            if not updated:
                dataio.update_livedata_dict(listed_id_list, livedata_dict)

            if 8 <= now.hour < 14:
                if updated:
                    updated = False
                continue

            if now.hour == 0:
                if dtd_updated:
                    dtd_updated = False

            # # debug
            # dataio.update_livedata_dict(listed_id_list, livedata_dict)
            # continue
            # # end debug

            logger.logp("sleep 300s ...\n")
            tools.delay(300)
Code example #5
File: crawler.py Project: ybcsie/stock_server
def get_livedata_list(stock_id_list):
    delay = 4
    max_try = 3
    while max_try > 0:
        tools.delay(delay)

        try:
            logger.logp("connecting to livedata...")
            url = "http://163.29.17.179/stock/fibest.jsp"
            cookie = http.cookiejar.CookieJar()
            handler = urllib.request.HTTPCookieProcessor(cookie)
            opener = urllib.request.build_opener(handler)
            logger.logp("opening url fibest...")
            opener.open(url)
            logger.logp("url fibest opened.")

            stock_arg = ""
            for stock_id in stock_id_list:
                stock_arg += "tse_{}.tw|".format(stock_id)

            arg = "getStockInfo.jsp?ex_ch={}&json=1&delay=0&_={}".format(
                stock_arg, int(time.time() * 1000))
            url = "http://163.29.17.179/stock/api/" + arg
            request = urllib.request.Request(url)
            logger.logp("opening url getStockInfo...")
            res = opener.open(request, timeout=10)
            logger.logp("url getStockInfo opened.")

        except Exception:
            logger.logp("Error: connection")
            max_try -= 1
            continue

        try:
            livedata_list = json.loads(res.read().decode())
            if livedata_list["rtmessage"] != "OK":
                logger.logp("Error: data")
                max_try -= 1
                continue

        except Exception:
            logger.logp("Error: json")
            max_try -= 1
            continue

        return livedata_list["msgArray"]

    tools.delay(30)
    return None
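A usage sketch under the same assumption that the module is importable as crawler; the ids are placeholders, and each element of the returned msgArray is presumably one quote dict per requested stock.

# Hypothetical call to get_livedata_list; ids are illustrative only.
import crawler

quotes = crawler.get_livedata_list(["2330", "0050"])
if quotes is None:
    print("all three attempts failed")
else:
    for quote in quotes:
        print(quote)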
Code example #6
def update_smd_in_list(stock_id_list, smd_dir, months, force_update=False):
    update_log_path = smd_dir + "/update.log"

    if not is_smd_need_update(update_log_path) and not force_update:
        return

    communicate.master_start()

    split_num = len(communicate.slaves) + 1
    total = len(stock_id_list)
    subtotal = int(total / split_num) + 1

    t_list = []
    stock_id_sublist_master = None
    slave_id = -1

    for i in range(0, total, subtotal):
        if i == 0:
            # The first chunk is kept for the master itself (processed below);
            # every later chunk is handed to a slave thread.
            stock_id_sublist_master = stock_id_list[i:i + subtotal]
            continue

        stock_id_sublist = stock_id_list[i:i + subtotal]
        slave_en = True
        slave_id += 1

        t = threading.Thread(target=t_update_smd_in_list,
                             args=(stock_id_sublist, smd_dir, months, slave_en,
                                   slave_id))
        t_list.append(t)
        t.start()

    t_update_smd_in_list(stock_id_sublist_master,
                         smd_dir,
                         months,
                         slave_en=False)

    for t in t_list:
        while t.is_alive():
            tools.delay(10)

    with open(update_log_path, 'w') as update_log_file:
        update_log_file.write(datetime.datetime.now().strftime("%Y/%m/%d/%H"))
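To illustrate how the work is divided, a small self-contained sketch with made-up numbers: 900 ids and two registered slaves give split_num = 3 and subtotal = 301, so the master keeps the first chunk and two threads take the rest.

# Standalone illustration of the chunking arithmetic in update_smd_in_list (values are made up).
total, split_num = 900, 3
subtotal = int(total / split_num) + 1          # 301
chunks = [list(range(total))[i:i + subtotal] for i in range(0, total, subtotal)]
print([len(c) for c in chunks])                # [301, 301, 298]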
Code example #7
def add_ms_ac_info(input_file, output_file):
    papers = load_json_lines(input_file)
    papers = log_stream(papers, name='Input')
    papers_parsed = ({
        'ms_academic':
        get_mc_ac_paper(expr="and(Ti='" + normalize_title(p['title']) +
                        "',Y>=2014)"),
        **p
    } for p in delay(papers, 2))
    papers_parsed_printed = log_stream(papers_parsed, name='Output')
    write_json_lines(papers_parsed_printed, output_file)
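A minimal invocation sketch; the file names are placeholders and the helpers (load_json_lines, get_mc_ac_paper, normalize_title, delay, write_json_lines) are assumed to come from the same module. Because every stage is a generator, records stream through one at a time rather than being loaded all at once.

# Hypothetical call to add_ms_ac_info; file names are illustrative only.
add_ms_ac_info("papers.jsonl", "papers_with_ms_academic.jsonl")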
Code example #8
def debug_loop():
    while True:
        tools.delay(5)
        if is_ready:
            print(livedata_dict)
Code example #9
File: crawler.py Project: ybcsie/stock_server
def get_full_data(stock_id, yyyymmdd):
    delay = 0.3
    max_try = 5
    while max_try > 0:
        tools.delay(delay)
        logger.logp("connect {} {}".format(stock_id, yyyymmdd))

        try:
            args = "?action=r&id={}&date={}".format(stock_id, yyyymmdd)
            url = "http://www.cmoney.tw/notice/chart/stockchart.aspx" + args
            cookie = http.cookiejar.CookieJar()
            handler = urllib.request.HTTPCookieProcessor(cookie)
            opener = urllib.request.build_opener(handler)
            res = opener.open(url, timeout=10).read().decode()

        except Exception:
            logger.logp("Error: connection")
            max_try -= 1
            tools.delay(3)
            continue

        try:
            # Extract the "ck" token: the quoted value that follows 'var ck' in the page source.
            i = res.find("var ck")
            s = res.find('"', i) + 1
            e = res.find('"', s)

            ck = res[s:e]

        except Exception:
            logger.logp("Error: parse ck")
            max_try -= 1
            tools.delay(3)
            continue

        try:
            args += "&ck=" + ck
            url2 = "http://www.cmoney.tw/notice/chart/stock-chart-service.ashx" + args
            request = urllib.request.Request(url2)
            request.add_header("Referer", url)
            res = opener.open(request, timeout=10)

        except Exception:
            logger.logp("Error: connection")
            max_try -= 1
            tools.delay(3)
            continue

        try:
            content = json.loads(res.read().decode())
            if content["ErrorCode"] == 0:
                return content
            else:
                if content["ErrorCode"] == 124554:
                    return {}

                logger.logp("ErrorCode: {}".format(content["ErrorCode"]))
                max_try -= 1
                tools.delay(3)
                continue

        except Exception:
            logger.logp("Error: json")
            max_try -= 1
            tools.delay(3)
            continue

    return None
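A hedged usage sketch: the id and date are placeholders; per the code above, None means every retry failed, an empty dict corresponds to ErrorCode 124554, and anything else is the decoded JSON payload.

# Hypothetical call to get_full_data; "2330" and 20200103 are examples only.
import crawler

chart = crawler.get_full_data("2330", 20200103)
if chart is None:
    print("all retries failed")
elif not chart:
    print("ErrorCode 124554 (returned as an empty dict)")
else:
    print(sorted(chart.keys()))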
Code example #10
File: crawler.py Project: ybcsie/stock_server
def get_day_trading_data(yyyymmdd):
    logger.logp("get_day_trading_data: {}".format(yyyymmdd))

    url = "http://www.twse.com.tw/exchangeReport/TWTB4U?response=json&date={}&selectType=All".format(
        yyyymmdd)

    tools.delay(3)  # delay

    from socket import timeout

    max_try = 3
    while True:
        logger.logp("Trying connection...")
        try:
            res = urllib.request.urlopen(url, timeout=10)
            logger.logp("OK")

        except timeout:
            logger.logp("Error: urllib -- timeout")
            tools.wait_retry(logger, 10)
            continue

        except Exception:
            logger.logp("Error: urllib")
            tools.wait_retry(logger, 30)
            continue

        logger.logp("Trying json decode...")
        # check stat
        try:
            data = json.loads(res.read().decode())
            if data["stat"] != "OK":
                logger.logp("data error: stat = {}".format(data["stat"]))

                tools.wait_retry(logger, 5)
                if max_try == 0:
                    return None

                max_try -= 1
                continue

        except Exception:
            logger.logp("Error: json when checking stat")
            tools.wait_retry(logger, 5)
            continue

        # check date
        try:
            if data["date"] != "{}".format(yyyymmdd):
                logger.logp("data error: date = {}".format(data["date"]))

                tools.wait_retry(logger, 5)
                if max_try == 0:
                    return None

                max_try -= 1
                continue

        except Exception:
            logger.logp("Error: json when checking date")
            tools.wait_retry(logger, 5)
            continue

        # return
        return data["data"]
Code example #11
File: crawler.py Project: ybcsie/stock_server
def get_listed_list():
    url = "http://isin.twse.com.tw/isin/class_main.jsp?market=1&issuetype=1"
    max_try = 3
    while True:
        if max_try == 0:
            return None

        if max_try != 3:
            tools.delay(5)

        max_try -= 1

        try:
            res = urllib.request.urlopen(url)
        except Exception:
            logger.logp("Error: get listed id -- urllib")
            continue

        content = res.read().decode("cp950", errors='ignore')
        i_end = content.find("</table>")
        if i_end < 0:
            logger.logp("Error: get listed id -- source")
            continue

        i = i_end + 10

        i_end = content.find("</table>", i)
        if i_end < 0:
            logger.logp("Error: get listed id -- source")
            continue

        op_str = ""
        is_first_data = True

        while i < i_end:
            stock_id_str = ""
            ipo_date = ""

            i = content.find("<tr>", i)
            if i < 0:
                break

            # Skip ahead to the <td> cell that holds the stock id.
            for j in range(3):
                i = content.find("<td", i + 5)

            i = content.find('>', i + 5)
            i += 1

            while content[i] != '<':
                if content[i] != ' ' and content[i] != '\n':
                    stock_id_str += content[i]
                i += 1

            # ipo date
            for j in range(5):
                i = content.find("<td", i + 5)
            i = content.find('>', i + 5)
            i += 1
            while content[i] != '<':
                if content[i] != ' ' and content[i] != '\n':
                    ipo_date += content[i]
                i += 1

            op = "{},{}".format(stock_id_str, ipo_date)
            if is_first_data:
                is_first_data = False
            else:
                op = ';' + op

            op_str += op

        return op_str
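The return value is a single "stock_id,ipo_date" string with entries separated by semicolons; a hedged sketch of how a caller could take it apart (the module name is again an assumption):

# Hypothetical consumer of get_listed_list's "id,ipo_date;id,ipo_date;..." string.
import crawler

listed = crawler.get_listed_list()
if listed is None:
    print("all retries failed")
else:
    for entry in listed.split(';'):
        if not entry:
            continue
        stock_id, ipo_date = entry.split(',')
        print(stock_id, ipo_date)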