Example #1
def crawler_callableBull(coll, table, firstday) -> Generator:
    def gen_url_giventype(input_date: str) -> str:
        return gen_url('0999C', input_date)

    # gen_url_giventype is a local function, invisible to a module-level
    # get_dict, so get_dict has to be defined locally as well
    def get_dict(date: str) -> dict:
        return cytoolz.compose(jsonLoadsF, get_plain_text,
                               gen_url_giventype)(date)

    def craw(date: str) -> dict:
        return get_dict(date)

    def save(d: dict) -> None:
        print(coll.insert_one(d).inserted_id)

    def craw_save(date: str) -> None:
        crawler.craw_save(save, craw, date)

    lastdate = crawler.dt_to_str([saver.last_datetime(table)])
    # firstday = dt.datetime(2004, 2, 11)
    days_db = days_lite(table)
    nPeriods = lastdate + \
        crawler.dt_to_str(adjust.days_trade(firstday) - days_db)
    dates = [
        t.replace('-', '') for t in nPeriods
        if coll.find_one({"date": t}) is None
    ]
    print('dates', dates)
    return crawler.looper(craw_save, dates)
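The local-scope comment above is the point of this example: `get_dict` composes `gen_url_giventype`, which only exists inside `crawler_callableBull`, so the composition must be built in the same scope. Below is a minimal, runnable sketch of the same `cytoolz.compose` pipeline, with hypothetical stand-ins for `gen_url`, `get_plain_text`, and `jsonLoadsF`:

import json

import cytoolz


def gen_url(type_code: str, date: str) -> str:
    # hypothetical stand-in for the module's real URL builder
    return f'https://example.invalid/api?type={type_code}&date={date}'


def get_plain_text(url: str) -> str:
    # stub for the HTTP fetch; returns a canned JSON body
    return '{"stat": "OK", "date": "20200102"}'


jsonLoadsF = json.loads


def gen_url_giventype(input_date: str) -> str:
    return gen_url('0999C', input_date)


# compose applies right to left: build the URL, fetch it, parse the JSON
get_dict = cytoolz.compose(jsonLoadsF, get_plain_text, gen_url_giventype)
print(get_dict('20200102'))  # {'stat': 'OK', 'date': '20200102'}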
Example #2
def crawler_composite(table: str) -> Generator:
    def craw(date: str) -> pd.DataFrame:
        d = get_dict(date)
        # the API's no-data sentinel means "Sorry, no data matches the criteria!"
        if 'stat' in d and d['stat'] == '很抱歉,沒有符合條件的資料!':
            raise crawler.NoData('很抱歉,沒有符合條件的資料!')
        data = d['data3']
        fields = d['fields3']
        date = d['date'][0:4] + '-' + d['date'][4:6] + '-' + d['date'][6:]
        df = (pd.DataFrame(data, columns=fields)
              .replace(',', '', regex=True)
              .replace('--', np.nan))
        df.insert(0, '年月日', date)
        df['年月日'] = pd.to_datetime(df['年月日']).astype(str)
        floatColumns = ['成交金額(元)', '成交股數(股)', '成交筆數']
        df = ast.to_float(floatColumns, df)
        return df

    def save(df: pd.DataFrame) -> None:
        saver.lite(table, df)

    def craw_save(date: str) -> None:
        crawler.craw_save(save, craw, date)

    lastdate = crawler.dt_to_str([saver.last_datetime(table)])
    firstday = dt.datetime(2004, 2, 11)
    days_db = days_lite(table)
    nPeriods = lastdate + crawler.dt_to_str(
        adjust.days_trade(firstday) - days_db)

    # lastdate = saver.last_datetime(table)
    # nPeriods = crawler.input_dates(lastdate, dt.datetime.now())
    return crawler.looper(craw_save, nPeriods)
Example #3
def crawler_extendedCallableBear(table: str) -> Generator:
    gen_url_giventype = partial(gen_url, '0999X')

    # gen_url_giventype is a local function, invisible to a module-level
    # get_dict, so get_dict has to be defined locally as well
    def get_dict(date: str) -> dict:
        return cytoolz.compose(jsonLoadsF, get_plain_text,
                               gen_url_giventype)(date)

    def craw(date: str) -> pd.DataFrame:
        d = get_dict(date)
        if 'stat' in d and d['stat'] == '很抱歉,沒有符合條件的資料!':
            raise crawler.NoData('很抱歉,沒有符合條件的資料!')
        data = d['data1']
        fields = d['fields1']
        date = d['date'][0:4] + '-' + d['date'][4:6] + '-' + d['date'][6:]
        df = (pd.DataFrame(data, columns=fields)
              .replace(',', '', regex=True)
              .replace('--', np.nan))
        df.insert(0, '年月日', date)
        df['年月日'] = pd.to_datetime(df['年月日']).astype(str)
        df['漲跌(+/-)'] = (df['漲跌(+/-)']
                         .replace("<p style= color:red>+</p>", 1)
                         .replace("<p style= color:green>-</p>", -1)
                         .replace('X', np.nan)
                         .replace(' ', 0))
        df['牛熊證觸及限制價格'] = df['牛熊證觸及限制價格'].replace('', 0).replace('*', 1)
        df['本益比'] = df['本益比'].replace('', np.nan)
        intColumns = ['成交股數', '成交筆數', '最後揭示買量', '最後揭示賣量']
        floatColumns = [
            '成交金額', '開盤價', '最高價', '最低價', '收盤價', '漲跌(+/-)', '漲跌價差', '最後揭示買價',
            '最後揭示賣價', '本益比', '牛熊證觸及限制價格', '標的證券收盤價/指數'
        ]
        floatColumns = [col for col in floatColumns if col in df.columns]
        df[intColumns + floatColumns] = df[intColumns + floatColumns].replace('', 0)
        df = ast.to_int(intColumns, df)
        df = ast.to_float(floatColumns, df)
        return df

    def save(df: pd.DataFrame) -> None:
        saver.lite(table, df)

    def craw_save(date: str) -> None:
        crawler.craw_save(save, craw, date)

    lastdate = crawler.dt_to_str([saver.last_datetime(table)])
    firstday = dt.datetime(2014, 7, 31)
    days_db = days_lite(table)
    nPeriods = lastdate + crawler.dt_to_str(
        adjust.days_trade(firstday) - days_db)

    # lastdate = saver.last_datetime(table)
    # nPeriods = crawler.input_dates(lastdate, dt.datetime.now())
    return crawler.looper(craw_save, nPeriods)
Example #4
def mgo_hugeDeal() -> None:
    table = '鉅額交易日成交資訊'
    coll = client['tse'][table]
    firstday = dt.datetime(2005, 4, 4)
    lastdate = crawler.dt_to_str([saver.last_datetime(table)])
    days_db = days_lite(table)
    nPeriods = lastdate + \
        crawler.dt_to_str(adjust.days_trade(firstday) - days_db)
    dates = [t.replace('-', '')
             for t in nPeriods if coll.find_one({"date": t}) is not None]
    for date in dates:
        doc = coll.find_one({"date": date})
        daily.hugeDeal(date, doc['fields'], doc['data'])
Example #5
def mgo_extendedCallableBear() -> None:
    table = '可展延牛證'
    coll = db[table]
    firstday = dt.datetime(2014, 7, 31)
    lastdate = crawler.dt_to_str([saver.last_datetime(table)])
    days_db = days_lite(table)
    nPeriods = lastdate + \
        crawler.dt_to_str(adjust.days_trade(firstday) - days_db)
    dates = [
        t.replace('-', '') for t in nPeriods
        if coll.find_one({"date": t}) is not None
    ]
    for date in dates:
        doc = coll.find_one({"date": date})
        daily.extendedCallableBear(date, doc['fields1'], doc['data1'])
Example #6
def mgo_callableBear() -> None:
    table = '熊證(不含可展延熊證)'
    coll = client['tse'][table]
    firstday = dt.datetime(2011, 7, 8)
    lastdate = crawler.dt_to_str([saver.last_datetime(table)])
    days_db = days_lite(table)
    nPeriods = lastdate + \
        crawler.dt_to_str(adjust.days_trade(firstday) - days_db)
    dates = [
        t.replace('-', '') for t in nPeriods
        if coll.find_one({"date": t}) is not None
    ]
    for date in dates:
        doc = coll.find_one({"date": date})
        daily.callableBear(date, doc['fields1'], doc['data1'])
Example #7
def mgo_composite(coll) -> None:
    firstday = dt.datetime(2004, 2, 11)
    lastdate = crawler.dt_to_str([saver.last_datetime('大盤成交統計')])
    days_db = days_lite('大盤成交統計')
    nPeriods = lastdate + \
        crawler.dt_to_str(adjust.days_trade(firstday) - days_db)
    dates = [
        t.replace('-', '') for t in nPeriods
        if coll.find_one({"date": t}) is not None
    ]
    for date in dates:
        doc = coll.find_one({"date": date})
        for i in range(1, 6):
            field = f'fields{i}'
            data = f'data{i}'
            if field in doc:
                if doc[field] == ['成交統計', '成交金額(元)', '成交股數(股)', '成交筆數']:
                    print(date, '大盤成交統計')
                    daily.composite(date, doc[field], doc[data])
Example #8
def crawler_upsAndDown(table: str) -> Generator:
    def craw(date: str) -> pd.DataFrame:
        d = get_dict(date)
        if 'stat' in d and d['stat'] == '很抱歉,沒有符合條件的資料!':
            raise crawler.NoData('很抱歉,沒有符合條件的資料!')
        data = d['data4']
        fields = d['fields4']
        date = d['date'][0:4] + '-' + d['date'][4:6] + '-' + d['date'][6:]
        # the first two rows pack pairs as "a(b)"; split names and values
        # into separate rows, then keep the remaining rows as-is
        L = []
        for row in data[:2]:
            L.append([i.split('(')[0] for i in row])
            L.append([i.split('(')[1].replace(')', '') for i in row])
        L.extend(data[2:5])
        df = (pd.DataFrame(L, columns=fields)
              .replace(',', '', regex=True)
              .replace('--', np.nan))
        df.insert(0, '年月日', date)
        df['年月日'] = pd.to_datetime(df['年月日']).astype(str)
        intColumns = ['整體市場', '股票']
        df = ast.to_int(intColumns, df)
        return df

    def save(df: pd.DataFrame) -> None:
        saver.lite(table, df)

    def craw_save(date: str) -> None:
        crawler.craw_save(save, craw, date)

    lastdate = crawler.dt_to_str([saver.last_datetime(table)])
    firstday = dt.datetime(2011, 8, 1)
    days_db = days_lite(table)
    nPeriods = lastdate + crawler.dt_to_str(
        adjust.days_trade(firstday) - days_db)

    # lastdate = saver.last_datetime(table)
    # nPeriods = crawler.input_dates(lastdate, dt.datetime.now())
    return crawler.looper(craw_save, nPeriods)
Example #9
def mgo_market(coll) -> None:
    firstday = dt.datetime(2009, 1, 5)
    lastdate = crawler.dt_to_str([saver.last_datetime('大盤統計資訊')])
    days_db = days_lite('大盤統計資訊')
    nPeriods = lastdate + \
        crawler.dt_to_str(adjust.days_trade(firstday) - days_db)
    dates = [
        t.replace('-', '') for t in nPeriods
        if coll.find_one({"date": t}) is not None
    ]
    for date in dates:
        doc = coll.find_one({"date": date})
        for i in range(1, 6):
            field = f'fields{i}'
            data = f'data{i}'
            if field in doc:
                if doc[field] == ['指數', '收盤指數', '漲跌(+/-)', '漲跌點數', '漲跌百分比(%)']:
                    print(date, '大盤統計資訊')
                    daily.market(date, doc[field], doc[data])
Example #10
def mgo_upsAndDown(coll) -> None:
    firstday = dt.datetime(2011, 8, 1)
    lastdate = crawler.dt_to_str([saver.last_datetime('漲跌證券數合計')])
    days_db = days_lite('漲跌證券數合計')
    nPeriods = lastdate + \
        crawler.dt_to_str(adjust.days_trade(firstday) - days_db)
    dates = [
        t.replace('-', '') for t in nPeriods
        if coll.find_one({"date": t}) is not None
    ]
    for date in dates:
        doc = coll.find_one({"date": date})
        for i in range(1, 6):
            field = f'fields{i}'
            data = f'data{i}'
            if field in doc:
                if doc[field] == ['類型', '整體市場', '股票']:
                    print(date, '漲跌證券數合計')
                    daily.upsAndDown(date, doc[field], doc[data])
Example #11
def crawler_close(coll, table, firstday) -> Generator:
    def craw(date: str) -> dict:
        return get_dict(date)

    def save(d: dict) -> None:
        print(coll.insert_one(d).inserted_id)

    def craw_save(date: str) -> None:
        crawler.craw_save(save, craw, date)

    lastdate = crawler.dt_to_str([saver.last_datetime(table)])
    # firstday = dt.datetime(2004, 2, 11)
    days_db = days_lite(table)
    nPeriods = lastdate + \
        crawler.dt_to_str(adjust.days_trade(firstday) - days_db)
    dates = [
        t.replace('-', '') for t in nPeriods
        if coll.find_one({"date": t}) is None
    ]
    print('dates to crawl:', dates)
    return crawler.looper(craw_save, dates)
Example #12
def mgo_close(coll) -> None:
    firstday = dt.datetime(2004, 2, 11)
    lastdate = crawler.dt_to_str([saver.last_datetime('每日收盤行情(全部(不含權證、牛熊證))')])
    days_db = days_lite('每日收盤行情(全部(不含權證、牛熊證))')
    nPeriods = lastdate + \
        crawler.dt_to_str(adjust.days_trade(firstday) - days_db)
    dates = [
        t.replace('-', '') for t in nPeriods
        if coll.find_one({"date": t}) is not None
    ]
    for date in dates:
        doc = coll.find_one({"date": date})
        for i in range(1, 10):
            field = f'fields{i}'
            data = f'data{i}'
            if field in doc:
                if doc[field] == [
                        '證券代號', '證券名稱', '成交股數', '成交筆數', '成交金額', '開盤價', '最高價',
                        '最低價', '收盤價', '漲跌(+/-)', '漲跌價差', '最後揭示買價', '最後揭示買量',
                        '最後揭示賣價', '最後揭示賣量', '本益比'
                ]:
                    print(date, '每日收盤行情(全部(不含權證、牛熊證))')
                    daily.close(date, doc[field], doc[data])
Example #13
def crawler_marketReturn(table: str) -> Generator:
    def craw(date: str) -> pd.DataFrame:
        d = get_dict(date)
        if 'stat' in d and d['stat'] == '很抱歉,沒有符合條件的資料!':
            raise crawler.NoData('很抱歉,沒有符合條件的資料!')
        data = d['data2']
        fields = d['fields2']
        date = d['date'][0:4] + '-' + d['date'][4:6] + '-' + d['date'][6:]
        df = (pd.DataFrame(data, columns=fields)
              .replace(',', '', regex=True)
              .replace('--', np.nan))
        df['漲跌(+/-)'] = (df['漲跌(+/-)']
                         .replace("<p style ='color:red'>+</p>", 1)
                         .replace("<p style ='color:green'>-</p>", -1)
                         .replace('X', 0)
                         .replace(' ', 0))
        df.insert(0, '年月日', date)
        df = df.rename(columns={'報酬指數': '指數'})
        df['年月日'] = pd.to_datetime(df['年月日']).astype(str)
        floatColumns = ['收盤指數', '漲跌(+/-)', '漲跌點數', '漲跌百分比(%)']
        df = ast.to_float(floatColumns, df)
        return df

    def save(df: pd.DataFrame) -> None:
        saver.lite(table, df)

    def craw_save(date: str) -> None:
        crawler.craw_save(save, craw, date)

    lastdate = crawler.dt_to_str([saver.last_datetime(table)])
    firstday = dt.datetime(2009, 1, 5)
    days_db = days_lite(table)
    nPeriods = lastdate + crawler.dt_to_str(
        adjust.days_trade(firstday) - days_db)

    # lastdate = saver.last_datetime(table)
    # nPeriods = crawler.input_dates(lastdate, dt.datetime.now())
    return crawler.looper(craw_save, nPeriods)
Example #14
def crawler_close(table: str) -> Generator:
    def craw(date: str) -> pd.DataFrame:
        d = get_dict(date)
        if 'stat' in d and d['stat'] == '很抱歉,沒有符合條件的資料!':
            raise crawler.NoData('很抱歉,沒有符合條件的資料!')
        data = d['data5']
        fields = d['fields5']
        date = d['date'][0:4] + '-' + d['date'][4:6] + '-' + d['date'][6:]
        df = (pd.DataFrame(data, columns=fields)
              .replace(',', '', regex=True)
              .replace('--', np.nan)
              .replace('', np.nan))
        df['漲跌(+/-)'] = (df['漲跌(+/-)']
                         .replace('<p style= color:red>+</p>', 1)
                         .replace('<p style= color:green>-</p>', -1)
                         .replace('X', 0)
                         .replace(' ', 0))
        df.insert(0, '年月日', date)
        df['年月日'] = pd.to_datetime(df['年月日']).astype(str)
        floatColumns = [
            '成交股數', '成交筆數', '成交金額', '開盤價', '最高價', '最低價', '收盤價', '漲跌(+/-)',
            '漲跌價差', '最後揭示買價', '最後揭示買量', '最後揭示賣價', '最後揭示賣量', '本益比'
        ]
        df = ast.to_float(floatColumns, df)
        return df

    def save(df: pd.DataFrame) -> None:
        saver.lite(table, df)

    def craw_save(date: str) -> None:
        crawler.craw_save(save, craw, date)

    lastdate = crawler.dt_to_str([saver.last_datetime(table)])
    firstday = dt.datetime(2004, 2, 11)
    days_db = days_lite(table)
    nPeriods = lastdate + crawler.dt_to_str(
        adjust.days_trade(firstday) - days_db)
    return crawler.looper(craw_save, nPeriods)
Example #15
def craw_hugeDeal(coll) -> Generator:
    table = '鉅額交易日成交資訊'

    def craw(date: str) -> dict:
        return get_dict(date)

    def save(d: dict) -> None:
        print(coll.insert_one(d).inserted_id)

    def craw_save(date: str) -> None:
        crawler.craw_save(save, craw, date)

    firstday = dt.datetime(2005, 4, 4)
    lastdate = crawler.dt_to_str([saver.last_datetime(table)])
    days_db = days_lite(table)
    nPeriods = lastdate + \
        crawler.dt_to_str(adjust.days_trade(firstday) - days_db)
    print('nPeriods', nPeriods)
    dates = [
        t.replace('-', '') for t in nPeriods
        if coll.find_one({"date": t}) is None
    ]
    print('dates', dates)
    return crawler.looper(craw_save, dates)
Example #16
def gen_url_giventype(input_date: str) -> str:
    return gen_url('S', input_date)


###----鉅額交易日成交資訊----

#!!! Not every day has a huge deal; on most days there is no data.

# -- one company may have more than one transaction on a single day --
def addNumberF(df):
    df.第幾筆 = list(range(1, len(df.第幾筆) + 1))
    return df
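A hypothetical round trip through addNumberF: when one company has several fills on the same day, the scraped 第幾筆 (transaction number) values can collide, and the helper simply renumbers the column sequentially.

import pandas as pd

# two fills for the same stock on one day, both scraped with 第幾筆 == 1
sample = pd.DataFrame({'證券代號': ['2330', '2330'], '第幾筆': [1, 1]})
print(addNumberF(sample).第幾筆.tolist())  # [1, 2]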


lastdate = saver.last_datetime('鉅額交易日成交資訊')


empty = []
def craw_hugeDeal(date: str) -> pd.DataFrame:
    global empty
    
    d = get_dict(date)
    if 'stat' in d and d['stat'] == '很抱歉,沒有符合條件的資料!':
        raise crawler.NoData('很抱歉,沒有符合條件的資料!')
    data = d['data']
    fields = d['fields']
    date = d['date'][0:4] + '-' + d['date'][4:6] + '-' + d['date'][6:]
    if not data:
        empty.append(date)
    df = (pd.DataFrame(data, columns=fields)
          .replace(',', '', regex=True)
          .replace('--', np.nan)
          .replace('', np.nan))
Example #17
    floatColumns = ['融券賣出成交金額', '借券賣出成交金額']
    df[floatColumns] = df[floatColumns].astype(float)
    intColumns = ['融券賣出成交數量', '借券賣出成交數量']
    df[intColumns] = df[intColumns].astype(int)
    return df


def save(df: pd.DataFrame) -> None:
    saver.lite('當日融券賣出與借券賣出成交量值(元)', df)


def craw_save(date: str) -> None:
    crawler.craw_save(save, craw_margin, date)


table = '當日融券賣出與借券賣出成交量值(元)'
lastdate = crawler.dt_to_str([saver.last_datetime(table)])
firstday = dt.datetime(2008, 9, 26)
days_db = days_lite(table)
nPeriods = lastdate + crawler.dt_to_str(adjust.days_trade(firstday) - days_db)

# nPeriods = crawler.input_dates(lastdate, dt.datetime.now())

generatorG = crawler.looper(craw_save, nPeriods)
for _ in generatorG:
    pass
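crawler.looper apparently yields lazily, so the empty for loop above exists only to drive the generator and trigger the craw_save side effects. A minimal sketch of that pattern, with a hypothetical reimplementation of looper:

from typing import Callable, Generator, Iterable


def looper(fn: Callable[[str], None], items: Iterable[str]) -> Generator:
    # hypothetical: apply fn to each item, lazily
    for item in items:
        yield fn(item)


g = looper(print, ['20200102', '20200103'])  # nothing runs yet
for _ in g:  # side effects fire only as the generator is drained
    pass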

#crawler.loop(craw_save, nPeriods)

s.close()