Example #1
0
def updateAdjusted(row, client, table):
    """Refresh the adjusted-close records of one ticker and report the outcome.

    Args:
        row: Mapping that provides 'Ticker', 'IssueDate' and 'UpdateDate'.
        client: Connection handle forwarded to ``getDataWithAdjClose``.
        table: Collection handle forwarded to ``update_one`` for every record.

    Returns:
        None. A success or failure message is sent through ``Tele``.
    """
    ticker = row['Ticker'].strip()
    start_date, end = row['IssueDate'], row['UpdateDate']
    try:
        frame = getDataWithAdjClose(client, ticker, 'TWSE', start_date, end)
        frame['Ticker'] = ticker
        frame.index.name = 'Date'
        frame = frame.reset_index()
        column_map = {'_id': 'idx', 'Adj Close': 'Adj_Close'}
        frame.rename(columns=column_map, inplace=True)
        records = [dict(item._asdict()) for item in frame.itertuples()]
        # Run update_data over every record on a 20-thread pool, keeping order.
        with ThreadPoolExecutor(20) as executor:
            futures = [executor.submit(update_data, rec) for rec in records]
            records = [future.result() for future in futures]
    except Exception as e:
        # Preparation failed: print the error and fall through to the
        # failure notification at the bottom of the function.
        print(e)
    except KeyboardInterrupt:
        # Ctrl-C terminates the whole process immediately.
        os._exit(0)
    else:
        with ThreadPoolExecutor(20) as executor:
            pending = [
                executor.submit(update_one, table, rec) for rec in records
            ]
            # Wait for every write; the first failure propagates to the caller.
            for future in pending:
                future.result()
        Tele.sendMessage(f'update Adj Close of {ticker} Success')
        return
    Tele.sendMessage(f'update Adj Close of {ticker} Failed')
Example #2
0
# Keep `path` when it is an existing directory, otherwise use the current
# working directory instead.
path = path if os.path.isdir(path) else os.getcwd()


def update_data_dict(d):
    """Normalise one price record in place and return it.

    Removes the 'Index' key and stores the value of 'Close' under
    'Adj Close'. Raises KeyError when 'Index' or 'Close' is missing.
    """
    d.pop('Index')
    d.update({'Adj Close': d['Close']})
    return d


def update_one(table, d):
    """Upsert document `d` into `table`, then pause for one millisecond.

    The document itself is used as the match filter and as the `$set`
    payload.
    """
    change = {'$set': d}
    table.update_one(d, change, upsert=True)
    # Short pause after every write.
    time.sleep(0.001)


if __name__ == '__main__':
    # Open the target collection and ensure the unique (Date, Ticker) index.
    client = Mongo()
    table = client['admin']['TWSE']['historicalPrice']
    table.create_index([('Date', 1), ('Ticker', 1)], unique=True)

    # Load the tab-separated dump and turn every row into a plain dict.
    source_file = os.path.join(path, 'TWSE_HistoricalPrice.txt')
    df = read_csv(source_file, sep='\t')
    data = [dict(row._asdict().items()) for row in df.itertuples()]

    start_time = time.time()
    # Normalise the records on a 50-thread pool; map() preserves input order.
    with ThreadPoolExecutor(50) as executor:
        data = list(executor.map(update_data_dict, data))
    table.insert_many(data)

    # Report the elapsed time in minutes.
    elapsed_seconds = time.time() - start_time
    duration = round(elapsed_seconds / 60, 4)
    Tele().sendMessage(f'insert {len(data)} data use {duration} mins',
                       group='UpdateMessage')
                    afterhour_table.insert_many(AfterHour_Odds)
            except:
                # Any error in the step above is swallowed; back off 10 s.
                time.sleep(10)
            # Pause 5 s between iterations.
            time.sleep(5)

    else:  # empty DB, or DB not yet up to date
        # Crawl every date from afterhour_start through today.
        dates = date_range(afterhour_start, datetime.today())
        print('First time for create DB or haven\'t up to date')
        for date in dates:
            print(f'crawl {date}')
            try:
                # crawl after-hour odd lots and insert them into mongo
                AfterHour_Odds = OddLot_AfterHour(date)
                if AfterHour_Odds is not None:
                    afterhour_table.insert_many(AfterHour_Odds)
            except:
                # NOTE(review): bare except hides every failure (including
                # insert errors); the date is simply skipped after 10 s.
                time.sleep(10)

            try:
                # crawl intraday odd lots and insert them into mongo;
                # only dates on or after intraday_start are crawled
                if date >= intraday_start:
                    Intraday_Odds = OddLot_Intraday(date)
                    if Intraday_Odds is not None:
                        intraday_table.insert_many(Intraday_Odds)
            except:
                # Same best-effort handling as the after-hour step.
                time.sleep(10)
            # Pause 5 s between dates.
            time.sleep(5)

    # send finish message (sent regardless of per-date failures above)
    Tele().sendMessage('Update TWSE Odd Lot Success.', group='UpdateMessage')
Example #4
0
        # Crawl every date from start_date (set outside this view) to today.
        end_date = datetime.today()
        dates = date_range(start_date, end_date)
        # NOTE(review): full_Kbar is not used anywhere in the visible code.
        full_Kbar = []
        for date in dates:
            try:
                print(f'crawl {date}')
                full_dict, kBar = crawl5SecIndex(date)
            except Exception as e:
                # Any crawl error is reported as "no data"; wait 10 s and
                # move on to the next date.
                print(f'{date} has no data')
                time.sleep(10)
                continue
            else:
                # Both inserts are best-effort: failures are silently ignored
                # and do not stop the loop.
                try:
                    Intraday_table.insert_many(full_dict)
                except:
                    pass
                try:
                    Interday_table.insert_many(kBar)
                except:
                    pass
                # Pause 5 s between successfully crawled dates.
                time.sleep(5)
        # Elapsed seconds since start_time (set outside this view).
        duration = time.time() - start_time
    except Exception as e:
        # Any uncaught error from the block above ends in a failure notice.
        Tele().sendMessage('Update Intraday Index Data Failed',
                           group='UpdateMessage')
        print(e)
    else:
        Tele().sendMessage(
            f'Update Intraday Index Data Success, cost {round(duration, 2)} seconds.',
            group='UpdateMessage')
                # Reuse the per-day tab-separated file under otcpath when it
                # already exists; otherwise fetch the day via
                # OTC_HistoricalPrice(date).
                if os.path.isfile(
                        os.path.join(otcpath,
                                     f"{date.strftime('%Y-%m-%d')}.txt")):
                    df_otc = read_csv(os.path.join(
                        otcpath, f"{date.strftime('%Y-%m-%d')}.txt"),
                                      sep='\t')
                    # One dict per row, each passed through update_data_dict.
                    data_otc = [
                        update_data_dict(x)
                        for x in list(df_otc.T.to_dict().values())
                    ]
                else:
                    data_otc = OTC_HistoricalPrice(date)
                if len(data_otc) > 0:
                    # The day file is (re)written in both branches above,
                    # then the records are inserted into the collection.
                    DataFrame(data_otc).to_csv(os.path.join(
                        otcpath, f"{date.strftime('%Y-%m-%d')}.txt"),
                                               sep='\t',
                                               index=None,
                                               float_format='%g')
                    table.insert_many(data_otc)
                # Printed even when data_otc is empty and nothing was stored.
                print(f'Update {date} Historical Price of otc success')
            except Exception as e:
                # Errors for this date are printed and the loop continues.
                print('otc', e)
            # Pause 5 s between dates.
            time.sleep(5)

        # send finish message
        # Line().sendMessage('Update Stock Historical Price success')
        Tele().sendMessage('Update Stock Historical Price success',
                           group='UpdateMessage')
    except Exception as e:
        # Outer handler: print the error; no failure message is sent.
        print(e)
Example #6
0
import pandas as pd
import os, time
from modules import Tele, Mongo
# Directory containing this script; fall back to the current working
# directory when that location is not an existing directory.
_script_dir = os.path.dirname(os.path.abspath(__file__))
path = _script_dir if os.path.isdir(_script_dir) else os.getcwd()

__updated__ = '2021-01-31 04:21:20'

if __name__ == '__main__':
    client = Mongo()
    # Passed to read_csv as `chunksize`, i.e. the number of rows per chunk.
    _batch_size = 15 * 1024 * 1024

    # Target collection with a unique (Date, Ticker) index.
    table = client['admin']['USSE']['historicalPrice']
    table.create_index([('Date', 1), ('Ticker', 1)], unique=True)

    start_time = time.time()
    source_file = os.path.join(path, 'USSE_HistoricalPrice.txt')
    datas = pd.read_csv(source_file, sep='\t', chunksize=_batch_size)

    total_data = 0
    for df in datas:
        # Convert each row to a dict and drop the positional 'Index' field.
        data = []
        for row in df.itertuples():
            record = dict(row._asdict().items())
            del record['Index']
            data.append(record)
        table.insert_many(data)
        total_data += len(data)

    # Report the row count and the elapsed time in minutes.
    elapsed_seconds = time.time() - start_time
    duration = round(elapsed_seconds / 60, 4)
    Tele().sendMessage(
        f'insert {total_data} USSE Historical data use {duration} mins',
        group='UpdateMessage')
Example #7
0
        # Process every .zip entry under optPath in sorted name order.
        Options_Files = sorted(listdir(optPath))
        for Name in Options_Files:
            if '.zip' not in Name: continue
            print('Option', Name)
            # Try each parser in turn; the first one that does not raise
            # wins. The last fallback (Options_multiFiles) is unguarded, so
            # its exception propagates.
            try:
                df = Options(Name)
            except:
                try:
                    df = Options_Before2015(Name)
                except:
                    try:
                        df = Options_Before2010(Name)
                    except:
                        df = Options_multiFiles(Name)
            parallel_update(opt_table, createTickerOpt, df)
            # Pause 1 s between files.
            time.sleep(1)

    # Latest non-None 'Date' stored for futures (after parseDatetime),
    # handed to the daily futures loader.
    fut_lastDate = sorted([
        parseDatetime(x) for x in fut_table.distinct('Date') if x is not None
    ])[-1]
    df = Futures_Data_Daily(fut_lastDate)
    parallel_update(fut_table, createBrokerID, df)

    # Same procedure for the options collection.
    opt_lastDate = sorted([
        parseDatetime(x) for x in opt_table.distinct('Date') if x is not None
    ])[-1]
    df = Options_Data_Daily(opt_lastDate)
    parallel_update(opt_table, createTickerOpt, df)

    # Send the completion message to the 'UpdateMessage' group.
    Tele().sendMessage(f'盤後爬取期交所,所有期貨收盤價成功', group='UpdateMessage')
Example #8
0

if __name__ == '__main__':
    # Open the Mongo connection.
    client = Mongo()

    # Historical price collection and the tickers it already contains.
    table = client['admin']['TWSE']['historicalPrice']
    uniqueStock = table.distinct('Ticker')

    # Stock list collection; pick the most recent UpdateDate value.
    tickers_table = client['admin']['TWSE']['StockList']
    update_dates = sorted(tickers_table.distinct('UpdateDate'))
    updateDate = update_dates[-1]

    # Stock list rows that belong to that most recent update.
    tickers = list(tickers_table.find({'UpdateDate': {'$eq': updateDate}}))

    # Update the adjusted close of every listed ticker that already has
    # price history, using a 20-thread pool.
    start_time = time.time()
    with ThreadPoolExecutor(20) as executor:
        candidates = [row for row in tickers if row['Ticker'] in uniqueStock]
        exes = [
            executor.submit(updateAdjusted, row, client, table)
            for row in candidates
        ]
        finished_process = [exe.result() for exe in exes]

    # Report the number of processed assets and the elapsed time in hours.
    elapsed_seconds = time.time() - start_time
    duration = round(elapsed_seconds / 3600, 4)
    Tele().sendMessage(
        f'update {len(finished_process)} assets, used {duration} hours',
        group='UpdateMessage')
Example #9
0
if __name__ == '__main__':
    start_time = time()

    # Stock list collection with a unique (Ticker, Name) index.
    client = Mongo()
    table = client['admin']['TWSE']['StockList']
    table.create_index([('Ticker', 1), ('Name', 1)], unique=True)

    # Snapshot of the documents already stored.
    ExistsStocks = pd.DataFrame(list(table.find()))

    # Request the stock lists; mode 2 = listed, mode 4 = OTC.
    listed_stocks = requestStockList(2)
    otc_stocks = requestStockList(4)
    output = listed_stocks + otc_stocks

    # Pair every crawled entry with the stored snapshot for updateOutput.
    output = [updateOutput(pair) for pair in product(output, [ExistsStocks])]

    # Upsert each entry: match on '_id' when present, otherwise on the whole
    # document.
    for doc in output:
        if '_id' in doc:
            table.update_one({'_id': doc['_id']}, {'$set': doc}, upsert=True)
        else:
            table.update_one(doc, {'$set': doc}, upsert=True)

    duration = round((time() - start_time) / 60, 2)

    # Send the completion message.
    Tele().sendMessage(f'Update Stock List success use {duration} mins',
                       group='UpdateMessage')
Example #10
0
        return Importdata


if __name__ == "__main__":
    client = Mongo()
    table = client['admin']['TWSE']['Actions']

    # distinct() does not promise any ordering, so sort the stored dates
    # before taking the most recent one ('%Y-%m-%d' strings sort
    # chronologically). None values are skipped because they cannot be
    # parsed as a date.
    stored_dates = sorted(
        d for d in table.distinct('Date') if d is not None)
    start_date = datetime.strptime(stored_dates[-1],
                                   '%Y-%m-%d') + timedelta(days=1)
    # Use a separate name so the date_range() helper is not rebound.
    dates = date_range(start_date, datetime.today())

    for date in dates:
        Importdata = Main_Crawler(date)

        # Only days that returned a non-empty frame are saved and inserted.
        if Importdata is not None and not Importdata.empty:
            # Keep a per-day tab-separated copy under actionpath.
            Importdata.to_csv(os.path.join(
                actionpath, f'Daily_{date.strftime("%Y-%m-%d")}.txt'),
                              sep='\t',
                              index=None)
            data = [
                dict(x._asdict().items()) for x in Importdata.itertuples()
            ]
            # Drop the positional 'Index' field added by itertuples().
            for x in data:
                del x['Index']
            table.insert_many(data)
        # Pause 5 s between crawled dates.
        time.sleep(5)

    # send finish message
    Tele().sendMessage('Update TWSE Actions success', group='UpdateMessage')
        # Tele().sendMessage(f'爬取 {date.strftime("%Y-%m-%d")} 上市類股外資持股比例成功')
            
def crawl_otc(schema):
    """Crawl OTC foreign-holding data by industry and store it day by day.

    Args:
        schema: Database handle that supports ``list_collection_names()``
            and item access by collection name.

    The crawl starts on 2007-04-23 when the collection is missing or empty,
    otherwise on the day after the latest stored 'Date'. It runs through
    today, inserting every non-empty daily result.
    """
    table_name = 'TWSE.ForeignInvestment.Industry.OTC'
    collections = schema.list_collection_names()
    table = schema[table_name]

    # Default start date; overridden only when data is already stored.
    start = datetime(2007, 4, 23)
    if table_name not in collections:
        # New collection: create the (Date, Industry) index first.
        table.create_index([('Date', 1), ('Industry', 1)])
    elif table.count_documents({}) != 0:
        latest = sorted(table.distinct('Date'))[-1]
        start = datetime.strptime(latest, "%Y-%m-%d") + timedelta(1)

    for date in date_range(start, datetime.today()):
        full_data = crawl_foriegn_holding_ratio_otc(date)
        if full_data:
            table.insert_many(full_data)

if __name__ == '__main__':
    # Connect to Mongo and select the 'admin' database.
    schema = Mongo()['admin']
    # The listed-market crawl is currently disabled; only OTC is refreshed.
    # crawl_listed(schema)
    crawl_otc(schema)
    # Send the completion message to the 'UpdateMessage' group.
    Tele().sendMessage(f'爬取外資持股比例成功', group='UpdateMessage')