def getStocksList_CHN(root_path):
    try:
        df = queryStockList(root_path, "DB_STOCK", "SHEET_CHN")
        df.index = df.index.astype(str).str.zfill(6)
    except Exception as e:
        df = pd.DataFrame()

    if df.empty == False: return df

    stock_info = ts.get_stock_basics()
    listData = pd.DataFrame(stock_info)
    listData['daily_update'] = '1970-07-01'
    listData['weekly_update'] = '1970-07-01'
    listData['monthly_update'] = '1970-07-01'
    listData['news_update'] = '1970-07-01'
    listData.index.name = 'symbol'
    listData = listData.reset_index()

    #listData.index.name = 'symbol'
    #listData.index = listData.index.astype(str).str.zfill(6) #[str(symbol).zfill(6) for symbol in listData.index] #listData.index.astype(str).str.zfill(6)
    #print(listData.index)
    #listData['symbol'] = listData['symbol'].str.strip()

    storeStockList(root_path, "DB_STOCK", "SHEET_CHN", listData)
    df = queryStockList(root_path, "DB_STOCK", "SHEET_CHN")

    if df.empty == False: df.index = df.index.astype(str).str.zfill(6)
    return df
def getStocksList_US(root_path):
    try:
        df = queryStockList(root_path, "DB_STOCK", "SHEET_US")
    except Exception as e:
        df = pd.DataFrame()

    if df.empty == False:
        return df

    for exchange in ["NASDAQ", "NYSE"]:
        print("fetching " + exchange + " stocklist...")
        url = "http://www.nasdaq.com/screening/companies-by-industry.aspx?exchange=%s&render=download" % exchange
        repeat_times = 1  # repeat downloading in case of http error
        for _ in range(repeat_times):
            try:
                urlData = requests.get(url, timeout=15).content
                if df.empty:
                    df = pd.read_csv(io.StringIO(urlData.decode('utf-8')))
                else:
                    df = pd.concat([
                        df,
                        pd.read_csv(io.StringIO(urlData.decode('utf-8')))
                    ])
                break
            except Exception as e:
                print("exception in getStocks:" + exchange, str(e))
                continue

    df = df[(df['MarketCap'] > 100000000)]
    df = df.drop_duplicates(subset=['Symbol'], keep='first')
    df.sort_index(ascending=True, inplace=True)
    listData = df[['Symbol', 'Name', 'MarketCap', 'Sector', 'Industry']].copy()

    listData = listData.rename(
        columns={
            'Symbol': 'symbol',
            'Name': 'name',
            'MarketCap': 'market_cap',
            'Sector': 'sector',
            'Industry': 'industry'
        })
    listData.loc[len(listData)] = [
        'SPY', 'SPDR S&P 500 ETF Trust', 0.0, '', ''
    ]
    listData.loc[len(listData)] = ['^VIX', 'VOLATILITY S&P 500', 0.0, '', '']
    listData['symbol'] = listData['symbol'].str.strip()

    listData['daily_update'] = '1970-07-01'
    listData['weekly_update'] = '1970-07-01'
    listData['monthly_update'] = '1970-07-01'
    listData['news_update'] = '1970-07-01'

    listData = listData.reset_index(drop=True)
    storeStockList(root_path, "DB_STOCK", "SHEET_US", listData)
    return queryStockList(root_path, "DB_STOCK", "SHEET_US")
def getStocksList(root_path):
    try:
        df = queryStockList(root_path, "DB_STOCK", "SHEET_CHN_DAILY")
        df.index = df.index.astype(str).str.zfill(6)
    except Exception as e:
        df = pd.DataFrame()

    if df.empty == False: return df

    stock_info = ts.get_stock_basics()
    listData = pd.DataFrame(stock_info)
    #listData.index.name = 'Symbol'
    #listData.index = listData.index.astype(str).str.zfill(6) #[str(symbol).zfill(6) for symbol in listData.index] #listData.index.astype(str).str.zfill(6)
    #print(listData.index)
    #listData['Symbol'] = listData['Symbol'].str.strip()
    storeStockList(root_path, "DB_STOCK", "SHEET_CHN_DAILY", listData)
    df = queryStockList(root_path, "DB_STOCK", "SHEET_CHN_DAILY")
    df.index = df.index.astype(str).str.zfill(6)
    return df
Exemple #4
0
def updateStockTwitterData(root_path, from_date, till_date, storeType):
    df = queryStockList(root_path, "DB_STOCK", "SHEET_US_DAILY")
    symbols = df['symbol'].values.tolist()

    pbar = tqdm(total=len(symbols))

    if storeType == 1 or storeType == 2:
        for symbol in symbols:
            startTime = updateSingleStockTwitterData(root_path, symbol,
                                                     from_date, till_date)
            outMessage = '%-*s fetched in:  %.4s seconds' % (6, symbol,
                                                             (time.time() -
                                                              startTime))
            pbar.set_description(outMessage)
            pbar.update(1)

    pbar.close()
Exemple #5
0
def updateStockTwitterData(root_path, from_date, till_date, storeType):
    df = queryStockList(root_path, "DB_STOCK", "SHEET_US_DAILY")
    symbols = df['symbol'].values.tolist()

    pbar = tqdm(total=len(symbols))
    
    if storeType == 1 or storeType == 2:
        for symbol in symbols:
            startTime = updateSingleStockTwitterData(root_path, symbol, from_date, till_date)
            outMessage = '%-*s fetched in:  %.4s seconds' % (6, symbol, (time.time() - startTime))
            pbar.set_description(outMessage)
            pbar.update(1)
            # top5 = getWordCount(df, symbol, stocklist)
            # print("hot correlation:", top5)
            # getSentiments(df)

    # if storeType == 1:
    #     log_errors = []
    #     log_update = []
    #     with concurrent.futures.ThreadPoolExecutor(max_workers=8) as executor:
    #         # Start the load operations and mark each future with its URL
    #         future_to_stock = {executor.submit(updateSingleStockTwitterData, root_path, symbol, from_date, till_date): symbol for symbol in symbols}
    #         for future in concurrent.futures.as_completed(future_to_stock):
    #             stock = future_to_stock[future]
    #             try:
    #                 startTime, message = future.result()
    #             except Exception as exc:
    #                 startTime = time.time()
    #                 log_errors.append('%r generated an exception: %s' % (stock, exc))
    #                 len_errors = len(log_errors)
    #                 if len_errors % 5 == 0: print(log_errors[(len_errors-5):]) 
    #             else:
    #                 if len(message) > 0: log_update.append(message)
    #             outMessage = '%-*s fetched in:  %.4s seconds' % (6, stock, (time.time() - startTime))
    #             pbar.set_description(outMessage)
    #             pbar.update(1)
    #     if len(log_errors) > 0: print(log_errors)
    #     # if len(log_update) > 0: print(log_update)

    pbar.close()
def process_all_stocks_data(root_path, window=1):
    df = queryStockList(root_path, "DB_STOCK", "SHEET_CHN_DAILY")
    df.index = df.index.astype(str).str.zfill(6)
    symbols = df.index.values.tolist()

    pbar = tqdm(total=len(symbols))

    day_selection = []
    week_selection = []
    month_selection = []

    startTime = time.time()
    for symbol in symbols:
        startTime = processing_stock_data(root_path, symbol, window,
                                          day_selection, week_selection,
                                          month_selection)
        outMessage = '%-*s processed in:  %.4s seconds' % (6, symbol,
                                                           (time.time() -
                                                            startTime))
        pbar.set_description(outMessage)
        pbar.update(1)
    print('total processing in:  %.4s seconds' % ((time.time() - startTime)))

    day_week_selection = list(set(day_selection) & set(week_selection))
    week_month_selection = list(set(week_selection) & set(month_selection))
    day_month_selection = list(set(day_selection) & set(month_selection))
    all_selection = list(set(day_week_selection) & set(week_month_selection))

    print("all_selection", len(all_selection), sorted(all_selection))
    print("day_week_selection", len(day_week_selection),
          sorted(day_week_selection))
    print("week_month_selection", len(week_month_selection),
          sorted(week_month_selection))
    print("day_month_selection", len(day_month_selection),
          sorted(day_month_selection))
    print("/n ------------------------ /n")

    print("day_selection", len(day_selection), sorted(day_selection))
    print("week_selection", len(week_selection), sorted(week_selection))
    print("month_selection", len(month_selection), sorted(month_selection))
def updateSingleStockData(root_path, symbol, from_date, till_date,
                          force_check):
    startTime = time.time()
    message = ""

    if len(symbol) == 0: return startTime, message

    now_date = pd.Timestamp((datetime.datetime.now()).strftime("%Y-%m-%d"))
    start_date = pd.Timestamp(from_date)
    end_date = pd.Timestamp(till_date)

    if end_date == now_date:
        end_date = end_date - datetime.timedelta(days=1)

    stockData, lastUpdateTime = queryStock(root_path, "DB_STOCK", "SHEET_US",
                                           "_WEEKLY", symbol, "weekly_update")

    if stockData.empty:
        stockData, message = getSingleStock(symbol, from_date, till_date)
        if stockData.empty == False:
            storeStock(root_path, "DB_STOCK", "SHEET_US", "_WEEKLY", symbol,
                       stockData, "weekly_update")
            first_date = pd.Timestamp(stockData.index[0])
            to_date = (first_date -
                       datetime.timedelta(days=1)).strftime("%Y-%m-%d")
            if judgeNeedPreDownload(root_path, symbol, first_date, from_date,
                                    to_date):
                storePublishDay(root_path, "DB_STOCK", "SHEET_US", symbol,
                                first_date.strftime("%Y-%m-%d"))
            message = message + ", database updated"
        else:
            print("get stock from network failed", symbol)
        return startTime, message

    modified = False
    savePublishDay = False

    first_date = pd.Timestamp(stockData.index[0])
    last_date = pd.Timestamp(stockData.index[-1])

    if start_date < first_date:
        to_date = (first_date -
                   datetime.timedelta(days=1)).strftime("%Y-%m-%d")
        if judgeNeedPreDownload(root_path, symbol, first_date, from_date,
                                to_date):
            message = message + ", download pre data from " + from_date + " to " + to_date
            moreStockData, tempMessage = getSingleStock(
                symbol, from_date, to_date)
            message = message + tempMessage
            if len(moreStockData) > 0:
                if isinstance(moreStockData.index, pd.DatetimeIndex):
                    moreStockData.index = moreStockData.index.strftime(
                        "%Y-%m-%d")
                modified = True
                stockData = pd.concat([moreStockData, stockData])
                stockData.index.name = 'date'
            else:
                savePublishDay = True
                storePublishDay(root_path, "DB_STOCK", "SHEET_US", symbol,
                                first_date.strftime("%Y-%m-%d"))
                message = message + ", save stock publish(IPO) day, next time won't check it again"

    updateOnce = now_date > lastUpdateTime

    if (end_date > last_date) and (updateOnce or force_check):
        to_date = (last_date + datetime.timedelta(days=1)).strftime("%Y-%m-%d")
        if judgeNeedPostDownload(now_date, to_date, till_date):
            message = message + ", download post data from " + to_date + " to " + till_date
            moreStockData, tempMessage = getSingleStock(
                symbol, to_date, till_date)
            message = message + tempMessage
            if len(moreStockData) > 0:
                if isinstance(moreStockData.index, pd.DatetimeIndex):
                    moreStockData.index = moreStockData.index.strftime(
                        "%Y-%m-%d")
                modified = True
                stockData = pd.concat([stockData, moreStockData])
                stockData.index.name = 'date'

    if modified:
        stockData = stockData[~stockData.index.duplicated(keep='first')]
        storeStock(root_path, "DB_STOCK", "SHEET_US", "_WEEKLY", symbol,
                   stockData, "weekly_update")
    elif updateOnce:
        now_date = datetime.datetime.now().strftime("%Y-%m-%d")
        stockList = queryStockList(root_path, "DB_STOCK", "SHEET_US")
        if stockList[stockList.index == symbol]['daily_update'][0] != now_date:
            stockList.set_value(symbol, 'weekly_update', now_date)
            storeStockList(root_path, "DB_STOCK", "SHEET_US", stockList,
                           symbol)
    elif savePublishDay == False:
        message = ""
    return startTime, message
def process_all_stocks_data(root_path, window=1):
    df = queryStockList(root_path, "DB_STOCK", "SHEET_CHN_DAILY")
    df.index = df.index.astype(str).str.zfill(6)
    symbols = df.index.values.tolist()

    pbar = tqdm(total=len(symbols))

    day_selection = []
    week_selection = []
    month_selection = []

    # for index in range(0, window):
    #     day_window = []
    #     day_selection.append(day_window)
    #     week_window = []
    #     week_selection.append(week_window)
    #     month_window = []
    #     month_selection.append(month_window)

    startTime = time.time()
    for symbol in symbols:
        startTime = processing_stock_data(root_path, symbol, window,
                                          day_selection, week_selection,
                                          month_selection)
        outMessage = '%-*s processed in:  %.4s seconds' % (6, symbol,
                                                           (time.time() -
                                                            startTime))
        pbar.set_description(outMessage)
        pbar.update(1)
    print('total processing in:  %.4s seconds' % ((time.time() - startTime)))

    # with concurrent.futures.ThreadPoolExecutor(max_workers=8) as executor:
    #     # Start the load operations and mark each future with its URL
    #     future_to_stock = {executor.submit(processing_stock_data, root_path, symbol, window, day_selection, week_selection, month_selection): symbol for symbol in symbols}
    #     for future in concurrent.futures.as_completed(future_to_stock):
    #         stock = future_to_stock[future]
    #         try:
    #             startTime = future.result()
    #         except Exception as exc:
    #             startTime = time.time()
    #             print('%r generated an exception: %s' % (stock, exc))
    #         outMessage = '%-*s processed in:  %.4s seconds' % (6, stock, (time.time() - startTime))
    #         pbar.set_description(outMessage)
    #         pbar.update(1)

    # day_week_selection = []
    # week_month_selection = []
    # day_month_selection = []
    # all_selection = []

    #count = []

    day_week_selection = list(set(day_selection) & set(week_selection))
    week_month_selection = list(set(week_selection) & set(month_selection))
    day_month_selection = list(set(day_selection) & set(month_selection))
    all_selection = list(set(day_week_selection) & set(week_month_selection))

    #day_selection = list(set(day_selection) - set(all_selection))
    #week_selection = list(set(week_selection) - set(all_selection))
    #month_selection = list(set(month_selection) - set(all_selection))

    # sumUp = len(day_week_selection[index]) + len(week_month_selection[index]) + len(day_month_selection[index]) + len(all_selection[index])
    # count.insert(0,sumUp)

    print("all_selection", len(all_selection), sorted(all_selection))
    print("day_week_selection", len(day_week_selection),
          sorted(day_week_selection))
    print("week_month_selection", len(week_month_selection),
          sorted(week_month_selection))
    print("day_month_selection", len(day_month_selection),
          sorted(day_month_selection))
    print("/n ------------------------ /n")

    # plt.plot(range(0, len(count)), count)
    # plt.title('A simple chirp')
    # plt.show()
    print("day_selection", len(day_selection), sorted(day_selection))
    print("week_selection", len(week_selection), sorted(week_selection))
    print("month_selection", len(month_selection), sorted(month_selection))
def get_all_stocks_correlation(root_path, dates_range):
    df = queryCorrelation(root_path, "DB_STOCK", "SHEET_CHN_RELA")

    if df.empty == False: return df

    df = queryStockList(root_path, "DB_STOCK", "SHEET_CHN_DAILY")
    symbols = df.index.values.tolist()

    pbar = tqdm(total=len(symbols))

    stockData = []
    stockList = []
    print("get stock data...")
    # count = 500
    for symbol in symbols:
        startTime = time.time()
        df = get_single_stock_data(root_path, symbol, dates_range)
        if df.empty: continue
        stockData.append(df['Return'])
        stockList.append(symbol)
        outMessage = '%-*s fetched in:  %.4s seconds' % (12, symbol,
                                                         (time.time() -
                                                          startTime))
        pbar.set_description(outMessage)
        pbar.update(1)
        # count -= 1
        # if count == 0: break
    pbar.close()

    print("merge stock data...")
    startTime = time.time()
    df_returns = pd.concat(stockData, axis=1).fillna(0)
    df_returns.columns = stockList
    df_correlations = df_returns.corr()
    print('total processing in:  %.4s seconds' % ((time.time() - startTime)))

    print("cal correlationship...")
    startTime = time.time()
    pairwise_correlations = []
    stockCount = len(stockList)
    pbar = tqdm(total=stockCount * stockCount)
    for i in range(stockCount):
        for j in range(stockCount):
            if j > i:
                pairwise_correlations.append(df_correlations.iloc[i][j])
            pbar.set_description(str(i) + " " + str(j))
            pbar.update(1)

    print("arrange matrix...")
    CHN_company_pairs = combinations(stockList, 2)
    df_CHN_company_pairs = pd.DataFrame(list(CHN_company_pairs))
    df_CHN_company_pairs.columns = ['company1', 'company2']
    df_CHN_company_pairs.loc[:, 'correlation'] = pd.Series(
        pairwise_correlations).T
    df_CHN_company_pairs = df_CHN_company_pairs.sort_values(
        ['correlation'], ascending=[False]).reset_index(drop=True)

    storeCorrelation(root_path, "DB_STOCK", "SHEET_CHN_RELA",
                     df_CHN_company_pairs)

    print('total processing in:  %.4s seconds' % ((time.time() - startTime)))

    pbar.close()

    return df_CHN_company_pairs
Exemple #10
0
if __name__ == "__main__":
    if len(sys.argv) != 4:
        print(
            "please input Stock symbol and start date, end date after python file"
        )
        exit()

    pd.set_option('precision', 3)
    pd.set_option('display.width', 1000)
    warnings.filterwarnings('ignore',
                            category=pd.io.pytables.PerformanceWarning)

    symbol = str(sys.argv[1])

    stocklist = queryStockList(root_path, "DB_STOCK", "SHEET_US_DAILY")

    result = stocklist[stocklist.index == symbol]

    if result.empty:
        print("symbol not exist.")
        exit()

    start_date = str(sys.argv[2])
    end_date = str(sys.argv[3])

    now = datetime.datetime.now().strftime("%Y-%m-%d")

    config = configparser.ConfigParser()
    config.read(root_path + "/" + "config.ini")
    storeType = int(config.get('Setting', 'StoreType'))