Example #1
def processCsv(db, csvfile):
    with cd(getDataDir(S.DATA_DIR)):
        print os.getcwd()
        if len(csvfile) > 0:
            dbReplaceCounter(db, csvfile)
            if S.DBG_ALL:
                pprint.pprint(db.klseeod.find_one({'0': '3A', '1': '2018-05-08'}))  # BSON keys must be strings
        else:
            db.klseeod.drop()
            csvfiles = glob.glob("*.csv")
            for csvf in csvfiles:
                dbNewImport(db, csvf)
            if S.DBG_ALL:
                pprint.pprint(
                    db.klseeod.find_one({
                        '0': '3A',
                        '1': '2018-01-08'
                    }))
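
A hedged usage sketch for processCsv(), assuming the initKlseDB()/closeKlseDB() helpers shown in Example #10; the CSV filename is hypothetical.

db = initKlseDB()
processCsv(db, '')              # full reload: drop klseeod, re-import all *.csv
processCsv(db, '3A.0012.csv')   # hypothetical file: replace one counter's rows
closeKlseDB()
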
def writeLatestPrice(lastTradingDate=None,
                     writeEOD=False,
                     resume=False,
                     recentDate=""):
    # A getToday() call in the parameter default would be evaluated once at
    # import time; resolve the default per call instead.
    if lastTradingDate is None:
        lastTradingDate = getToday('%Y-%m-%d')
    def checkMPV():
        if resume:
            mpvfile = getDataDir(S.DATA_DIR) + S.MVP_DIR + shortname + '.csv'
            if not os.path.isfile(mpvfile):
                return
            lines = tail(mpvfile)
            ldata = lines.split(',')
            if ldata[1] == lastTradingDate:
                print "Resume mode: Skipped MPV downloaded ->", shortname
                return

        if 1 == 1:  # manual switch: the else branch keeps the fuller MPV pipeline for reference
            if any("klsemvp" in s for s in pypath):
                updateMPV(shortname, stockCode, eod)
        else:
            if any("klsemvp" in s for s in pypath):
                if updateMPV(shortname, stockCode, eod):
                    load_mvp_args(True)
                    if mvpSynopsis(shortname, stockCode, dojson=1):
                        if 1 == 0:  # 2018-12-21 skip to speed up daily download
                            load_mvp_args(False)
                            # 2018-12-21 limit to 300 due to AKNIGHT exceeds Locator.MAXTICKS error
                            mvpChart(shortname, stockCode, 300)

    stocksListing = loadfromi3(S.DATA_DIR + "i3/" + lastTradingDate + ".json",
                               recent=recentDate)
    eodlist = []

    pypath = os.environ.get('PYTHONPATH', '').split(os.pathsep)  # tolerate an unset PYTHONPATH
    if any("klsemvp" in s for s in pypath):
        from analytics.mvp import updateMPV, load_mvp_args
        from analytics.mvpchart import mvpChart, mvpSynopsis
    print ' Writing latest price from i3 ...'
    for key in sorted(stocksListing.iterkeys()):
        eod, shortname, stockCode = unpackStockData(key, lastTradingDate,
                                                    stocksListing[key])
        outfile = getDataDir(S.DATA_DIR) + shortname + '.' + stockCode + '.csv'
        if resume:
            lines = tail(outfile)
            ldata = lines.split(',')
            if ldata[1] == lastTradingDate:
                print "Resume mode: Skipped downloaded ->", shortname
                checkMPV()
                continue
        eodlist.append(eod)
        if writeEOD:
            try:
                with open(outfile, "ab") as fh:
                    fh.write(eod + '\n')
            except Exception:
                print " ERR:WriteLatestPrice:", key, ':', shortname, ':', stockCode
                raise
        else:
            print eod

        checkMPV()

    return eodlist
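
The resume logic above leans on a tail() helper that is not shown; a minimal sketch under that assumption: seek near the end of the file and return its last non-empty line.

import os

def tail(fname, blocksize=1024):
    # Assumed helper: last non-empty line of fname, or '' when empty.
    with open(fname, 'rb') as f:
        f.seek(0, os.SEEK_END)
        size = f.tell()
        f.seek(max(0, size - blocksize))
        lines = [l for l in f.read().splitlines() if l.strip()]
        return lines[-1] if lines else ''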
Example #4
    i3chartlinks = []
    sbchartlinks = []
    SB_URL = 'https://my.stockbit.com/#/symbol/KLSE-'
    stocklist = formStocklist(counters, getI3Dir() + 'klse.txt')
    for key in stocklist.iterkeys():
        if i3:
            i3chartlinks.append(S.I3_KLSE_URL + '/servlets/stk/chart/' +
                                stocklist[key] + '.jsp')
        if sb:
            sbchartlinks.append(SB_URL + key + '/chartbit')

    return i3chartlinks, sbchartlinks
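
A hedged usage sketch for compileLinks(); the counter string is illustrative and assumes the comma-separated format formStocklist() appears to take elsewhere in these examples.

i3links, sblinks = compileLinks(True, True, 'MAYBANK,PBBANK')
for url in i3links + sblinks:
    print(url)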


if __name__ == '__main__':
    args = docopt(__doc__)
    loadCfg(getDataDir(S.DATA_DIR))
    counters = getCounters(args['COUNTER'], args['--portfolio'],
                           args['--watchlist'])
    if args['--debug']:
        S.DBG_ALL = True
    if S.DBG_ALL:
        print getTime(), counters

    if len(counters) > 0:
        i3chartlinks, sbchartlinks = compileLinks(args['--i3'], args['--sb'],
                                                  counters)
        if S.DBG_ALL:
            print getTime(), i3chartlinks
            print getTime(), sbchartlinks
    else:
        LOGIN_URL = S.I3_KLSE_URL + LOGIN_URL
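
The args keys read above imply a docopt usage string along these lines; this is a hypothetical reconstruction (the module's real __doc__ and filename are not shown).

USAGE = """
Usage:
  chartlinks.py [--i3] [--sb] [--portfolio] [--watchlist] [--debug] [COUNTER]...

Options:
  --i3         Build i3investor chart links
  --sb         Build Stockbit chart links
  --portfolio  Take counters from the configured portfolio
  --watchlist  Take counters from the configured watchlist
  --debug      Enable debug output
"""
# docopt(USAGE) returns a dict with the same keys the __main__ block uses.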
Example #5
0
    klse = "../i3investor/klse.txt"
    stocks = ''
    if len(stocks) > 0:
        #  download only selected counters
        stocklist = formStocklist(stocks, klse)
    else:
        # Full download using klse.txt
        stocklist = loadKlseCounters(klse)

    cookie, crumb = getYahooCookie('https://uk.finance.yahoo.com/quote/AAPL/')

    for shortname in sorted(stocklist.iterkeys()):
        stock_code = stocklist[shortname]
        if len(stock_code) > 0:
            # OUTPUT_FILE = '../../data/yahoo/' + shortname + "." + stock_code + ".csv"
            OUTPUT_FILE = getDataDir(
                S.DATA_DIR) + 'yahoo/' + shortname + "." + stock_code + ".csv"
            if S.RESUME_FILE:
                lastdt = getLastDate(OUTPUT_FILE)
                if len(lastdt) == 0:
                    # File is likely to be empty, hence scrape from beginning
                    lastdt = S.ABS_START
            else:
                # Without this branch lastdt would be unbound below
                lastdt = S.ABS_START
            print "Scraping", shortname, stock_code, lastdt, '$'
            q = YahooQuote(cookie, crumb, shortname, stock_code + ".KL",
                           lastdt, "2018-02-01")
            if len(q.getCsvErr()) > 0:
                st_code, st_reason = q.getCsvErr().split(":")
                rtn_code = int(st_code)
                print rtn_code, st_reason
            else:
                print q
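
getLastDate() is another helper these snippets assume; a minimal sketch reusing the tail() sketch from earlier, assuming the date is the second comma-separated field (as in the ldata[1] checks above).

import os

def getLastDate(csvfile):
    # Assumed helper: date of the file's last row, or '' for a missing or
    # empty file so callers fall back to S.ABS_START.
    if not os.path.isfile(csvfile):
        return ''
    last = tail(csvfile)
    return last.split(',')[1] if last else ''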
    def to_df(self):
        """
        returns a pandas DataFrame object based on parsed data from a
        Commodity object's HTML
        """
        try:
            df = pd.read_html(self.response)
            df = df[0]  # Ignore footer table
            if S.DBG_ALL:
                df.to_csv(getDataDir(S.DATA_DIR) + self.name + ".inf")
            price = df['Price'][0]
            # print self.name, type(price), price
            if math.isnan(price):
                # No result found
                return None
            df["Date"] = pd.to_datetime(df["Date"])
            df.insert(0, "Commodity", np.nan)
            df["Commodity"] = self.name
            df.insert(6, "Close", np.nan)
            df["Close"] = df["Price"]
            df.insert(7, "Volume", np.nan)

            if self.name.startswith('USD'):
                df['Volume'] = 0
            elif self.name.startswith('FTFBM'):
                df['Volume'] = df["Vol."]
            else:
                mp = {'K': ' * 10**3', 'M': ' * 10**6'}
                # vol = df['Vol.'][0]
                # print type(vol), vol
                df['Vol.'] = df['Vol.'].replace('-', '0.1K')
                df['Vol.'] = df['Vol.'].replace(
                    0, '0.1K')  # replace all 0 vol with 100 shares
                '''
                Convert k to 1000 and m to 1000000
                Important: Can only support max 5 months of EOD to convert
                '''
                try:
                    df["Volume"] = pd.eval(df["Vol."].replace(
                        mp.keys(), mp.values(),
                        regex=True).str.replace(r'[^\d\.\*]+', ''))
                except Exception:
                    df['Volume'] = df["Vol."]

            df.drop('Price', axis=1, inplace=True)
            df.drop('Change %', axis=1, inplace=True)
            if 'Vol.' in df.columns:
                # FOREX has no "Vol." column
                df.drop('Vol.', axis=1, inplace=True)
            df.sort_values(by='Date', inplace=True)
        except ValueError as ve:
            df = 'ValueError'
            self.csverr = self.name + ": ValueError (No data for date range) " + ' (' + str(
                ve) + ')'
            if S.DBG_ALL:
                with open(getDataDir(S.DATA_DIR) + "value.err", 'ab') as f:
                    f.write('\n=============================\n')
                    f.write(self.name + "\n")
                    f.write(self.response)
        except Exception as e:
            # This happens when records being processed are larger than 3 months data,
            # try reducing the period
            if S.DBG_ALL:
                with open(getDataDir(S.DATA_DIR) + "value.err", 'ab') as f:
                    f.write('\n=============================\n')
                    f.write(self.name + "\n")
                    f.write(self.response)
            self.csverr = self.name + ":" + self.start + "," + self.end + ":" + str(
                e)
            df = 'Exception'
            # raise e

        return df
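
The K/M volume conversion inside to_df() can be illustrated standalone: expand each suffix into an arithmetic expression, then evaluate it cell by cell (values here are illustrative only).

import pandas as pd

mp = {'K': ' * 10**3', 'M': ' * 10**6'}
vol = pd.Series(['1.2M', '350.5K', '-'])
vol = vol.replace('-', '0.1K')  # treat a missing volume as 100 shares
expanded = vol.replace(list(mp.keys()), list(mp.values()), regex=True)
print(expanded.map(pd.eval))  # 1200000.0, 350500.0, 100.0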
def scrapeKlseRelated(klsemap, WRITE_CSV=True, dbg=False):
    idmap = loadIdMap(klsemap)
    # counters = 'USDMYR.2168,FTFBM100.0200,FTFBMKLCI.0201,FTFBMMES.0202,FTFBMSCAP.0203,FTFBM70.0204,FTFBMEMAS.0205'
    counterlist = S.KLSE_RELATED.split(',')
    eodlist = []
    for i in counterlist:
        counter = i.split('.')
        shortname = counter[0]
        stock_code = counter[1]
        rtn_code = 0
        OUTPUT_FILE = getDataDir(
            S.DATA_DIR) + shortname + "." + stock_code + ".csv"
        TMP_FILE = OUTPUT_FILE + 'tmp'
        if S.RESUME_FILE:
            lastdt = getLastDate(OUTPUT_FILE)
            if len(lastdt) == 0:
                # File is likely to be empty, hence scrape from beginning
                lastdt = S.ABS_START
        else:
            lastdt = S.ABS_START
        enddt = getToday('%Y-%m-%d')
        if lastdt == enddt:
            print "Skipped downloaded:", counter
            continue
        print 'Scraping {0},{1}: lastdt={2}, End={3}'.format(
            shortname, stock_code, lastdt, enddt)
        while True:
            startdt = lastdt
            if getDaysBtwnDates(lastdt,
                                enddt) > 22 * 3:  # do 3 months at a time
                stopdt = getDayOffset(startdt, 22 * 3)
                lastdt = getNextBusinessDay(stopdt)
            else:
                stopdt = enddt
            eod = InvestingQuote(idmap, shortname, startdt, stopdt)
            if dbg:
                for item in eod:
                    print item
            if len(eod.getCsvErr()) > 0:
                print eod.getCsvErr()
            elif isinstance(eod.response, unicode):
                dfEod = eod.to_df()
                if isinstance(dfEod, pd.DataFrame):
                    if dbg:
                        print dfEod[:5]
                    if WRITE_CSV:
                        dfEod.index.name = 'index'
                        dfEod.to_csv(TMP_FILE, index=False, header=False)
                    dates = pd.to_datetime(dfEod["Date"], format='%Y%m%d')
                    dfEod["Date"] = dates.dt.strftime('%Y-%m-%d').tolist()
                    elist = dfEod.values.tolist()[0]
                    eodlist.append(','.join(map(str, unpackEOD(*elist))))
                else:
                    print "ERR:" + dfEod + ": " + shortname + "," + lastdt
                    rtn_code = -2

            if WRITE_CSV:
                appendCsv(rtn_code, OUTPUT_FILE)

            if stopdt == enddt:
                break

    return eodlist
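
A hedged invocation sketch for scrapeKlseRelated(); the ID-map filename is hypothetical.

for eod in scrapeKlseRelated('klse_idmap.json', WRITE_CSV=False, dbg=False):
    print(eod)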


def scrapeKlseEod(klse=S.KLSE_LIST, stocks=''):
    # Hypothetical header: the snippet's original def line is missing; the
    # selected/full download split mirrors the Yahoo snippet above.
    if len(stocks) > 0:
        #  download only selected counters
        stocklist = formStocklist(stocks, klse)
    else:
        # Full download using klse.txt
        doWriteListing = False  # avoid shadowing writeStocksListing()
        if doWriteListing:
            writeStocksListing()
        stocklist = loadKlseCounters(klse)

    for shortname in sorted(stocklist.iterkeys()):
        if shortname in S.EXCLUDE_LIST:
            print "Exclude: ", shortname
            continue
        stock_code = stocklist[shortname]
        if len(stock_code) > 0:
            rtn_code = 0
            OUTPUT_FILE = getDataDir(
                S.DATA_DIR
            ) + 'investingcom/' + shortname + "." + stock_code + ".csv"
            TMP_FILE = OUTPUT_FILE + 'tmp'
            if S.RESUME_FILE:
                lastdt = getLastDate(OUTPUT_FILE)
                if len(lastdt) == 0:
                    # File is likely to be empty, hence scrape from beginning
                    lastdt = S.ABS_START
            else:
                lastdt = S.ABS_START
            enddt = getToday('%Y-%m-%d')
            print 'Scraping {0},{1}: lastdt={2}, End={3}'.format(
                shortname, stock_code, lastdt, enddt)
            failcount = 0
            while True:
                if failcount == 0:
Example #9
    stk_list = []
    if len(lastFinDate) > 0:
        stk_list = scrape_latest_fin(connect_stk_fin(''), lastFinDate)
    else:
        klse = S.KLSE_LIST
        if len(stocks) > 0:
            #  download only selected counters
            stk_list = formStocklist(stocks, klse)
        else:
            stk_list = loadKlseCounters(klse)

    for stk_name in stk_list:
        stk_code = stk_list[stk_name]
        print 'Downloading financial for', stk_name, stk_code
        if len(stk_code) == 4:
            stkfin = scrape_stk_fin(connect_stk_fin(stk_code), lastFinDate)
            if stkfin is not None:
                with open(
                        getDataDir(S.DATA_DIR) + stk_name + '.' + stk_code +
                        ".fin", "w") as fh:
                    for key in sorted(stkfin.iterkeys()):
                        fin = ','.join(map(str, unpack_fin(key, *(stkfin[key]))))
                        print fin
                        fh.write(fin + '\n')
            else:
                print 'Skipped:', stk_name, stk_code

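A hedged sketch for reading back one of the .fin files written above; the counter name and code are illustrative only.

finfile = getDataDir(S.DATA_DIR) + 'MAYBANK' + '.' + '1155' + '.fin'
with open(finfile) as fh:
    for line in fh:
        print(line.rstrip('\n').split(','))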
Example #10
mongod = None
mongo_client = None


def startMongoD():
    # The def line is implied by the initKlseDB() call below; the daemon
    # handle is kept module-global so closeKlseDB() can terminate it.
    global mongod
    with cd(S.DATA_DIR):
        mongod = subprocess.Popen(
            ['mongod', '--dbpath',
             os.path.expanduser(S.DATA_DIR)])


def initKlseDB():
    startMongoD()
    global mongo_client
    mongo_client = MongoClient()
    db = mongo_client.klsedb
    return db


def closeKlseDB():
    if mongo_client is not None:
        print 'Terminating Mongo Client ...'
        mongo_client.close()
    if mongod is not None:
        print 'Terminating MongoDB daemon ...'
        mongod.terminate()
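
Because startMongoD() leaves a daemon running, registering the cleanup with atexit is one way to guarantee teardown; a suggestion, not part of the original module.

import atexit

# Run closeKlseDB() even when the process exits abnormally.
atexit.register(closeKlseDB)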


if __name__ == '__main__':
    loadCfg(S.DATA_DIR)
    with cd(getDataDir(S.DATA_DIR)):
        exportCounters()
        exportQuotes()
    closeKlseDB()