Esempio n. 1
0
class Daily_stock_info(object):
    """Fetch the daily stock info JSON for one date and store it in MongoDB.

    The source URL is built from ``DAILYSTOCKINFO`` and the parsed rows are
    inserted into the ``stock_daily_info`` collection.
    """

    # Fetch attempts made before giving up on an empty response.
    _MAX_FETCH_ATTEMPTS = 10
    # Response keys that may carry the rows; checked in this order.
    _ROW_KEYS = ('data5', 'data4')
    # Format of the date string handed to the parser.
    _DATE_FORMAT = "%Y/%m/%d"

    def __init__(self, date):
        """Create the Mongo / HTTP helpers and remember the date to crawl.

        Args:
            date: object providing ``strftime`` (e.g. a ``datetime``).
        """
        self.__mongo = MongodbAPI()
        self.__htmlreq = HtmlRequests()
        self.__date = date

    def start(self):
        """Crawl the configured date and insert the parsed rows.

        Nothing is inserted when the crawl produced no rows.
        """
        date = self.__date.strftime("%Y%m%d")
        source_url = DAILYSTOCKINFO.format(date=date)
        data = self.__crawl(source_url,
                            self.__date.strftime(self._DATE_FORMAT))
        if not data:
            logging.debug("No stock daily info to insert , url: %s",
                          source_url)
            return

        # A truthy return value is treated as a successful insert.
        inserted = self.__mongo.Insert_Many_Data_To('stock_daily_info', data)
        if inserted:
            logging.info("Insert stock daily info to mongo , date: %s", date)
        else:
            logging.warning(
                "Fail to Insert stock daily info to mongo , url: %s",
                source_url)

    def __crawl(self, url, date):
        """Download ``url`` and return the parsed rows.

        An empty response is retried up to ``_MAX_FETCH_ATTEMPTS`` times.

        Args:
            url: address of the JSON document.
            date: the crawled day formatted as ``YYYY/MM/DD``.

        Returns:
            A list of row documents, or ``None`` when the response stays
            empty, its ``stat`` is not ``'OK'`` or it carries neither
            ``data5`` nor ``data4``.
        """
        for _ in range(self._MAX_FETCH_ATTEMPTS):
            j = self.__htmlreq.get_json(requests, url)
            if not j:
                # Empty response: try again instead of giving up at once.
                continue
            if j.get('stat') != 'OK':
                return None

            for key in self._ROW_KEYS:
                if key in j:
                    # Keep rows whose first column is 4 characters long and
                    # whose last column is not '0.00'.
                    rows = [
                        x for x in j[key]
                        if len(x[0]) == 4 and x[-1] != '0.00'
                    ]
                    return self.__parser(date, rows)

            logging.warning("The daily info not have data5 or data4 url: %s",
                            url)
            return None

        logging.error("Fail to parser daily stock info , url: %s", url)
        return None

    def __parser(self, date, rows: list) -> list:
        """Convert raw rows into documents ready for insertion.

        Args:
            date: the crawled day formatted as ``YYYY/MM/DD``.
            rows: raw rows; columns 0, 3, 5-10 and the last one are read.

        Returns:
            One dict per row, keyed by ``_id`` = ``"<stock>@<date>"``.
        """
        # The date is identical for every row, so parse it only once.
        day = datetime.strptime(date, self._DATE_FORMAT)
        ts = int(day.timestamp())
        return [{
            '_id': row[0] + "@" + date,
            'stock': row[0],
            'date': day,
            'ts': ts,
            'transaction': float(row[3].replace(',', '')),
            'open': self.__get_float(row[5]),
            'high': self.__get_float(row[6]),
            'low': self.__get_float(row[7]),
            'close': self.__get_float(row[8]),
            'change': self.__get_sign_float(row[9], row[10]),
            'price_earning': float(row[-1].replace(',', '')),
        } for row in rows]

    def __get_sign_float(self, sign, num) -> float:
        """Return ``num`` as a float signed according to ``sign``.

        Args:
            sign: text containing ``-`` or ``+``; anything else yields 0.0.
            num: unsigned number as text; thousands separators are removed.
        """
        if "-" in sign:
            return -float(num.replace(',', ''))
        if "+" in sign:
            return float(num.replace(',', ''))
        return 0.0

    def __get_float(self, num) -> "float | None":
        """Parse a price string.

        Returns 0.0 for ``'X0.00'``, ``None`` for the ``'--'`` placeholder
        and the parsed float (thousands separators removed) otherwise.
        """
        if num == '--':
            return None
        cleaned = num.replace(',', '')
        if cleaned == 'X0.00':
            return 0.0
        return float(cleaned)
Esempio n. 2
0
class Institutional_investors():
    """Fetch institutional investors' trading data for one date into MongoDB.

    The source URL is built from ``TSELEGALPERSON`` and parsed rows are
    inserted into the ``stock_information`` collection.
    """

    # Index of the first numeric column in a row; the k-th name of a layout
    # is read from row index ``k + _FIRST_VALUE_INDEX``.
    _FIRST_VALUE_INDEX = 2

    # Layout used when the response declares exactly 12 fields.
    _LAYOUT_12_FIELDS = (
        'foreign_investment_dealer_buy',
        'foreign_investment_dealer_sell',
        'foreign_investment_dealer_net_buy_sell',
        'institutional_investors_net_buy_sell',
        'investment_trust_buy',
        'investment_trust_sell',
        'investment_trust_net_buy_sell',
        'dealer_buy(Self-purchase)',
        'dealer_sell(Self-purchase)',
        'dealer_net_buy_sell',
    )

    # Layout used for any other field count below 18.
    _LAYOUT_UNDER_18_FIELDS = (
        'foreign_investment_buy',
        'foreign_investment_sell',
        'foreign_investment_net_buy_sell',
        'foreign_investment_dealer_buy',
        'foreign_investment_dealer_sell',
        'foreign_investment_dealer_net_buy_sell',
        'investment_trust_buy',
        'investment_trust_sell',
        'investment_trust_net_buy_sell',
        'dealer_net_buy_sell',
        'dealer_buy',
        'dealer_sell',
        'institutional_investors_net_buy_sell',
    )

    # Layout used when the response declares exactly 18 fields.
    # NOTE(review): this layout reads row index 18, i.e. it needs 19 columns
    # per row although it is selected by 18 declared fields - confirm
    # against the real API response.
    _LAYOUT_18_FIELDS = (
        'foreign_investment_buy',
        'foreign_investment_sell',
        'foreign_investment_net_buy_sell',
        'foreign_investment_dealer_buy',
        'foreign_investment_dealer_sell',
        'foreign_investment_dealer_net_buy_sell',
        'investment_trust_buy',
        'investment_trust_sell',
        'investment_trust_net_buy_sell',
        'dealer_net_buy_sell',
        'dealer_buy(Self-purchase)',
        'dealer_sell(Self-purchase)',
        'dealer_net_buy_sell(Self-purchase)',
        'dealer_buy(Hedging)',
        'dealer_sell(Hedging)',
        'dealer_net_buy_sell(Hedging)',
        'institutional_investors_net_buy_sell',
    )

    def __init__(self, date: datetime):
        """Create the Mongo / HTTP helpers and remember the date to crawl."""
        self.mongo = MongodbAPI()
        self.htmlreq = HtmlRequests()
        self.__date = date

    def start(self):
        """Build the source URL for the configured date and crawl it."""
        date = self.__date.strftime("%Y%m%d")
        source_url = TSELEGALPERSON.format(date=date)
        self.__crawl(source_url, self.__date.strftime("%Y/%m/%d"))

    def __crawl(self, url, date):
        """Download ``url``, parse it and insert the rows into MongoDB.

        Args:
            url: address of the JSON document.
            date: the crawled day formatted as ``YYYY/MM/DD``.
        """
        json_data = self.htmlreq.get_json(requests, source_url=url)
        if json_data.get('stat', None) != "OK":
            logging.debug("This day not Opening :%s", date)
            return

        data = self.__parser(json_data, date)
        if not data:
            # Nothing was parsed, so there is nothing to store.
            logging.warning(
                "Fail to Insert Institutional investors to mongo , url: %s",
                url)
            return

        # A truthy return value is treated as a successful insert.
        inserted = self.mongo.Insert_Many_Data_To("stock_information", data)
        if inserted:
            logging.info("Insert Institutional investors to mongo , date: %s",
                         date)
        else:
            logging.warning(
                "Fail to Insert Institutional investors to mongo , url: %s",
                url)

    def __select_layout(self, field_count):
        """Return the column-name layout for ``field_count``, or ``None``."""
        if field_count == 12:
            return self._LAYOUT_12_FIELDS
        if field_count < 18:
            return self._LAYOUT_UNDER_18_FIELDS
        if field_count == 18:
            return self._LAYOUT_18_FIELDS
        return None

    def __parser(self, j, date) -> list:
        """Convert the response rows into documents ready for insertion.

        Args:
            j: decoded response; its ``fields`` length selects the column
                layout and ``data`` holds the rows (lists of strings).
            date: the crawled day formatted as ``YYYY/MM/DD``.

        Returns:
            One dict per row, keyed by ``_id`` = ``"<stock>@<date>"``. The
            list is empty when there are no rows or the field count matches
            no known layout.

        Raises:
            IndexError: a row is shorter than its layout requires.
            ValueError: a numeric column cannot be converted to float.
        """
        rows = j.get('data') or []
        # The layout depends only on the response, not on the row.
        field_count = len(j.get('fields') or [])
        names = self.__select_layout(field_count)
        if names is None:
            if rows:
                logging.warning(
                    "Unknown institutional investors layout , fields: %s",
                    field_count)
            return []

        day = datetime.strptime(date, "%Y/%m/%d")
        data = []
        for raw in rows:
            row = [x.replace(',', '') for x in raw]
            record = {
                '_id': row[0] + "@" + date,
                'date': day,
                'stock_num': row[0],
            }
            for idx, name in enumerate(names, start=self._FIRST_VALUE_INDEX):
                record[name] = float(row[idx])
            data.append(record)
        return data
Esempio n. 3
0
class TWSE_daily():
    """Crawl month-by-month daily data of one stock and store it in MongoDB.

    Crawling starts at ``start_year``/``start_month`` and walks forward
    through the month that was current when the instance was created. Rows
    are inserted into the ``Daily_data`` collection.
    """

    # Extra fetches made for a month whose response comes back empty.
    _MAX_FETCH_RETRY = 5
    # Insert attempts made for one month before reporting a failure.
    _MAX_INSERT_ATTEMPTS = 5

    def __init__(self, stock_num: str, start_year: int, start_month: int):
        """Set up the Mongo / HTTP helpers and the crawl start point."""
        self.mongo = MongodbAPI()
        self.stock_num = stock_num
        self.htmlreq = HtmlRequests()
        self.req = self.htmlreq.get_session(SESSIONURL)
        self.req.keep_alive = False
        self.start_year = start_year
        self.start_month = start_month
        self.now_date = datetime.now()
        self.retry = 0

    def start(self):
        """Crawl from the configured start month onwards."""
        self.crawl(self.start_year, self.start_month)

    def crawl(self, year, month):
        """Crawl ``year``/``month`` and every following month up to now.

        The given month is always crawled; the walk stops as soon as the
        next month lies after ``self.now_date``.
        """
        # Iterative on purpose: one stack frame per month would grow without
        # bound for long date ranges.
        while True:
            self._crawl_month(year, month)
            following = self._get_next_date(year, month)
            year, month = following['year'], following['month']
            if self._is_after_now(year, month):
                logging.info("Done crawl Daily data , %s@%s/%s",
                             self.stock_num, year, month)
                return

    def _is_after_now(self, year, month) -> bool:
        """Tell whether ``year``/``month`` is later than the current month."""
        # Compare (year, month) as a pair; testing the two parts separately
        # never stops when the current month is December.
        return (year, month) > (self.now_date.year, self.now_date.month)

    def _fetch_month(self, year, month) -> dict:
        """Download one month, retrying while the response is empty.

        Returns:
            The decoded response, or ``{}`` when every attempt was empty.
        """
        source_url = TWSEREALTIMEURL.format(
            stock_num=self.stock_num, time="%d%02d01" % (year, month))
        self.retry = 0
        json_data = self.htmlreq.get_json(self.req, source_url)
        while not json_data and self.retry < self._MAX_FETCH_RETRY:
            self.retry += 1
            json_data = self.htmlreq.get_json(self.req, source_url)
        self.retry = 0

        if not json_data:
            # Only report an error when the fetch really failed.
            logging.error("Can't get old daily stock %s@%s-%s ,url : %s ",
                          self.stock_num, year, month, source_url)
            return {}
        return json_data

    def _crawl_month(self, year, month):
        """Fetch, parse and insert the rows of a single month."""
        logging.debug("%s/%s", year, month)
        json_data = self._fetch_month(year, month)
        data = self.parser(json_data.get('data', None))
        if not data:
            return

        for _ in range(self._MAX_INSERT_ATTEMPTS):
            # A truthy return value is treated as a successful insert.
            if self.mongo.Insert_Many_Data_To("Daily_data", data):
                logging.info("Insert Daily data %s@%s-%s",
                             self.stock_num, year, month)
                break
        else:
            logging.error("Fail, Insert Daily data to Mongo,id: %s@%s-%s",
                          self.stock_num, year, month)

    def _convert_date(self, date):
        """Convert '106/05/01' to '2017/05/01'"""
        parts = date.split('/')
        return '/'.join([str(int(parts[0]) + 1911)] + parts[1:])

    def parser(self, j: list) -> list:
        """Convert raw rows into documents ready for insertion.

        Rows whose ``_id`` already exists in ``Daily_data`` are skipped, and
        rows that cannot be converted are logged and skipped.

        Args:
            j: raw rows (lists of strings, columns 0-8 are read) or ``None``.

        Returns:
            The list of new documents; empty when ``j`` is ``None``.
        """
        data = []
        if j is None:
            return data
        for item in j:
            try:
                date = datetime.strptime(
                    self._convert_date(item[0]), '%Y/%m/%d')
            except (ValueError, IndexError, TypeError, AttributeError) as exc:
                logging.error("daily data fail :%s %s", item, exc)
                continue
            _id = self.stock_num + "@" + date.strftime("%Y/%m/%d")

            if self.mongo.CheckExists("Daily_data", _id):
                logging.debug("Insert Daily data ,id :%s exists", _id)
                continue
            try:
                data.append({
                    '_id': _id,
                    'stock': self.stock_num,
                    'date': date,
                    'ts': int(time.mktime(date.timetuple())),
                    'capacity': int(item[1].replace(',', '')),
                    'turnover': int(item[2].replace(',', '')),
                    'open': self._get_float(item[3]),
                    'high': self._get_float(item[4]),
                    'low': self._get_float(item[5]),
                    'close': self._get_float(item[6]),
                    'change': self._get_float(item[7]),
                    'transaction': int(item[8].replace(',', '')),
                })
            except Exception as exc:
                # Deliberately broad: one bad row must not abort the month.
                logging.error("daily data fail :%s %s", item, exc)
                continue
        return data

    def _get_next_date(self, year, month) -> dict:
        """Return the month after ``year``/``month`` as a year/month dict."""
        if month < 12:
            return {'year': year, 'month': month + 1}
        return {'year': year + 1, 'month': 1}

    def _get_float(self, number: str):
        """Parse a price string.

        Returns 0.0 for ``'X0.00'``, ``None`` for the ``'--'`` placeholder
        and the parsed float (thousands separators removed) otherwise.
        """
        if number == '--':
            return None
        cleaned = number.replace(',', '')
        if cleaned == 'X0.00':
            return 0.0
        return float(cleaned)