Esempio n. 1
0
 def __init__(self, stock_num):
     """Prepare a crawler for a single stock.

     Creates the MongoDB and HTTP helper objects, opens a session through
     ``HtmlRequests.get_session(SESSIONURL)`` and records today's 13:30:10
     (naive local time from ``datetime.now()``) as ``stop_date``.

     Args:
         stock_num: Identifier of the stock this instance works on.
     """
     self.stock_num = stock_num
     self.mongo = MongodbAPI()
     self.htmlreq = HtmlRequests()
     self.req = self.htmlreq.get_session(SESSIONURL)
     # Same calendar day as "now", pinned to a fixed wall-clock time.
     today = datetime.now()
     self.stop_date = datetime(
         year=today.year,
         month=today.month,
         day=today.day,
         hour=13,
         minute=30,
         second=10,
     )
Esempio n. 2
0
 def __init__(self, stock_num: str, start_year: int, start_month: int):
     """Prepare a crawler that starts at a given year/month for one stock.

     Creates the MongoDB and HTTP helper objects, opens a session through
     ``HtmlRequests.get_session(SESSIONURL)`` and switches ``keep_alive``
     off on that session object.

     Args:
         stock_num: Identifier of the stock to work on.
         start_year: Year stored as ``self.start_year``.
         start_month: Month stored as ``self.start_month``.
     """
     # Plain configuration values first, then the helper objects.
     self.stock_num = stock_num
     self.start_year = start_year
     self.start_month = start_month
     self.retry = 0  # retry counter, starts at zero

     self.mongo = MongodbAPI()
     self.htmlreq = HtmlRequests()
     session = self.htmlreq.get_session(SESSIONURL)
     self.req = session
     session.keep_alive = False

     # Snapshot of "now" taken once at construction time.
     self.now_date = datetime.now()
Esempio n. 3
0
 def parser(self, stock_num, url) -> list:
     """Scrape the tick table at *url* into a list of transaction documents.

     Every ``<tr>`` under ``//div[@id="TickHeight"]/table`` is read as six
     text cells: time, buying, selling, transaction, change and volume.
     Rows are skipped when a cell has no text, when buying or selling is
     ``'--'``, or when ``self.mongo.CheckExists`` reports that the
     document id is already stored in ``Transaction_details``.

     Args:
         stock_num: Stock identifier; stored as given under ``'stock'``
             and stringified for the document ``_id``.
         url: Page to fetch through ``HtmlRequests.get_html``.

     Returns:
         A list of dicts (possibly empty) with the keys ``_id``, ``ts``,
         ``stock``, ``date``, ``buying``, ``selling``, ``transaction``,
         ``ups_and_downs`` and ``stock_volume``.
     """
     daily = []
     tree = HtmlRequests().get_html(url)
     if tree is None:
         return daily
     now = datetime.now()
     # Build the id prefix once; str() keeps it identical to the key used
     # for the existence check even when stock_num is not a str.
     stock_id = str(stock_num)
     for row in tree.xpath('//div[@id="TickHeight"]/table/tr'):
         cells = [row.xpath('td[%d]/text()' % col) for col in range(1, 7)]
         if not all(cells):
             # Row without the six text cells (e.g. a header row):
             # nothing to parse, and indexing [0] would raise IndexError.
             continue
         (time_text, buying, selling, transaction, change_text,
          stock_volume) = [found[0] for found in cells]
         if buying == '--' or selling == '--':
             continue

         # The change cell is "<marker> <amount>"; the marker gives the sign.
         change_parts = change_text.split(" ")
         if len(change_parts) >= 2 and change_parts[0] == "▼":
             ups_and_downs = "-" + change_parts[1]
         elif len(change_parts) >= 2 and change_parts[0] == "▲":
             ups_and_downs = change_parts[1]
         else:
             # No amount, or an unrecognised marker: record "no change"
             # instead of failing later on float("").
             ups_and_downs = "0.0"

         # The page only carries HH:MM:SS, so today's date is attached.
         clock = time_text.split(':')
         date = datetime(now.year, now.month, now.day, int(clock[0]),
                         int(clock[1]), int(clock[2]))
         doc_id = stock_id + "@" + date.isoformat()
         if self.mongo.CheckExists("Transaction_details", doc_id):
             continue
         daily.append({
             '_id': doc_id,
             'ts': int(date.timestamp()),
             'stock': stock_num,
             'date': date,
             'buying': float(buying),
             'selling': float(selling),
             'transaction': float(transaction),
             'ups_and_downs': float(ups_and_downs),
             'stock_volume': int(stock_volume)
         })
     return daily
Esempio n. 4
0
 def __init__(self, date: datetime):
     """Store the target date and create the MongoDB / HTTP helpers.

     Args:
         date: The day this instance works on; kept in a name-mangled
             private attribute.
     """
     self.__date = date
     self.mongo = MongodbAPI()
     self.htmlreq = HtmlRequests()
Esempio n. 5
0
class Institutional_investors():
    """Fetch one day's institutional-investor report and store it in MongoDB.

    ``start()`` builds the URL from ``TSELEGALPERSON``, downloads the JSON
    payload, converts every row of ``payload['data']`` into a document and
    passes the list to ``MongodbAPI.Insert_Many_Data_To`` for the
    ``stock_information`` collection.
    """

    # Column layouts: (document key, row index) pairs in insertion order.
    # Which layout applies is decided by len(payload['fields']).
    _LAYOUT_12_FIELDS = (
        ('foreign_investment_dealer_buy', 2),
        ('foreign_investment_dealer_sell', 3),
        ('foreign_investment_dealer_net_buy_sell', 4),
        ('institutional_investors_net_buy_sell', 5),
        ('investment_trust_buy', 6),
        ('investment_trust_sell', 7),
        ('investment_trust_net_buy_sell', 8),
        ('dealer_buy(Self-purchase)', 9),
        ('dealer_sell(Self-purchase)', 10),
        ('dealer_net_buy_sell', 11),
    )
    # Columns 2-11 are shared by the two wider layouts.
    _WIDE_COMMON = (
        ('foreign_investment_buy', 2),
        ('foreign_investment_sell', 3),
        ('foreign_investment_net_buy_sell', 4),
        ('foreign_investment_dealer_buy', 5),
        ('foreign_investment_dealer_sell', 6),
        ('foreign_investment_dealer_net_buy_sell', 7),
        ('investment_trust_buy', 8),
        ('investment_trust_sell', 9),
        ('investment_trust_net_buy_sell', 10),
        ('dealer_net_buy_sell', 11),
    )
    _LAYOUT_UNDER_18_FIELDS = _WIDE_COMMON + (
        ('dealer_buy', 12),
        ('dealer_sell', 13),
        ('institutional_investors_net_buy_sell', 14),
    )
    # NOTE(review): this layout is selected when there are 18 field names
    # but reads row index 18, i.e. it needs 19 cells per row — confirm
    # against a real payload.
    _LAYOUT_18_FIELDS = _WIDE_COMMON + (
        ('dealer_buy(Self-purchase)', 12),
        ('dealer_sell(Self-purchase)', 13),
        ('dealer_net_buy_sell(Self-purchase)', 14),
        ('dealer_buy(Hedging)', 15),
        ('dealer_sell(Hedging)', 16),
        ('dealer_net_buy_sell(Hedging)', 17),
        ('institutional_investors_net_buy_sell', 18),
    )

    def __init__(self, date: datetime):
        """Remember *date* and create the MongoDB / HTTP helper objects."""
        self.mongo = MongodbAPI()
        self.htmlreq = HtmlRequests()
        self.__date = date

    def start(self):
        """Crawl and store the report for the date given at construction."""
        source_url = TSELEGALPERSON.format(date=self.__date.strftime("%Y%m%d"))
        self.__crawl(source_url, self.__date.strftime("%Y/%m/%d"))

    def __crawl(self, url, date):
        """Download *url*, parse it and insert the result.

        Args:
            url: Fully formatted report URL.
            date: The same day as a ``YYYY/MM/DD`` string (used for ids
                and log messages).
        """
        json_data = self.htmlreq.get_json(requests, source_url=url)
        if json_data.get('stat', None) != "OK":
            logging.debug("This day not Opening :%s", date)
            return
        data = self.__parser(json_data, date)
        # Insert_Many_Data_To's return value is treated as a success flag.
        inserted = self.mongo.Insert_Many_Data_To("stock_information", data)
        if inserted:
            logging.info("Insert Institutional investors to mongo , date: %s",
                         date)
        else:
            # logging.warn is a deprecated alias (removed in Python 3.13).
            logging.warning(
                "Fail to Insert Institutional investors to mongo , url: %s",
                url)

    def __parser(self, j, date) -> list:
        """Convert the payload rows into MongoDB documents.

        Args:
            j: Payload dict with a ``'data'`` list of rows (lists of
                strings) and a ``'fields'`` list of column names.
            date: ``YYYY/MM/DD`` string; part of each ``_id`` and parsed
                into the ``'date'`` value.

        Returns:
            One dict per row. Empty when there are no rows or when the
            number of fields matches none of the known layouts.

        Raises:
            IndexError: If a row has fewer cells than the selected layout
                reads.
            ValueError: If a numeric cell cannot be converted to float.
        """
        rows = j['data']
        if not rows:
            return []

        field_count = len(j['fields'])
        if field_count == 12:
            layout = self._LAYOUT_12_FIELDS
        elif field_count < 18:
            layout = self._LAYOUT_UNDER_18_FIELDS
        elif field_count == 18:
            layout = self._LAYOUT_18_FIELDS
        else:
            logging.warning(
                "Unsupported institutional investors layout: %d fields",
                field_count)
            return []

        day = datetime.strptime(date, "%Y/%m/%d")
        data = []
        for raw in rows:
            # Numbers arrive with thousands separators ("1,234").
            cells = [cell.replace(',', '') for cell in raw]
            stock = str(cells[0])
            doc = {
                '_id': stock + "@" + date,
                'date': day,
                'stock_num': stock,
            }
            for key, index in layout:
                doc[key] = float(cells[index])
            data.append(doc)
        return data
Esempio n. 6
0
class Daily_stock_info(object):
    """Fetch the daily quotes report for one date and store it in MongoDB.

    ``start()`` formats ``DAILYSTOCKINFO`` with the date, downloads the
    JSON payload, turns the rows of its ``data5`` (or ``data4``) table
    into documents and inserts them into ``stock_daily_info``.
    """

    def __init__(self, date):
        """Remember *date* and create the MongoDB / HTTP helper objects."""
        self.__mongo = MongodbAPI()
        self.__htmlreq = HtmlRequests()
        self.__date = date

    def start(self):
        """Crawl and store the report for the date given at construction."""
        date = self.__date.strftime("%Y%m%d")
        source_url = DAILYSTOCKINFO.format(date=date)
        data = self.__crawl(source_url, self.__date.strftime("%Y/%m/%d"))
        if data is None:
            return
        # Insert_Many_Data_To's return value is treated as a success flag.
        inserted = self.__mongo.Insert_Many_Data_To('stock_daily_info', data)
        if inserted:
            logging.info("Insert stock daily info to mongo , date: %s", date)
        else:
            # logging.warn is a deprecated alias (removed in Python 3.13).
            logging.warning(
                "Fail to Insert stock daily info to mongo , url: %s",
                source_url)

    def __crawl(self, url, date):
        """Download *url* (up to 10 attempts) and parse its quote table.

        An empty payload is retried — presumably that is how
        ``get_json`` reports a failed request (TODO confirm). A payload
        whose ``stat`` is not ``'OK'`` or that has neither ``data5`` nor
        ``data4`` ends the crawl immediately.

        Args:
            url: Fully formatted report URL.
            date: The same day as a ``YYYY/MM/DD`` string.

        Returns:
            The list produced by the parser, or ``None`` when nothing
            usable could be fetched.
        """
        for _ in range(10):
            j = self.__htmlreq.get_json(requests, url)
            if not j:
                # Nothing came back; try again. Previously the loop
                # returned on the first pass, so this retry never ran.
                continue
            if j.get('stat') != 'OK':
                return None

            if 'data5' in j:
                table = j['data5']
            elif 'data4' in j:
                table = j['data4']
            else:
                logging.warning(
                    "The daily info not have data5 or data4 url: %s", url)
                return None
            # Keep rows with a 4-character code and a last cell != '0.00'.
            rows = [
                x for x in table if len(x[0]) == 4 and x[-1] != '0.00'
            ]
            return self.__parser(date, rows)

        logging.error("Fail to parser daily stock info , url: %s", url)
        return None

    def __parser(self, date, rows: list) -> list:
        """Convert report rows into MongoDB documents.

        Args:
            date: ``YYYY/MM/DD`` string; part of every ``_id``.
            rows: Rows of strings. The columns read are 0 (code),
                3 (transaction), 5-8 (open/high/low/close), 9 and 10
                (sign markup and magnitude of the change) and the last
                one (price/earnings).

        Returns:
            One dict per row.
        """
        # Identical for every row, so parse the date only once.
        day = datetime.strptime(date, "%Y/%m/%d")
        ts = int(datetime.timestamp(day))
        data = []
        for i in rows:
            data.append({
                '_id': i[0] + "@" + date,
                'stock': i[0],
                'date': day,
                'ts': ts,
                'transaction': float(i[3].replace(',', '')),
                'open': self.__get_float(i[5]),
                'high': self.__get_float(i[6]),
                'low': self.__get_float(i[7]),
                'close': self.__get_float(i[8]),
                'change': self.__get_sign_float(i[9], i[10]),
                'price_earning': float(i[-1].replace(',', '')),
            })
        return data

    def __get_sign_float(self, sign, num) -> float:
        """Combine a sign marker and a magnitude into a signed float.

        Args:
            sign: Text containing ``'-'`` or ``'+'``; anything else
                yields ``0.0``.
            num: Magnitude as text; thousands separators are accepted.
        """
        magnitude = num.replace(',', '')
        if "-" in sign:
            return float("-" + magnitude)
        if "+" in sign:
            return float(magnitude)
        return 0.0

    def __get_float(self, num) -> "float | None":
        """Parse a price cell: ``'X0.00'`` -> 0.0, ``'--'`` -> None."""
        cleaned = num.replace(',', '')
        if cleaned == 'X0.00':
            return 0.0
        if num == '--':
            return None
        return float(cleaned)
Esempio n. 7
0
class TWSE_realtime():
    """Poll the realtime quote of one stock and store each tick in MongoDB.

    ``crawl()`` re-arms itself with a 5-second ``threading.Timer`` for as
    long as the current time is before ``stop_date`` (today 13:30:10,
    naive local time).
    """

    def __init__(self, stock_num):
        """Create the helpers, open a session and compute ``stop_date``.

        Args:
            stock_num: Identifier of the stock to poll.
        """
        self.mongo = MongodbAPI()
        self.stock_num = stock_num
        self.htmlreq = HtmlRequests()
        self.req = self.htmlreq.get_session(SESSIONURL)
        now = datetime.now()
        self.stop_date = datetime(now.year, now.month, now.day, 13, 30, 10)

    def start(self):
        """Run the first poll; later polls are scheduled by ``crawl``."""
        self.crawl()

    def crawl(self):
        """Fetch one quote and insert it unless it is already stored."""
        # Schedule the next poll first so a failure below does not stop
        # the polling chain.
        if datetime.now() < self.stop_date:
            threading.Timer(5.0, self.crawl).start()

        now_time = int(time.time()) * 1000
        source_url = TWSEREALTIMEURL.format(
            stock_num=self.stock_num, time=now_time)
        json_data = self.htmlreq.get_json(self.req, source_url)
        data = self.parser(json_data)
        if data is None:
            return
        if self.mongo.CheckExists('Realtime_data', data.get('_id', None)):
            return

        # Up to five insert attempts; Insert_Data_To's return value is
        # treated as a success flag.
        for _ in range(5):
            if self.mongo.Insert_Data_To("Realtime_data", data):
                logging.info("Insert realtime data to mongo, id:%s",
                             data.get("_id"))
                return
        logging.error("Fail to insert realtime data to mongo, id:%s",
                      data.get("_id"))

    @staticmethod
    def _to_float(value):
        """Return ``float(value)``, or ``None`` for a missing value.

        ``None``, ``''`` and ``'-'`` count as missing (``'-'`` is assumed
        to be the feed's "no value" placeholder — TODO confirm).
        """
        if value is None or value in ('', '-'):
            return None
        return float(value)

    @staticmethod
    def _to_float_list(joined):
        """Split an underscore-joined string (``'1.0_2.0_'``) into floats.

        A missing or empty string yields an empty list.
        """
        if not joined:
            return []
        return [float(part) for part in joined.strip('_').split('_') if part]

    def parser(self, j: dict):
        """Turn a realtime payload into one MongoDB document.

        Args:
            j: Payload dict; only the first entry of ``j['msgArray']`` is
                used.

        Returns:
            The document dict, or ``None`` when the payload has no
            ``msgArray`` entries.
        """
        messages = j.get('msgArray') if j else None
        if not messages:
            return None
        data = messages[0]

        # 'tlong' is in milliseconds; sub-second precision is dropped.
        date = datetime.fromtimestamp(
            int(data['tlong']) / 1000).replace(microsecond=0)
        # Deliberately not named `time`: that local used to shadow the
        # time module and made the mktime() call below fail.
        time_text = date.strftime('%Y-%m-%d %H:%M:%S')

        to_float = self._to_float
        to_list = self._to_float_list
        return {
            "_id": str(self.stock_num) + "@" + time_text,
            "code": self.stock_num,
            'ts': int(time.mktime(date.timetuple())),
            "time": date,
            "latest_trade_price": to_float(data.get('z')),
            "trade_volume": to_float(data.get('tv')),
            "accumulate_trade_volume": to_float(data.get('v')),
            "best_bid_price": to_list(data.get('b')),
            "best_bid_volume": to_list(data.get('g')),
            "best_ask_price": to_list(data.get('a')),
            "best_ask_volume": to_list(data.get('f')),
            "open": to_float(data.get('o')),
            "high": to_float(data.get('h')),
            "low": to_float(data.get('l'))
        }
Esempio n. 8
0
class TWSE_daily():
    """Back-fill daily data for one stock, month by month, into MongoDB.

    ``start()`` begins at ``start_year``/``start_month`` and keeps moving
    to the next month until that month lies after the month of
    ``now_date`` (captured at construction).
    """

    def __init__(self, stock_num: str, start_year: int, start_month: int):
        """Create the helpers, open a session and store the start month.

        Args:
            stock_num: Identifier of the stock to crawl.
            start_year: First year to crawl.
            start_month: First month (1-12) to crawl.
        """
        self.mongo = MongodbAPI()
        self.stock_num = stock_num
        self.htmlreq = HtmlRequests()
        self.req = self.htmlreq.get_session(SESSIONURL)
        self.req.keep_alive = False
        self.start_year = start_year
        self.start_month = start_month
        self.now_date = datetime.now()
        self.retry = 0

    def start(self):
        """Crawl from the configured start month up to the current month."""
        self.crawl(self.start_year, self.start_month)

    def crawl(self, year, month):
        """Crawl *year*/*month* and every following month up to now.

        The given month is always crawled; afterwards the loop advances
        one month at a time and stops as soon as the next month is later
        than the month of ``self.now_date``.
        """
        # Iterative on purpose: the previous version recursed once per
        # month and re-crawled all remaining months after a retry.
        while True:
            self._crawl_month(year, month)
            date = self._get_next_date(year, month)
            if self._is_after_now(date):
                logging.info("Done crawl Daily data , %s@%s/%s" %
                             (self.stock_num, date['year'], date['month']))
                return
            year, month = date['year'], date['month']

    def _is_after_now(self, date: dict) -> bool:
        """Return True when *date* (year/month dict) is after now's month."""
        # Tuple comparison orders by year first, then month; comparing the
        # two parts independently never stops when now is in December.
        current = (self.now_date.year, self.now_date.month)
        return (date['year'], date['month']) > current

    def _fetch(self, source_url):
        """Get the JSON for *source_url*, retrying an empty result 5 times.

        ``self.retry`` counts the attempts and is reset to 0 afterwards.
        """
        self.retry = 0
        json_data = self.htmlreq.get_json(self.req, source_url)
        while not json_data and self.retry < 5:
            self.retry += 1
            json_data = self.htmlreq.get_json(self.req, source_url)
        self.retry = 0
        return json_data

    def _crawl_month(self, year, month):
        """Fetch, parse and insert the rows of a single month."""
        logging.debug("%s/%s" % (year, month))
        # NOTE(review): this formats TWSEREALTIMEURL, the same constant
        # the realtime crawler uses — confirm it is the intended URL.
        source_url = TWSEREALTIMEURL.format(
            stock_num=self.stock_num, time="%d%02d01" % (year, month))
        json_data = self._fetch(source_url)
        if not json_data:
            # Logged only on real failure; it used to fire on success too.
            logging.error("Can't get old daily stock %s@%s-%s ,url : %s " %
                          (self.stock_num, year, month, source_url))
            return

        data = self.parser(json_data.get('data', None))
        if not data:
            return
        # Up to five insert attempts; the return value is a success flag.
        for _ in range(5):
            if self.mongo.Insert_Many_Data_To("Daily_data", data):
                logging.info("Insert Daily data %s@%s-%s" %
                             (self.stock_num, year, month))
                break
        else:
            logging.error("Fail, Insert Daily data to Mongo,id: %s@%s-%s" %
                          (self.stock_num, year, month))

    def _convert_date(self, date):
        """Convert '106/05/01' to '2017/05/01'"""
        # The first component is offset by 1911 years.
        parts = date.split('/')
        return '/'.join([str(int(parts[0]) + 1911)] + parts[1:])

    def parser(self, j: list) -> list:
        """Convert the payload's ``data`` rows into MongoDB documents.

        Rows whose id already exists in ``Daily_data`` are skipped, and a
        row whose values cannot be converted is logged and skipped.

        Args:
            j: List of rows (lists of strings), or ``None``.

        Returns:
            The list of new documents (empty when *j* is ``None``).
        """
        data = []
        if j is None:
            return data
        for item in j:
            date = datetime.strptime(
                self._convert_date(item[0]), '%Y/%m/%d')
            _id = self.stock_num + "@" + date.strftime("%Y/%m/%d")

            if self.mongo.CheckExists("Daily_data", _id):
                logging.debug("Insert Daily data ,id :%s exists" % (_id))
                continue
            try:
                data.append({
                    '_id': _id,
                    'stock': self.stock_num,
                    'date': date,
                    'ts': int(time.mktime(date.timetuple())),
                    'capacity': int(item[1].replace(',', '')),
                    'turnover': int(item[2].replace(',', '')),
                    'open': self._get_float(item[3]),
                    'high': self._get_float(item[4]),
                    'low': self._get_float(item[5]),
                    'close': self._get_float(item[6]),
                    'change': self._get_float(item[7]),
                    'transaction': int(item[8].replace(',', ''))
                })
            except (ValueError, IndexError, TypeError,
                    AttributeError) as err:
                # Malformed row: report it and carry on with the rest.
                logging.error("daily data fail :%s %s" % (item, err))
                continue
        return data

    def _get_next_date(self, year, month) -> dict:
        """Return the month after *year*/*month* as a year/month dict."""
        if month < 12:
            return {'year': year, 'month': month + 1}
        return {'year': year + 1, 'month': 1}

    def _get_float(self, number: str):
        """Parse a price cell: ``'X0.00'`` -> 0.0, ``'--'`` -> None."""
        cleaned = number.replace(',', '')
        if cleaned == 'X0.00':
            return 0.0
        if number == '--':
            return None
        return float(cleaned)