Example #1
0
def get_stock_by_deal(stocks: List):
    """Fetch per-deal (matched order) data and insert rows newer than stored.

    The feed is stack-ordered (newest first), so only rows whose sID is
    greater than the most recent stored sID are inserted. Intended to run
    every 30 seconds.

    Params:
    ----
     - stocks (List): symbols to refresh
    """
    import ast

    def get_latest_sid(stock):
        # Most recent sID for this symbol; 0 when the symbol has no rows yet.
        # BUG FIX: the query ordered ascending (oldest first) and returned the
        # raw Row list instead of the sid value.
        with loadSession() as session:
            rs = session.query(StockByDeal.sid).filter(
                StockByDeal.sym == stock).order_by(
                    StockByDeal.trading_date.desc()).limit(1).all()
        return int(rs[0][0]) if rs else 0

    for stock in stocks:
        r = requests.get(URL_STOCK_TRADE + stock)
        # SECURITY FIX: never eval() an HTTP response body; literal_eval
        # parses the same literal payload without executing code.
        raw = ast.literal_eval(r.text)
        latest_sid = get_latest_sid(stock)
        # BUG FIX: the cast must wrap the value, not the comparison
        # (was int(row["sID"] > latest_sid)), and raw[updated_data] indexed
        # the list with a dict. Collect every newer row instead.
        new_rows = [row for row in raw if int(row["sID"]) > latest_sid]
        if new_rows:
            with loadSession() as session:
                session.execute(StockByDeal.insert(), new_rows)
                session.commit()
            logging.info(f"Update for {stock}")
Example #2
0
class Customer:
    """Read-side helpers over the customers table.

    NOTE(review): ``res`` is queried once at class-definition (import) time,
    so get_cust_ids/get_cust_names reflect a snapshot, not live data —
    confirm this is intentional before relying on it.
    """

    customer = model.Customer
    session = model.loadSession()
    res = session.query(customer).all()

    def __init__(self):
        pass

    @staticmethod
    def get_cust_ids():
        """Return the customer_id of every cached customer row."""
        return [item.customer_id for item in Customer.res]

    @staticmethod
    def get_cust_info(customer_id):
        """Return (name, address1, address2, city, state, zip) for the given
        customer id, or None when no row matches."""
        session = model.loadSession()
        try:
            for result in session.query(model.Customer).filter(
                    model.Customer.customer_id == customer_id):
                return result.customer_name, result.address1, result.address2, result.city, result.state, result.zip
        finally:
            # BUG FIX: close() used to sit after the return and never ran,
            # leaking the session.
            session.close()

    @staticmethod
    def get_cust_names():
        """Return the customer_name of every cached customer row."""
        return [item.customer_name for item in Customer.res]
Example #3
0
    def save_match500_team_standing(self, item):
        """Replace the stored standing row keyed by (match_id, type,
        team_type) with the values carried in ``item``."""
        session = loadSession()

        # Delete-then-insert keeps exactly one row per key triple.
        session.query(Match500TeamStanding).filter(
            Match500TeamStanding.match_id == item['match_id'],
            Match500TeamStanding.type == item['type'],
            Match500TeamStanding.team_type == item['team_type']).delete()

        standing = Match500TeamStanding()
        for field in ('match_id', 'type', 'team_type', 'match_count', 'win',
                      'draw', 'lose', 'goals', 'lost_goals', 'total_goals',
                      'marks', 'standing', 'win_rate'):
            setattr(standing, field, item[field])

        session.add(standing)
        session.commit()
Example #4
0
def get_daily_stock(stocks: List, _from: int = None) -> None:
    """Fetch end-of-day data and write it into the database.

    Symbols whose feed reports no data for the range are skipped with a
    warning. Intended to run once per day at 23:00.

    Params:
    ----
     - stocks (List): symbols to update
     - _from (int, unix timestamp): start of the range; defaults to one day
       before the time of the call

    Returns:
    ----
    None
    """
    import ast

    _to = int(time.time())
    # BUG FIX: `if not _from` also discarded an explicit 0 timestamp.
    if _from is None:
        yesterday = (datetime.today() - timedelta(1)).timestamp()
        _from = int(yesterday)

    for stock in stocks:
        r = requests.get(URL_STOCK_EOD.format(stock, _from, _to))
        # SECURITY FIX: never eval() an HTTP response body; literal_eval
        # parses the same literal payload without executing code.
        raw = ast.literal_eval(r.text)
        date_to_str = date.fromtimestamp(_to).strftime("%Y/%m/%d")
        if raw["s"] == "ok":
            inserted_data = transformer_for_daily_stock(stock, raw)
            with loadSession() as session:
                session.bulk_insert_mappings(Daily, inserted_data, return_defaults=True)
                session.commit()
            # BUG FIX: the success log printed the raw unix timestamp; log
            # the formatted date like the warning branch does.
            logging.info(f"Cập nhật dữ liệu cho {stock} ngày {date_to_str}")
        else:
            logging.warning(f"Dữ liệu cuối ngày {date_to_str} của {stock} không tồn tại")
def rebuild_model():
    """Create all tables, register the demo user, and seed demo tables and
    products."""
    model._Base.metadata.create_all(model._engine)
    d = DAO()

    d.signup("Luso", "*****@*****.**", "1111", "666-666")

    # Seat counts intentionally repeat: five physical tables, some same size.
    tables = [model.Table(seats) for seats in ("4", "8", "2", "8", "4")]
    products = [
        model.Product("Ensalada César", "Una ensalada riquísima", "en", 8),
        model.Product("Filete César", "Un filete riquísimo", "ca", 12),
        model.Product("Pasta César", "Una pasta riquísima", "pa", 9),
        model.Product("Pizza César", "Una pizza riquísima", "pi", 15),
    ]

    session = model.loadSession()
    session.add_all(tables)
    session.add_all(products)
    session.commit()
Example #6
0
 def updateCrawlLog(self, item):
     """Increment the item counter on the running crawl-log row of the
     spider identified by item["rule_id"]."""
     session = loadSession()
     # BUG FIX: the two conditions were joined with Python `and`, which
     # evaluates to only the second clause, so spiderID was never filtered.
     # Pass them as separate filter clauses (implicit SQL AND).
     log = session.query(SpiderCrawlLog).filter(
         SpiderCrawlLog.spiderID == item["rule_id"],
         SpiderCrawlLog.status == "Running...").first()
     if log is not None:  # no running log row -> nothing to update
         log.items = int(log.items) + 1
         session.commit()
Example #7
0
    def save_match500_bifen(self, item):
        """Persist one correct-score (bifen) odds row copied field-by-field
        from ``item``."""
        session = loadSession()

        bifen = Match500Bifen()
        fields = (
            'match_id', 'comp',
            'one_zero', 'two_zero', 'two_one', 'three_zero', 'three_one',
            'three_two', 'four_zero', 'four_one', 'four_two', 'four_three',
            'zero_one', 'zero_two', 'one_two', 'zero_three', 'one_three',
            'two_three', 'zero_four', 'one_four', 'two_four', 'three_four',
            'zero_zero', 'one_one', 'two_two', 'three_three', 'four_four',
        )
        for field in fields:
            setattr(bifen, field, item[field])

        session.add(bifen)
        session.commit()
Example #8
0
def run():
    """Load project settings and query the enabled crawl rules."""
    settings = get_project_settings()

    try:
        session = loadSession()
        # Select the activated rules from the rule table.
        rules = session.query(Rule).filter(Rule.enable == 1)
    except Exception as e:
        # FIX: `except Exception, e` is Python-2-only syntax; also log the
        # failure at error level rather than info.
        logging.error("Error: %s" % str(e))
Example #9
0
 def process_item(self, item, spider):
     """Persist a YingYongBao row when this item came from the
     'yingyongbao' spider; items from other spiders are ignored."""
     if spider.name != 'yingyongbao':
         return None
     record = ttsrc.YingYongBao(
         name=item['name'],
     )
     session = loadSession()
     session.add(record)
     session.commit()
     return item
Example #10
0
    def spider_closed(self, spider):
        print "spider is closed!"
        session = loadSession()
        log = session.query(SpiderCrawlLog).filter(
            SpiderCrawlLog.spiderID == self.rule.id
            and SpiderCrawlLog.endTime is None).first()
        log.endTime = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        session.commit()

        pass
Example #11
0
 def process_item(self, item, spider):
     """Store one book-comment record and return the item unchanged."""
     record = proxy.Proxy(
         date=item['book_comment_date'],
         score=item['book_score'],
         comment=item['book_comment'],
     )
     session = loadSession()
     session.add(record)
     session.commit()
     return item
Example #12
0
    def save_match500_daxiao(self, item):
        """Persist one over/under (daxiao) odds row built from ``item``."""
        session = loadSession()

        daxiao = Match500Daxiao()
        for field in ('match_id', 'comp', 'handicap', 'over', 'under'):
            setattr(daxiao, field, item[field])

        session.add(daxiao)
        session.commit()
Example #13
0
    def save_match500_ouzhi(self, item):
        """Persist one 1X2 (ouzhi) odds row built from ``item``."""
        session = loadSession()

        ouzhi = Match500Ouzhi()
        for field in ('match_id', 'comp', 'returns', 'win', 'draw', 'lose',
                      'kelly_win', 'kelly_draw', 'kelly_lose'):
            setattr(ouzhi, field, item[field])

        session.add(ouzhi)
        session.commit()
Example #14
0
    def save_match500_rangqiu(self, item):
        """Persist one handicap (rangqiu) odds row built from ``item``."""
        session = loadSession()

        rangqiu = Match500Rangqiu()
        for field in ('match_id', 'comp', 'handicap', 'returns', 'win',
                      'draw', 'lose', 'kelly_win', 'kelly_draw',
                      'kelly_lose'):
            setattr(rangqiu, field, item[field])

        session.add(rangqiu)
        session.commit()
Example #15
0
 def save_match500(self, item):
     """Delete any Match500 row with this item's id, then insert the item
     afresh (delete-then-insert upsert)."""
     session = loadSession()
     session.query(Match500).filter(Match500.id == item['id']).delete()
     match = Match500()
     for field in ('id', 'round_txt', 'start_time', 'home_txt', 'away_txt',
                   'home', 'away', 'home_goal', 'away_goal', 'home_short',
                   'away_short'):
         setattr(match, field, item[field])
     session.add(match)
     session.commit()
Example #16
0
 def _save(self, proxy):
     """Merge this validated proxy into FilterIP; a fresh session per call
     keeps the write path thread safe."""
     fields = dict(
         ip=proxy.ip,
         port=proxy.port,
         type=proxy.type.upper(),
         level=proxy.level,
         location=proxy.location,
         speed=proxy.speed,
         source=proxy.source,
         rule_name=proxy.rule_name,
         # drop the fractional-second part of the timestamp
         update=str(datetime.datetime.now()).split('.')[0],
     )
     session = loadSession()
     session.merge(FilterIP(**fields))
     session.commit()
     # NOTE(review): remove() suggests loadSession() returns a
     # scoped_session — confirm, a plain Session has no remove().
     session.remove()
Example #17
0
 def process_item(self, item, spider):
     """Merge one scraped proxy row into the database; duplicate rows are
     logged and skipped. Items with an empty ip_port are ignored."""
     if len(item['ip_port']):
         record = Proxy(ip_port=item['ip_port'],
                        type=item['type'],
                        level=item['level'],
                        location=item['location'],
                        speed=item['speed'],
                        lifetime=item['lifetime'],
                        lastcheck=item['lastcheck'],
                        rule_id=item['rule_id'],
                        source=item['source'])
         session = loadSession()
         try:
             session.merge(record)
             session.commit()
         except MySQLdb.IntegrityError as e:
             # FIX: `except E, e` is Python-2-only syntax; `as e` works on
             # both Python 2.6+ and Python 3.
             log.msg("MySQL Error: %s" % str(e), _level=logging.WARNING)
         return item
Example #18
0
    def save_match500_history(self, item):
        """Replace the head-to-head history row keyed by
        (match_id, history_url) with the values in ``item``."""
        session = loadSession()

        # Delete-then-insert keeps exactly one row per key pair.
        session.query(Match500History).filter(
            Match500History.match_id == item['match_id'],
            Match500History.history_url == item['history_url']).delete()

        history = Match500History()
        for field in ('match_id', 'league_name', 'start_date', 'history_url',
                      'home', 'away', 'home_goal', 'away_goal'):
            setattr(history, field, item[field])

        session.add(history)
        session.commit()
Example #19
0
    def spider_opened(self, spider):
        print "spider is running!"
        item = SpiderCrawlLog(
            spiderID=self.rule.id,
            spiderName=self.rule.name,
            status="Running...",
            startTime=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            endTime=None,
            pages=0,
            items=0)
        session = loadSession()
        log = session.query(SpiderCrawlLog).filter(
            SpiderCrawlLog.spiderID == self.rule.id
            and SpiderCrawlLog.endTime is None)

        # 查询当前spider是否有未结束的日志
        if len(log) == 0:
            session.add(item)
            session.commit()
        else:
            pass
Example #20
0
 def save_bodan(self, item):
     """Persist one correct-score (bodan) odds record copied field-by-field
     from ``item``."""
     session = loadSession()
     bodan500 = Bodan500()
     fields = (
         'league', 'start_time', 'home', 'away', 'home_goal', 'away_goal',
         'odds_comp',
         'one_zero', 'two_zero', 'two_one', 'three_zero', 'three_one',
         'three_two', 'four_zero', 'four_one', 'four_two', 'four_three',
         'zero_one', 'zero_two', 'one_two', 'zero_three', 'one_three',
         'two_three', 'zero_four', 'one_four', 'two_four', 'three_four',
         'zero_zero', 'one_one', 'two_two', 'three_three', 'four_four',
         'returns',
     )
     for field in fields:
         setattr(bodan500, field, item[field])
     session.add(bodan500)
     session.commit()
Example #21
0
    def save_match500_recent(self, item):
        """Replace the recent-form row keyed by (match_id, team_type,
        recent_type, recent_url) with the values in ``item``."""
        session = loadSession()

        # Delete-then-insert keeps exactly one row per key quadruple.
        session.query(Match500Recent).filter(
            Match500Recent.match_id == item['match_id'],
            Match500Recent.team_type == item['team_type'],
            Match500Recent.recent_type == item['recent_type'],
            Match500Recent.recent_url == item['recent_url']).delete()

        recent = Match500Recent()
        for field in ('match_id', 'team_type', 'recent_type', 'recent_url',
                      'league_name', 'start_date', 'home', 'away',
                      'home_goal', 'away_goal'):
            setattr(recent, field, item[field])

        session.add(recent)
        session.commit()
Example #22
0
class Vendor:
    """Read-side helpers over the vendors table.

    NOTE(review): ``res`` is queried once at class-definition (import) time,
    so the list helpers reflect a snapshot, not live data.
    """

    vendor = model.Vendor
    session = model.loadSession()
    res = session.query(vendor).all()

    @staticmethod
    def get_principal_vendors():
        """Names of vendors flagged as principals (vendor_principal == 'TRUE')."""
        return [item.vendor_name for item in Vendor.res
                if item.vendor_principal == 'TRUE']

    @staticmethod
    def get_vendor_ids():
        """All vendor ids from the cached result set."""
        return [item.vendor_id for item in Vendor.res]

    @staticmethod
    def auto_complete(text):
        """Return vendor rows whose vendor_id contains ``text``.

        BUG FIX: LIKE without wildcards is an exact match, so partial input
        never autocompleted; wrap the text in '%'.
        """
        pattern = '%' + str(text) + '%'
        selection = Vendor.session.query(Vendor.vendor).filter(
            Vendor.vendor.vendor_id.like(pattern))
        return list(selection)

    @staticmethod
    def get_vendor_info(vendor):
        """Return (id, name, address1, address2, city, state, zip) for one
        vendor id, or None when no row matches."""
        session = model.loadSession()
        try:
            for result in session.query(
                    model.Vendor).filter(model.Vendor.vendor_id == vendor):
                return result.vendor_id, result.vendor_name, result.vendor_address1, result.vendor_address2, result.vendor_city, result.vendor_state, result.vendor_zip
        finally:
            # BUG FIX: close() used to follow the return and never executed.
            session.close()
Example #23
0
 def get_latest_sid(stock):
     """Return the most recent sID stored for ``stock`` (0 when none).

     BUG FIX: the query ordered ascending, so limit(1) returned the OLDEST
     row, and the raw result (a list of Row tuples) was returned instead of
     the sid value itself.
     """
     with loadSession() as session:
         rs = session.query(StockByDeal.sid).filter(
             StockByDeal.sym == stock).order_by(
                 StockByDeal.trading_date.desc()).limit(1).all()
     return rs[0][0] if rs else 0
Example #24
0
    def parse_item(self, response):
        """Parse one listing page: bump the running crawl log's page counter,
        then yield one IpProxyPoolItem per proxy row matched by the rule's
        XPaths."""

        def extract(selector, xpath):
            # One rule field: '' when the rule has no xpath or nothing matched.
            if not len(xpath):
                return ""
            value = selector.xpath(xpath).extract_first()
            return value.strip() if value is not None else ""

        session = loadSession()
        # BUG FIX: the two conditions were joined with Python `and`, which
        # keeps only the second clause, so spiderID was never filtered.
        log = session.query(SpiderCrawlLog).filter(
            SpiderCrawlLog.spiderID == self.rule.id,
            SpiderCrawlLog.status == "Running...").first()
        log.pages = int(log.pages) + 1
        session.commit()

        item = IpProxyPoolItem()

        if len(self.rule.loop_xpath):
            for proxy in response.xpath(self.rule.loop_xpath):
                ip = extract(proxy, self.rule.ip_xpath)
                port = extract(proxy, self.rule.port_xpath)
                location1 = extract(proxy, self.rule.location1_xpath)
                location2 = extract(proxy, self.rule.location2_xpath)
                lifetime = extract(proxy, self.rule.lifetime_xpath)
                lastcheck = extract(proxy, self.rule.lastcheck_xpath)
                level = extract(proxy, self.rule.level_xpath)
                # renamed from `type`: avoid shadowing the builtin
                proxy_type = extract(proxy, self.rule.type_xpath)
                speed = extract(proxy, self.rule.speed_xpath)

                item['ip_port'] = (":".join([ip, port])) if len(port) else ip
                item['type'] = proxy_type
                item['level'] = level
                item['location'] = (" ".join([
                    location1, location2
                ])) if location2 is not None and len(location2) else location1
                item['speed'] = speed
                item['lifetime'] = lifetime
                item['lastcheck'] = lastcheck
                item['rule_id'] = self.rule.id
                item['source'] = response.url

                yield item
Example #25
0
 def get_vendor_info(vendor):
     """Return (id, name, address1, address2, city, state, zip) for one
     vendor id, or None when no row matches."""
     session = model.loadSession()
     try:
         for result in session.query(
                 model.Vendor).filter(model.Vendor.vendor_id == vendor):
             return result.vendor_id, result.vendor_name, result.vendor_address1, result.vendor_address2, result.vendor_city, result.vendor_state, result.vendor_zip
     finally:
         # BUG FIX: close() used to sit after the return and never ran,
         # leaking the session.
         session.close()
Example #26
0
            update=str(datetime.datetime.now()).split('.')[0],
        )
        session = loadSession()
        session.merge(avail)
        session.commit()
        session.remove()

 def close(self):
     """Close this object's database session."""
     self.session.close()

    def format_time(self, time):
        new = str(time)
        if '.' in new:
            new = new.split('.')[0]
        return datetime.datetime.strptime(new, '%Y-%m-%d %H:%M:%S')


if __name__ == '__main__':
    start = time.time()
    t = Filter(http_test_url='http://www.xicidaili.com/')
    t.start()
    t.delete_old()
    t.close()
    cost = time.time() - start
    print('Cost %s secs.' % (time.time() - start))
    session = loadSession()
    record = FilterRecord(filter_count=t.count, filter_time=str(cost))
    session.add(record)
    session.commit()
    session.close()
Example #27
0
 def get_cust_info(customer_id):
     """Return (name, address1, address2, city, state, zip) for the given
     customer id, or None when no row matches."""
     session = model.loadSession()
     try:
         for result in session.query(model.Customer).filter(
                 model.Customer.customer_id == customer_id):
             return result.customer_name, result.address1, result.address2, result.city, result.state, result.zip
     finally:
         # BUG FIX: close() used to sit after the return and never ran,
         # leaking the session.
         session.close()
Example #28
0
class Filter(object):
    """Validate proxies from the Proxy table and persist working ones into
    FilterIP.

    A class-level session serves the read paths; _save opens a fresh session
    per call for thread safety.
    """

    session = loadSession()
    headers = {
        'User-Agent':
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/64.0.3282.167 Chrome/64.0.3282.167 Safari/537.36'
    }
    https_test_url = 'https://www.baidu.com'
    http_test_url = 'http://www.meizitu.com'

    def __init__(self, https_test_url=None, http_test_url=None, timeout=10):
        """
        Filter ip
        :param https_test_url: the test https url you wanted, default www.baidu.com.
        :param http_test_url: the test http url you wanted, default www.meizitu.com.
        :param timeout: request timeout, default 10 secs.
        """
        self.timeout = timeout
        if https_test_url is not None:
            self.https_test_url = https_test_url
        if http_test_url is not None:
            self.http_test_url = http_test_url
        self.count = 0

    def start(self):
        """ use threadpool to filter ip """
        data = self._get_data()
        pool = ThreadPool(8)
        pool.map(self._filter, data)
        pool.close()
        pool.join()

    def delete_old(self):
        """ delete unavailable ip in FilterIP which have not updated recently """
        old = self.session.query(FilterIP).all()
        for each in old:
            now = self.format_time(datetime.datetime.now())
            compare = self.format_time(each.update)
            d = now - compare
            # BUG FIX: timedelta.seconds ignores whole days (it wraps every
            # 24h), so day-old rows could survive; total_seconds() gives the
            # true age.
            if d.total_seconds() > 18000:  # 5 hours
                self.session.delete(each)
        self.session.commit()

    def _filter(self, proxy):
        """ to save time, just request response's head instead of whole response's body"""
        if 'HTTPS' in proxy.type.upper():
            proxies = {'https': proxy.ip + ':' + proxy.port}
            url = self.https_test_url
        else:
            proxies = {'http': proxy.ip + ':' + proxy.port}
            url = self.http_test_url
        try:
            # verify=False: we only probe reachability, not cert validity.
            requests.head(url=url,
                          headers=self.headers,
                          proxies=proxies,
                          timeout=self.timeout,
                          verify=False)
            print('Successs: %s.' % (proxy.ip + ':' + proxy.port), ' Type: ',
                  proxy.type)
            self._save(proxy)
            print('Get!!!', proxy.ip, proxy.port)
        except Exception:  # unused `e` removed; any failure means unusable
            print('Failed: ', proxy.ip + ':' + proxy.port, ' Type: ',
                  proxy.type)

    def _get_data(self):
        """ get proxy in last two days in Proxy table """
        data = self.session.query(Proxy).all()
        filter_data = []
        for proxy in data:
            now = self.format_time(datetime.datetime.now())
            compare = self.format_time(proxy.update)
            d = now - compare
            # BUG FIX: same timedelta.seconds wrap-around as delete_old —
            # stale proxies from previous days were wrongly included.
            if d.total_seconds() < 7200:  # 2 hours
                filter_data.append(proxy)
        self.count = len(filter_data)
        return filter_data

    def _save(self, proxy):
        """ make a new session each time to save data for Thread safe """
        avail = FilterIP(
            ip=proxy.ip,
            port=proxy.port,
            type=proxy.type.upper(),
            level=proxy.level,
            location=proxy.location,
            speed=proxy.speed,
            source=proxy.source,
            rule_name=proxy.rule_name,
            update=str(datetime.datetime.now()).split('.')[0],
        )
        session = loadSession()
        session.merge(avail)
        session.commit()
        # NOTE(review): remove() implies loadSession() returns a
        # scoped_session — confirm, a plain Session has no remove().
        session.remove()

    def close(self):
        """Close the shared class-level session."""
        self.session.close()

    def format_time(self, time):
        """Parse str/datetime into a datetime, dropping fractional seconds."""
        new = str(time)
        if '.' in new:
            new = new.split('.')[0]
        return datetime.datetime.strptime(new, '%Y-%m-%d %H:%M:%S')
# -*- coding: utf-8 -*-
import model
from DAO import DAO

# Create the schema, register the demo user, and seed the demo products.
model._Base.metadata.create_all(model._engine)
d = DAO()

d.signup("Luso", "*****@*****.**", "1111", "666-666")

products = [
    model.Product("Red Bull sin azúcar", "La versión sin azúcar de la popular bebida Red Bull", 100.20),
    model.Product("CocaCola Zero", "Bebe CocaCola sin preocuparte por engordar", 60.99),
    model.Product("Fanta Zero", "Bebe Fanta sin preocuparte por engordar", 55.99),
]

session = model.loadSession()
session.add_all(products)
session.commit()
Example #30
0
 def __init__(self):
     """Open this object's database session."""
     self.session = loadSession()
Example #31
0
 def renew_session(self):
     """Close the current session and replace it with a freshly opened one."""
     stale = self.session
     stale.close()
     self.session = loadSession()