def get_3rd_party_campaign_phones(camp_info_today):
    """Collect the union of phone numbers across all running 3rd-party campaigns.

    camp_info_today is passed through to Utils.get_running_3rd_party_campaign_ids;
    each campaign's raw file is read and its phones merged into one set.
    """
    phones = set()
    for campaign_id in Utils.get_running_3rd_party_campaign_ids(camp_info_today):
        raw_file = Utils.get_raw_campaign_file(config.CAMPAIGN_DIR, campaign_id)
        phones |= set(helper.read_phones_file(raw_file))
    return phones
Example #2
0
 def filter(self, data, config):
     """Signal True when sell volume trends down while buy volume trends up."""
     sell_trend = Utils.trendline(self.getSellVolume(data)['volume'])
     buy_trend = Utils.trendline(self.getBuyVolume(data)['volume'])
     return sell_trend < 0 and buy_trend > 0
Example #3
0
        def build_spend_message(to_spend, pk, sequence, txouts):
            """Build the deterministic byte message that gets signed to spend an output.

            Concatenates the serialized outpoint, the sequence number, the
            hex-encoded public key and the serialized txouts, then
            double-SHA256 hashes the result and encodes it to bytes.
            """
            payload = (Utils.serialize(to_spend) + str(sequence)
                       + binascii.hexlify(pk).decode()
                       + Utils.serialize(txouts))
            return Utils.sha256d(payload).encode()
Example #4
0
    def checkToken(token):
        """Validate a JWT token.

        Returns an error JSON response when the token is missing or invalid,
        otherwise None (meaning the token is accepted).
        """
        if token is None:
            return Utils.JsonMessage("Token parameter not found!", 500)
        if LoginService.validateJWTToken(token):
            return None
        return Utils.JsonMessage("Unauthorized", 401)
 def canTradeToday(self):
   """Trade this strategy only on the weekly expiry day or the day before it.

   Returns True on either of those days, False otherwise; logs the decision.
   """
   # Idiom fix: direct truthiness instead of '== True' comparisons (PEP 8).
   if Utils.isTodayOneDayBeforeWeeklyExpiryDay():
     logging.info('%s: Today is one day before weekly expiry date hence going to trade this strategy', self.getName())
     return True
   if Utils.isTodayWeeklyExpiryDay():
     logging.info('%s: Today is weekly expiry day hence going to trade this strategy today', self.getName())
     return True
   logging.info('%s: Today is neither day before expiry nor expiry day. Hence NOT going to trade this strategy today', self.getName())
   return False
Example #6
0
    def loadAndUpdateStrikesFromFile(strikesFilepath):
        """Poll `strikesFilepath` every 60 seconds and keep tracked strikes fresh.

        For every non-live trade in the JSON file: refresh its low if the
        quote is still within trading range, otherwise swap in an updated
        strike. The file is rewritten whenever something changed.

        NOTE(review): runs forever (no exit condition) and re-opens the file
        each cycle without closing the handle -- presumably meant to run in a
        dedicated thread; confirm before reuse.
        """
        if os.path.exists(strikesFilepath) == False:
            logging.warn(
                'loadAndUpdateStrikesFromFile() stikes Filepath %s does not exist',
                strikesFilepath)
            return
        OptionBuyingStrategy.trades = []
        OptionBuyingStrategy.updatedTrades = []
        # Initial read: bail out early when the file holds no trades at all.
        tFile = open(strikesFilepath, 'r')
        tradesData = json.loads(tFile.read())
        if not tradesData:
            return
        else:
            # Polling loop: re-read the file, reconcile, persist, sleep.
            while True:
                isDataUpdate = False
                OptionBuyingStrategy.trades = []
                tFile = open(strikesFilepath, 'r')
                tradesData = json.loads(tFile.read())
                for tr in tradesData:

                    trade = OptionBuyingStrategy.convertJSONToTrade(tr)
                    if (trade.isTradeLive == False):
                        #logging.info("As trade is not live , low will be verified")
                        optionQuote = Quotes.getOptionBuyingQuote(
                            trade.tradingSymbol, True)
                        if (OptionBuyingStrategy.isWithinTradingRange(
                                optionQuote.low)):
                            if (
                                    trade.low != optionQuote.low
                            ):  # Check whether the low is still withing range else it has to be updated
                                isDataUpdate = True
                                message = "OptionBuyingStrategy: Low is updated low for {0}".format(
                                    trade.tradingSymbol)
                                Utils.sendMessageTelegramBot(message)
                                logging.info(message)
                                trade.low = optionQuote.low
                                OptionBuyingStrategy.updatedTrades.append(
                                    trade)
                        else:
                            # Low moved out of range: replace with a fresh strike.
                            # Symbol suffix layout assumed: [-7:-2] = strike,
                            # [-2:] = option type (CE/PE) -- TODO confirm.
                            isDataUpdate = True
                            newStrikeTrade = OptionBuyingStrategy.getUpdatedStrike(
                                int(trade.tradingSymbol[-7:-2]),
                                trade.tradingSymbol[-2:])
                            message = "OptionBuyingStrategy: Strike is updated from {0} to {1}".format(
                                trade.tradingSymbol,
                                newStrikeTrade.tradingSymbol)
                            Utils.sendMessageTelegramBot(message)
                            logging.info(message)
                            trade = newStrikeTrade
                    OptionBuyingStrategy.trades.append(trade)
                # Persist only when something actually changed this cycle.
                if (len(OptionBuyingStrategy.trades) != 0 and isDataUpdate):
                    #OptionBuyingStrategy.trades=[]
                    #OptionBuyingStrategy.trades=Test.updatedTrades
                    OptionBuyingStrategy.writeStrikesToFile(
                        OptionBuyingStrategy.trades)

                time.sleep(60)
Example #7
0
 def get_all_disks(self):
     """Populate self.disksList with a Disk entry per token in self.allDisks.

     NOTE(review): the fdisk command is hard-coded to /dev/sda: and the
     vendor path uses the sd? glob, so every entry receives the same
     size/vendor output regardless of `disk` -- looks unintended; confirm.
     """
     for disk in self.allDisks.split():
         self.disksList.update({
             disk:
             Disk(
                 Utils.produce_command(
                     "sudo fdisk -l | grep /dev/sda: |awk '{print $3$4}'"),
                 Utils.produce_command("cat /sys/block/sd?/device/vendor"))
         })
Example #8
0
 def __init__(self, bot, client,
              torrentsQueue: Queue, ytQueue, megaQueue):
     """Keep references to the bot, the client and the three download queues."""
     self.bot = bot
     self.client = client
     self.torrentsQueue = torrentsQueue
     self.ytQueue = ytQueue
     self.megaQueue = megaQueue
     # Shared helper and database accessors.
     self.utils = Utils()
     self.db = DB()
Example #9
0
    def test_no_details_login(self):
        """Submitting empty credentials must surface the unrecognised-login error."""
        page = LoginPage(self.driver)
        page.login('', '')
        Utils(self.driver).wait_for_element(page.errorMessage)
        # animation still not finished, tried to solve it but couldn't, so added a small extra sleep even if not ideal
        sleep(1)
        assert 'We didn\'t recognize that email and/or password.' in page.get_error_message_text()
Example #10
0
 def __init__(self, bot, client, torrentsQueue: Queue, megaQueue: Queue,
              ytQueue: Queue):
     """Keep references to the bot, the client and the three download queues."""
     self.bot = bot
     self.client = client
     self.megaQueue = megaQueue
     self.torrentsQueue = torrentsQueue
     self.ytQueue = ytQueue
     # Shared helpers, storage and a dedicated logger for this conversation.
     self.utils = Utils()
     self.db = DB()
     self.logger = logging.getLogger(' Admin Conv ')
Example #11
0
    def _generate_filtered_promotion_phones_source(self, cid, camp_info_today, today, directory=config.PROMOTION_DIR):
        """Filter promotion phones for campaign `cid` and persist them.

        Writes the filtered numbers to
        <directory>/id<cid>/<today>/id<cid>.filtered.csv and returns that path.
        """
        target_dir = os.path.join(directory, "id{id_num}/{current_day}".format(id_num=cid, current_day=today))  # /path/to/id###/yyyymmdd
        Utils.mkdirs_if_not_exist(target_dir)
        target_file = os.path.join(target_dir, "id{id_num}.filtered.csv".format(id_num=cid))
        phones = self._filter(today, cid, camp_info_today, directory)
        self._logger.debug("write to file :{0}".format(target_file))
        Utils.write_to_file(phones, target_file)
        return target_file
Example #12
0
def command(string, fromConsole):
    """Dispatch a proxy console command ('stop', 'help', or empty no-op).

    Unknown commands log the configured invalid-command message.
    `fromConsole` is part of the caller-facing signature but unused here.
    """
    cmd = string.lower()  # normalise once instead of lower-casing per branch
    if cmd == 'stop':
        logger.log('info', 'Stopping proxy...')
        Utils.killServer()
    elif cmd == '':
        pass
    elif cmd == 'help':
        logger.log('info', '/stop: Stops the proxy')
    else:
        logger.log('error', str(base.get("invalidCommand")))
Example #13
0
 def setTradeToCompleted(trade, exit, exitReason=None):
     """Mark `trade` COMPLETED at price `exit`, compute its PnL and log it.

     An exitReason already present on the trade wins over the argument.
     NOTE: parameter `exit` shadows the builtin; kept for caller compatibility.
     """
     trade.tradeState = TradeState.COMPLETED
     trade.exit = exit
     # Identity test with 'is None' rather than '== None' (PEP 8).
     trade.exitReason = exitReason if trade.exitReason is None else trade.exitReason
     trade.endTimestamp = Utils.getEpoch()
     trade = Utils.calculateTradePnl(trade)
     logging.info(
         'TradeManager: setTradeToCompleted strategy = %s, symbol = %s, qty = %d, entry = %f, exit = %f, pnl = %f, exit reason = %s',
         trade.strategy, trade.tradingSymbol, trade.filledQty, trade.entry,
         trade.exit, trade.pnl, trade.exitReason)
Example #14
0
 def list(self, page, size, proName, userName, modName, limitTime,
          currentUser):
     """
     Query assigned-task records joined with project-member info.

     Returns (rows, total) for the requested page, or ([], 0) on error.

     NOTE(review): filter values are interpolated with str.format -- SQL
     injection risk if any argument is user-controlled.
     NOTE(review): the total query matches MODULE_NAME with '=' while the
     page query uses LIKE, so rows and total can disagree -- confirm intent.
     """
     # 1-based page number -> OFFSET for the LIMIT clause.
     start = (int(page) - 1) * int(size)
     try:
         sql = """
             SELECT task.*, member.PRO_NAME, member.USER_NAME
             FROM man_pro_task AS task
                 LEFT JOIN man_pro_member AS member
                 ON task.PRO_ID = member.PRO_ID
                 AND task.USER_ID = member.USER_ID
             WHERE 1=1
             {}{}{}{}{}
             ORDER BY MODULE_END ASC, CREATE_TIME DESC
             LIMIT {start}, {size};
             """.format(
             ' AND PRO_NAME like "%{}%"'.format(proName) if proName else '',
             ' AND USER_NAME like "%{}%"'.format(userName)
             if userName else '',
             ' AND MODULE_NAME like "%{}%"'.format(modName)
             if modName else '',
             ' AND MODULE_END <= "{}"'.format(limitTime)
             if limitTime else '',
             ' AND USER_CREATOR = "{}"'.format(currentUser)
             if currentUser else '',
             start=start,
             size=size)
         Utils.log('查询数据库任务列表SQL', sql)
         # NOTE: local name shadows the builtin 'list'.
         list = PySQL.get(sql)
         sqlTotal = """
             SELECT COUNT(1)
             FROM man_pro_task AS task
                 LEFT JOIN man_pro_member AS member
                 ON task.PRO_ID = member.PRO_ID
                 AND task.USER_ID = member.USER_ID
             WHERE 1=1
             {}{}{}{}{}
             """.format(
             ' AND PRO_NAME like "%{}%"'.format(proName) if proName else '',
             ' AND USER_NAME like "%{}%"'.format(userName)
             if userName else '',
             ' AND MODULE_NAME = "{}"'.format(modName) if modName else '',
             ' AND MODULE_END <= "{}"'.format(limitTime)
             if limitTime else '',
             ' AND USER_CREATOR = "{}"'.format(currentUser)
             if currentUser else '',
         )
         total = PySQL.count(sqlTotal)
         return list, total
     except Exception as e:
         print('ERROR {}'.format(e))
         Utils.log('ERROR {}'.format(e))
         return [], 0
Example #15
0
 def status(self, bugId, status, currentUser):
     """
     Update a bug's status and insert a handling record.

     A 'reset' status is stored on the bug as 'created'. Returns True only
     when both the UPDATE and the INSERT affect at least one row.

     NOTE(review): values are interpolated with str.format -- SQL injection
     risk if any argument is user-controlled.
     """
     try:
         bugSQL = """
             UPDATE man_pro_bug
             SET
                 BUG_STATUS='{status}',
                 HANDLE_BY='{handleBy}',
                 HANDLE_TIME='{handleTime}'
             WHERE BUG_ID='{bugId}';
             """.format(status='created' if status == 'reset' else status,
                        handleBy=currentUser,
                        handleTime=Utils.getLocalTime(),
                        bugId=bugId)
         # The history record keeps the raw incoming status (including 'reset').
         recordSQL = """
             INSERT INTO man_bug_record(
                 ID,
                 BUG_ID,
                 BUG_STATUS,
                 HANDLE_BY,
                 HANDLE_TIME
             )VALUES(
                 '{}','{}','{}','{}','{}'
             );
             """.format(Utils.makeId(), bugId, status, currentUser,
                        Utils.getLocalTime())
         Utils.log('更新问题状态SQL', bugSQL)
         Utils.log('插入调试记录SQL', recordSQL)
         return PySQL.execute(bugSQL) > 0 and PySQL.execute(recordSQL) > 0
     except Exception as e:
         print('ERROR {}'.format(e))
         Utils.log('ERROR {}'.format(e))
         return False
Example #16
0
 def __init__(self, vmc_file, user_agent, is_file=False):
     """Capture the VMC input and user agent; seed an empty response dict."""
     self.STORAGE_CERT_DIR = Config.STORAGE_CERT_DIR
     self.is_file = is_file
     self.user_agent = user_agent
     self.vmc_file = vmc_file
     self.Utils = Utils()
     # Result skeleton filled in by the validation steps that follow.
     self.vmc_response = {"status": False, "errors": [], "vmc_link": vmc_file}
Example #17
0
def subProcessTask(df_today, result, start, sm, stockManager, engine, setting,
                   todayStr):
    """Worker task: for each stock row in df_today, load its k-line history,
    compute indicators, run the configured strategies via `sm`, and append
    matching codes to the shared output list `result`.
    """
    for index, row in df_today.iterrows():
        # sleep(0.1)
        code = row['code']
        print('thread %s run %s' % (threading.current_thread().name, code))

        # Callback used by queryData to (re)fetch k-line data when needed.
        def cb(**kw):
            return ts.get_k_data(kw['kw']['code'], start=kw['kw']['start'])

        df_3m = Utils.queryData('k_data_' + code,
                                'code',
                                engine,
                                cb,
                                forceUpdate=setting.get_updateToday(),
                                code=code,
                                start=start)
        if df_3m is None:
            continue
        # skip stocks with less history than the trend period (~180 days)
        if df_3m.empty or len(df_3m) < setting.get_trendPeriod():
            continue
        # append today's realtime row when history does not include today yet
        if df_3m.iloc[-1].get('date') != todayStr:
            today_df = pd.DataFrame([[
                todayStr, row['open'], row['trade'], row['high'], row['low'],
                row['volume'] / 100, code
            ]],
                                    columns=list([
                                        'date', 'open', 'close', 'high', 'low',
                                        'volume', 'code'
                                    ]))
            # NOTE(review): DataFrame.append was removed in pandas 2.0 --
            # this requires an older pandas (switch to pd.concat on upgrade).
            df_3m = df_3m.append(today_df, ignore_index=True)
        # df_3m = df_3m[::-1]
        # compute indicators over roughly half a year of daily data
        Utils.macd(df_3m)
        Utils.myKdj(df_3m)
        ############################## start configured strategy calculations ##############################
        data = {
            'code': code,
            'df_3m': df_3m,
            'df_realTime': row,
            'engine': engine,
            'macd': None,
            'kdj': None,
            'ma': None,
            'turnover': None,
            'volume': None,
            'bigMoney': None,
            'concept': None
        }
        if sm.start(code, setting.get_Strategy(), data, setting):
            #    buildStockModels(code,data,stockManager)
            result.append(code)
Example #18
0
    def getList(self, page, size, proId, userCreator, userId, userName):
        """
        Query users that can still be added to project `proId` -- users
        visible to `userCreator` who are not already project members.

        Returns (rows, total) for the requested page, or ([], 0) on error.

        NOTE(review): filter values are interpolated with str.format -- SQL
        injection risk if any argument is user-controlled.
        """
        # 1-based page number -> OFFSET for the LIMIT clause.
        start = (int(page) - 1) * int(size)
        try:
            sql = """
                SELECT user.*
                FROM man_auth_user AS user
                WHERE (user.USER_CREATOR='{userCreator}' OR user.USER_ID='{userCreator}')
                {id}{name}
                AND user.USER_ID NOT IN (
                    SELECT member.USER_ID
                    FROM man_pro_member AS member
                    WHERE member.USER_CREATOR='{userCreator}'
                    AND member.PRO_ID='{proId}'
                )
                ORDER BY USER_ID
                LIMIT {start}, {size};
                """.format(
                userCreator=userCreator,
                proId=proId,
                id=' AND USER_ID like "%{}%"'.format(userId) if userId else '',
                name=' AND USER_NAME like "%{}%"'.format(userName)
                if userName else '',
                start=start,
                size=size)

            sqlTotal = """
                SELECT COUNT(1)
                FROM man_auth_user AS user
                WHERE (user.USER_CREATOR='{userCreator}' OR user.USER_ID='{userCreator}')
                {id}{name}
                AND user.USER_ID NOT IN (
                    SELECT member.USER_ID
                    FROM man_pro_member AS member
                    WHERE member.USER_CREATOR='{userCreator}'
                    AND member.PRO_ID='{proId}'
                )
                ORDER BY USER_ID;
                """.format(
                userCreator=userCreator,
                proId=proId,
                id=' AND USER_ID like "%{}%"'.format(userId) if userId else '',
                name=' AND USER_NAME like "%{}%"'.format(userName)
                if userName else '')
            Utils.log('查询项目成员列表 {}'.format(sql))
            # NOTE: local name shadows the builtin 'list'.
            list = PySQL.get(sql)
            total = PySQL.count(sqlTotal)
            return list, total
        except Exception as e:
            print('ERROR {}'.format(e))
            Utils.log('ERROR {}'.format(e))
            return [], 0
Example #19
0
 def __init__(self, svg_file, user_agent, is_file=False):
     """Capture the SVG input and user agent; seed an empty response dict."""
     self.RNG_SCHEMA_FILE = Config.RNG_SCHEMA_FILE
     self.STORAGE_SVG_DIR = Config.STORAGE_SVG_DIR
     self.is_file = is_file
     self.user_agent = user_agent
     self.svg_file = svg_file
     self.Utils = Utils()
     # Result skeleton filled in by the validation steps that follow.
     self.svg_response = {"status": False, "errors": [], "svg_link": svg_file}
Example #20
0
    def _generate_filtered_3rd_party_campaign_source(self, cid, given_date, directory=config.CAMPAIGN_DIR):
        """Copy the raw 3rd-party campaign phones into the dated filtered file.

        Writes to <directory>/id<cid>/<given_date>/id<cid>.filtered.csv and
        returns that path. Stats are only logged when generating for today.
        """
        out_dir = os.path.join(directory, "id{id_num}/{current_day}".format(id_num=cid, current_day=given_date))
        Utils.mkdirs_if_not_exist(out_dir)
        target = out_dir + "/" + "id{id_num}.filtered.csv".format(id_num=cid)
        phones = helper.read_phones_file(Utils.get_raw_campaign_file(config.CAMPAIGN_DIR, cid))
        if time.strftime("%Y%m%d") == given_date:
            self._logger.info("[stats]campaign id{0} has {1} base subs, {2} eligible subs".format(cid, len(phones), len(phones)))
        Utils.write_to_file(phones, target)
        return target
Example #21
0
 def getList(self):
     """Return all project types (TYPE_ID cast to char, plus TYPE_TEXT).

     Returns [] when the query fails; the error is printed and logged.
     """
     try:
         sql = "SELECT CONVERT(TYPE_ID, char) AS TYPE_ID, TYPE_TEXT FROM man_pro_type;"
         # renamed local from 'list' to stop shadowing the builtin
         rows = PySQL.get(sql)
         return rows
     except Exception as e:
         print('ERROR {}'.format(e))
         Utils.log('ERROR {}'.format(e))
         return []
Example #22
0
 def updatePwd(self, id, pwd, new):
     """
     Verify login info and change the account password.

     Returns the affected row count (0 when id/pwd did not match or on error).

     NOTE(review): values are interpolated with str.format -- SQL injection
     risk if user-controlled. Parameter `id` shadows the builtin (kept for
     caller compatibility).
     """
     try:
         sql = "UPDATE man_auth_user SET USER_PWD='{2}' WHERE USER_ID='{0}' AND USER_PWD='{1}';".format(
             id, pwd, new)
         return PySQL.execute(sql)
     except Exception as e:
         print('ERROR {}'.format(e))
         Utils.log('ERROR {}'.format(e))
         return 0
Example #23
0
	def __init__(self,domain):
		"""Normalise the input domain and initialise scan defaults."""
		self.approved_nameservers = None
		self.approved_mx_hostnames = None
		self.nameservers = None
		self.skip_tls = True
		self.include_dmarc_tag_descriptions = False
		self.timeout = 2.0
		self.parked = False
		self.Utils = Utils()
		self.domain_results = {}
		# strip trailing dots/newlines and lower-case before deriving the base domain
		self.domain = domain.rstrip(".\r\n").strip().lower()
		self.base_domain = checkdmarc.get_base_domain(self.domain)
Example #24
0
 def getList(self, page, size, userId, userName, userAuth, userLogin,
             userCreator):
     """
     Query the user list, page by page.

     Non-admin creators see only their own users (plus themselves).
     Returns (rows, total) for the requested page, or ([], 0) on error.

     NOTE(review): filter values are interpolated with str.format -- SQL
     injection risk if any argument is user-controlled.
     NOTE(review): the USER_LOGIN branch formats the literal "******" (no
     placeholder), so it filters on that literal -- looks like a redaction
     artifact in the original source; confirm the intended value.
     NOTE(review): 'USER_Name' casing differs from the other columns --
     relies on case-insensitive column names.
     """
     # 1-based page number -> OFFSET for the LIMIT clause.
     start = (int(page) - 1) * int(size)
     try:
         sql = """
             SELECT USER_ID,USER_NAME,USER_AUTH,USER_LOGIN,USER_CREATOR,USER_CRE_TIME
             FROM man_auth_user
             WHERE 1=1
             {}{}{}{}{}{}
             ORDER BY USER_AUTH, USER_ID
             LIMIT {start}, {size};
             """.format(
             ' AND USER_ID like "%{}%"'.format(userId) if userId else '',
             ' AND USER_Name like "%{}%"'.format(userName)
             if userName else '',
             ' AND USER_AUTH = "{}"'.format(userAuth) if userAuth else '',
             ' AND USER_LOGIN = "******"'.format(userLogin)
             if userLogin else '',
             ' AND USER_CREATOR = "{}"'.format(userCreator)
             if userCreator and userCreator != 'admin' else '',
             ' OR USER_ID = "{}"'.format(userCreator)
             if userCreator and userCreator != 'admin' else '',
             start=start,
             size=size)
         Utils.log('查询数据库用户列表SQL', sql)
         # NOTE: local name shadows the builtin 'list'.
         list = PySQL.get(sql)
         sqlTotal = """
             SELECT COUNT(1) FROM man_auth_user
             WHERE 1=1
             {}{}{}{}{}{};
             """.format(
             ' AND USER_ID like "%{}%"'.format(userId) if userId else '',
             ' AND USER_Name like "%{}%"'.format(userName)
             if userName else '',
             ' AND USER_AUTH = "{}"'.format(userAuth) if userAuth else '',
             ' AND USER_LOGIN = "******"'.format(userLogin)
             if userLogin else '',
             ' AND USER_CREATOR = "{}"'.format(userCreator)
             if userCreator and userCreator != 'admin' else '',
             ' OR USER_ID = "{}"'.format(userCreator)
             if userCreator and userCreator != 'admin' else '',
         )
         total = PySQL.count(sqlTotal)
         return list, total
     except Exception as e:
         print('ERROR {}'.format(e))
         Utils.log('ERROR {}'.format(e))
         return [], 0
Example #25
0
 def __init__(self, vmc_file, user_agent, svg_link='', is_file=False):
     """Capture VMC inputs; parsed state stays empty until validation runs."""
     self.STORAGE_CERT_DIR = Config.STORAGE_CERT_DIR
     self.is_file = is_file
     self.user_agent = user_agent
     self.vmc_file = vmc_file
     self.svg_link = svg_link
     self.Utils = Utils()
     # Populated later by parsing/validation.
     self.parsed_vmc = None
     self.pem_file_path = None
     # Result skeleton filled in by the validation steps that follow.
     self.vmc_response = {"status": False, "errors": [], "vmc_link": vmc_file}
Example #26
0
 def getInfo(self, page, size, proId, userId, userName):
     """
     Query project members joined with the task modules they own.

     Returns (rows, total) for the requested page, or ([], 0) on error.

     NOTE(review): filter values are interpolated with str.format -- SQL
     injection risk if any argument is user-controlled.
     """
     # 1-based page number -> OFFSET for the LIMIT clause.
     start = (int(page) - 1) * int(size)
     try:
         sql = """
             SELECT mem.*, task.MODULE_ID, task.MODULE_NAME
             FROM man_pro_member AS mem
             LEFT JOIN man_pro_task AS task
             ON mem.PRO_ID = task.PRO_ID
             AND mem.USER_ID = task.USER_ID
             WHERE mem.PRO_ID='{proId}'
             {id}{name}
             ORDER BY mem.USER_ID
             LIMIT {start}, {size};
             """.format(
             proId=proId,
             id=' AND mem.USER_ID like "%{}%"'.format(userId)
             if userId else '',
             name=' AND mem.USER_NAME like "%{}%"'.format(userName)
             if userName else '',
             start=start,
             size=size)
         # Bug fix: the count query previously carried the page's ORDER BY /
         # LIMIT clause, so any page beyond the first produced an empty
         # COUNT result. The total must be computed over the full filtered set.
         sqlTotal = """
             SELECT COUNT(1)
             FROM man_pro_member AS mem
             LEFT JOIN man_pro_task AS task
             ON mem.PRO_ID = task.PRO_ID
             AND mem.USER_ID = task.USER_ID
             WHERE mem.PRO_ID='{proId}'
             {id}{name};
             """.format(
             proId=proId,
             id=' AND mem.USER_ID like "%{}%"'.format(userId)
             if userId else '',
             name=' AND mem.USER_NAME like "%{}%"'.format(userName)
             if userName else '')
         list = PySQL.get(sql)
         total = PySQL.count(sqlTotal)
         Utils.log('查询项目成员负责模块 {}'.format(sql))
         return list, total
     except Exception as e:
         print('ERROR {}'.format(e))
         Utils.log('ERROR {}'.format(e))
         return [], 0
Example #27
0
 def detection(self, session):
     """Probe the logged-in page; return 0 when still logged in, 1 otherwise."""
     r = session.get(self.logined_url, headers=self.headers3)
     Utils.delay(constants.DELAY_MIN_SECOND, constants.DELAY_MAX_SECOND)
     if r.status_code != 200:
         return 1
     r.encoding = 'utf-8'
     tree = etree.HTML(r.text)
     # The profile name node only renders for an authenticated session.
     name = tree.xpath('//*[@id="profile"]/div/div[2]/div[1]/div/div/text()[1]')
     return 0 if name else 1
Example #28
0
    def testOrders():
        """Manual end-to-end exercise of the Zerodha order manager.

        Places a limit order ~1% below CMP, modifies it, places an SL order,
        places and then cancels a target order, pausing 10s between steps so
        each can be observed in the broker terminal. For manual testing only.
        """
        orderManager = ZerodhaOrderManager()
        exchange = 'NSE'
        tradingSymbol = 'SBIN'
        lastTradedPrice = Quotes.getCMP(exchange + ':' + tradingSymbol)
        logging.info(tradingSymbol + ' CMP = %f', lastTradedPrice)

        # Limit price 1% below CMP, rounded to a valid NSE tick.
        limitPrice = lastTradedPrice - lastTradedPrice * 1 / 100
        limitPrice = Utils.roundToNSEPrice(limitPrice)
        qty = 1
        direction = 'LONG'

        # place order
        origOrderId = orderManager.placeOrder(tradingSymbol, limitPrice, qty,
                                              direction)
        logging.info('Original order Id %s', origOrderId)

        # sleep for 10 seconds then modify order
        time.sleep(10)
        newPrice = lastTradedPrice
        if origOrderId:
            orderManager.modifyOrder(origOrderId, newPrice)

        # sleep for 10 seconds and then place SL order
        time.sleep(10)
        slPrice = newPrice - newPrice * 1 / 100
        slPrice = Utils.roundToNSEPrice(slPrice)
        slDirection = 'SHORT' if direction == 'LONG' else 'LONG'
        slOrderId = orderManager.placeSLOrder(tradingSymbol, slPrice, qty,
                                              slDirection)
        logging.info('SL order Id %s', slOrderId)

        # sleep for 10 seconds and then place target order
        time.sleep(10)
        targetPrice = newPrice + newPrice * 2 / 100
        targetPrice = Utils.roundToNSEPrice(targetPrice)
        targetDirection = 'SHORT' if direction == 'LONG' else 'LONG'
        targetOrderId = orderManager.placeOrder(tradingSymbol, targetPrice,
                                                qty, targetDirection)
        logging.info('Target order Id %s', targetOrderId)

        # sleep for 10 seconds and cancel target order
        time.sleep(10)
        if targetOrderId:
            orderManager.cancelOrder(targetOrderId)
            logging.info('Cancelled Target order Id %s', targetOrderId)

        logging.info(
            "Algo done executing all orders. Check ur orders and positions in broker terminal."
        )
Example #29
0
 def validInsert(self, userId):
     """
     Check whether a user with `userId` already exists.

     Returns True when at least one matching row is found. DB errors are
     printed/logged and treated as "not found".

     NOTE(review): userId is interpolated with str.format -- SQL injection
     risk if user-controlled.
     """
     count = 0
     try:
         sql = """
             SELECT COUNT(1) FROM man_auth_user
             WHERE USER_ID="{}";
             """.format(userId)
         count = PySQL.count(sql)
     except Exception as e:
         print('ERROR {}'.format(e))
         Utils.log('ERROR {}'.format(e))
     return count > 0
Example #30
0
def run():
    """Crawl the Douban Top-250 movie list, collecting movie ids page by
    page while rotating through the available cookies, then persist the ids.
    """
    helptool = HelpTool()
    cookieList = helptool.getCookie()
    base_url = 'https://movie.douban.com/top250'
    headers = {'User-Agent': random.choice(constants.USER_AGENT)}
    doubanIds = []
    savePath = 'data/top250id.txt'
    # index of the cookie currently in use
    index = 0
    # fetch the first Top-250 page
    r = requests.get(
        base_url,
        headers=headers,
        cookies=cookieList[index]
    )
    r.encoding = 'utf-8'
    while True:
        html = etree.HTML(r.text)
        result = html.xpath('//div[@class="item"]')
        # If the page yielded no items, skip this iteration.
        # NOTE(review): `r` is never re-fetched and no delay happens here, so
        # an empty page makes this loop spin forever on the same response.
        if not result:
            continue
        for item in result:
            doubanid = item.xpath('div/div[@class="hd"]/a/@href')
            if doubanid:
                # the movie id is the last non-empty path segment of the href
                tmp = doubanid[0].strip().split('/')
                if tmp[-1] == '':
                    value = tmp[-2]
                else:
                    value = tmp[-1]
                doubanIds.append(value)
                print('----------电影id ' + value + ':爬取成功' + '----------')
        nextUrl = html.xpath('//span[@class="next"]/a/@href')
        if not nextUrl:
            break
        url = base_url+nextUrl[0]
        # rotate cookies; pause once a full rotation completes
        index += 1
        if index >= constants.UserNum:
            index = 0
            Utils.delay(constants.DELAY_MIN_SECOND, constants.DELAY_MAX_SECOND)
        r = requests.get(
            url,
            headers=headers,
            cookies=cookieList[index]
        )
        r.encoding = 'utf-8'
    if doubanIds:
        helptool.storeFailData(savePath, doubanIds)
Example #31
0
    def mine(cls,
             block: Block,
             mine_interrupt: threading.Event = None) -> Union[Block, None]:
        """Proof-of-work search: find a nonce whose double-SHA256 header hash
        is below the target implied by block.bits.

        Returns the mined block, or None if `mine_interrupt` was set.

        Bug fix: the default `mine_interrupt=None` previously crashed with
        AttributeError on `.clear()`; the event is now optional.
        """
        start = time.time()
        nonce = 0
        target = (1 << (256 - block.bits))
        if mine_interrupt is not None:
            mine_interrupt.clear()

        logger.info(f'[consensus] mining after block {block.prev_block_hash}')
        while int(Utils.sha256d(block.header(nonce)), 16) >= target:
            nonce += 1

            if mine_interrupt is not None and mine_interrupt.is_set():
                logger.info(f'[consensus] mining interrupted +++ {nonce}')
                mine_interrupt.clear()
                return None

        block = block._replace(nonce=nonce)
        # Guard against a zero duration for the hash-rate computation.
        duration = max(time.time() - start, 0.0001)
        khs = (block.nonce // duration) // 1000
        logger.info(
            f'[consensus] mining block found at nonce={nonce} using {round(duration, 4)} s at rate {khs} KH/s: {block.id}'
        )

        return block
    def __init__(self):
        """Set up the Nisbets scraper thread: spider, regex helpers, CSV
        duplicate tracking and the CSV header row."""
        QtCore.QThread.__init__(self)
        self.isExiting = False
        self.logger = LogManager(__name__)
        self.spider = Spider()
        self.regex = Regex()
        dupCsvReader = Csv()
        # Column 0 (URL) of previously written rows, used for de-duplication.
        self.dupCsvRows = dupCsvReader.readCsvRow("nisbets.csv", 0)
        self.csvWriter = Csv("nisbets.csv")
        self.mainUrl = "http://www.nisbets.co.uk"
        csvHeaderList = [
            "URL",
            "Product Code",
            "Product Technical Specifications",
            "Product Name",
            "Brand",
            "Product Price",
            "Product Short Description",
            "Product Long Description",
            "Image File Name",
            "User Manual File Name",
            "Exploded View File Name",
            "Spares Code",
            "Accessories",
            # Bug fix: a missing comma implicitly concatenated
            # "Product Status" and "Category1" into one header cell.
            "Product Status",
            "Category1",
            "Category2",
            "Category3",
            "Category4",
        ]
        if "URL" not in self.dupCsvRows:
            self.csvWriter.writeCsvRow(csvHeaderList)
            self.dupCsvRows.append(csvHeaderList[0])

        self.utils = Utils()
Example #33
0
 def delete(self, fileId, userCreator):
     """
     Delete a project file record by id.

     Returns True when a row was deleted. `userCreator` is currently unused
     (no ownership check is performed) -- kept for caller compatibility.

     NOTE(review): fileId is interpolated with str.format -- SQL injection
     risk if user-controlled.
     """
     count = 0
     try:
         sql = """
             DELETE FROM man_pro_file
             WHERE ID='{}';
             """.format(fileId)
         Utils.log("删除项目文件", sql)
         count = PySQL.execute(sql)
     except Exception as e:
         print('ERROR {}'.format(e))
         Utils.log('ERROR {}'.format(e))
     return count > 0
Example #34
0
def getTigerCodeList():
    """Return codes of institutional-top stocks with positive net buying.

    Data comes from Utils.queryData('tiger', ...) which refreshes via
    ts.inst_tops(); only rows with net > 0 are kept.
    """
    df_tiger = Utils.queryData('tiger', 'code', engine,
                               lambda **kw: ts.inst_tops(),
                               forceUpdate=True)
    positive = df_tiger[df_tiger['net'] > 0]
    return positive['code'].tolist()
Example #35
0
def get_whitelist_name_and_source_path(cid, camp_info_date, given_date):
    """Return (whitelist set name, filtered source path) for campaign `cid`.

    Campaigns whose credit range marks them as promotions use the promotion
    set/filter; everything else is treated as a 3rd-party campaign.
    """
    credit_list = camp_info_date[cid]
    min_credit, max_credit = credit_list[0], credit_list[1]

    promotion_filter = PromotionFilter(logger)
    campaign_filter = CampaignFilter(logger)

    if Utils.is_promotion(min_credit, max_credit):
        return (config.PROMOTION_SET,
                promotion_filter.get_promotion_source_path(cid, camp_info_date, given_date))
    return (config.CAMPAIGN_SET,
            campaign_filter.get_3rd_party_campaign_sourse_path(cid, camp_info_date, given_date))
Example #36
0
def get_update_info(update_ids, camp_info_yesterday, camp_info_today, today, yesterday):
    """Compute per-campaign add/remove phone-number deltas between two days.

    For each campaign id, reads yesterday's and today's whitelist files,
    writes the set differences to add.csv / remove.csv under the campaign's
    dated output directory, and returns a list of
    (cid, set_name, add_path, remove_path) tuples.

    :param update_ids: iterable of campaign ids to process.
    :param camp_info_yesterday: campaign info mapping for *yesterday*.
    :param camp_info_today: campaign info mapping for *today*.
    :param today: today's date string (used in paths).
    :param yesterday: yesterday's date string (used in paths).
    :return: list of (cid, set_name, output_add, output_remove) tuples.
    """
    tuples_list = []
    for cid in update_ids:
        # set_name is resolved twice; the yesterday value is deliberately
        # overwritten by the today lookup (both are expected to agree).
        set_name, yesterday_file = get_whitelist_name_and_source_path(cid, camp_info_yesterday, yesterday)
        set_name, today_file = get_whitelist_name_and_source_path(cid, camp_info_today, today)
        today_set, yesterday_set = set(), set()

        try:
            today_set = helper.read_phones_file(today_file)
            yesterday_set = helper.read_phones_file(yesterday_file)
        except IOError as ie:
            # Missing file on either day: that day's set stays empty.
            # Fix: the original message read "file doesn't not exist" (double
            # negative); corrected to say what actually happened.
            logger.error("file does not exist: {0}".format(ie))

        directory = config.PROMOTION_DIR if set_name == config.PROMOTION_SET else config.CAMPAIGN_DIR
        out_dir = os.path.join(directory, "id{0}/{1}/".format(cid, today))
        output_add, output_remove = os.path.join(out_dir, "add.csv"), os.path.join(out_dir, "remove.csv")
        Utils.write_to_file(today_set - yesterday_set, output_add)
        Utils.write_to_file(yesterday_set - today_set, output_remove)

        id_tuple = (cid, set_name, output_add, output_remove)
        tuples_list.append(id_tuple)
        logger.debug(id_tuple)
    return tuples_list
 def __init__(self):
     """Set up logging, the HTTP spider, regex helper, CSV dedup list,
     CSV writer, and write the output header row."""
     QThread.__init__(self)
     self.logger = LogManager(__name__)
     self.spider = Spider()
     self.regex = Regex()
     dupCsvReader = Csv()
     # First column (URL) of rows already in the CSV -- used to skip
     # products that were scraped on a previous run.
     self.dupCsvRows = dupCsvReader.readCsvRow('cs_product.csv', 0)
     self.csvWriter = Csv('cs_product.csv')
     self.mainUrl = 'http://www.cs-catering-equipment.co.uk/'
     self.utils = Utils()
     # NOTE(review): the header row is written unconditionally, so each
     # re-run appends another header line -- confirm whether intended.
     self.csvWriter.writeCsvRow(
         ['URL', 'Product Code', 'Product Name', 'Manufacturer', 'List Price', 'Product Price', 'Discount',
          'Product Short Description', 'Product Long Description', 'Product Technical Specifications', 'Warranty',
          'Delivery',
          'Product Image',
          'Category 1', 'Category 2', 'Category 3', 'Category 4', 'Brand Image'])
     self.totalProducts = 0
Example #38
0
    def startStt(self, audioFd):
        """
        Start streaming speech-to-text recognition over a WebSocket.

        Streams audio from *audioFd* to the IBM Watson Speech-to-Text
        service and dispatches results to the registered listeners.
        Blocks in reactor.run() until the Twisted reactor stops.

        :param audioFd: open audio source handed to WSInterfaceFactory;
            expected to match self.CONTENT_TYPE -- confirm with caller.
        """

        # Route Twisted logging to stdout.
        log.startLogging(sys.stdout)

        hostname = "stream.watsonplatform.net"
        headers = {}

        # Authentication header: either a pre-fetched Watson auth token,
        # or HTTP Basic auth built from the configured user/password.
        if self.configData["tokenauth"]:
            headers['X-Watson-Authorization-Token'] = Utils.getAuthenticationToken("https://" + hostname,
                                                                                   'speech-to-text',
                                                                                   self.configData["user"],
                                                                                   self.configData["password"])
        else:
            # NOTE(review): base64.b64encode requires bytes on Python 3;
            # this call only works as-is on Python 2 -- confirm target version.
            string = self.configData["user"] + ":" + self.configData["password"]
            headers["Authorization"] = "Basic " + base64.b64encode(string)

        # Create a WS client factory bound to our protocol.
        url = "wss://" + hostname + "/speech-to-text/api/v1/recognize?model=" + self.configData["model"]
        summary = {}  # populated by the factory/protocol with recognition results
        factory = WSInterfaceFactory(audioFd,
                                     summary,
                                     self.CONTENT_TYPE,
                                     self.configData["model"],
                                     url,
                                     headers,
                                     debug=False)
        factory.setListeners(self.listeners)
        factory.protocol = WSInterfaceProtocol

        # wss:// needs a TLS context; plain ws:// does not.
        if factory.isSecure:
            contextFactory = ssl.ClientContextFactory()
        else:
            contextFactory = None
        connectWS(factory, contextFactory)

        # Twisted event loop; blocks until reactor.stop() is called elsewhere.
        reactor.run()
# Finish populating the address created earlier in this script.
address_obj.phone("647-788-3901")
address_obj.recipientName("Jane Doe")

# Link the address to the customer profile created earlier
# (response_object holds that profile's id).
profile_obj1 = Profile(None)
profile_obj1.id(response_object.id)
address_obj.profile(profile_obj1)

# Create the address under the customer vault; the response carries the
# new address id used below as the bank account's billing address.
response_object2 = optimal_obj.customer_vault_service_handler().create_address(address_obj)

# Create EFT Bank Account
eftbankaccount_obj = EFTBankAccount(None)
eftbankaccount_obj.nickName("Sally's Bank of Montreal Account")
eftbankaccount_obj.accountHolderName("Sally")
eftbankaccount_obj.accountNumber(RandomTokenGenerator().generateNumber())
eftbankaccount_obj.transitNumber("25039")
eftbankaccount_obj.institutionId("001")
eftbankaccount_obj.billingAddressId(response_object2.id)

# Attach the same customer profile to the bank account.
profile_obj2 = Profile(None)
profile_obj2.id(response_object.id)

eftbankaccount_obj.profile(profile_obj2)
response_object3 = optimal_obj.customer_vault_service_handler().create_eft_bank_account(eftbankaccount_obj)


# Echo the created ids and the full bank-account response.
print ('\nProfile Id : ', response_object.id)
print ('\nAddress Id : ', response_object2.id)
print ("\nResponse Values ==========> ")
Utils.print_response(response_object3)

class NisbetProduct(QtCore.QThread):
    """QThread that scrapes product data from www.nisbets.co.uk.

    Walks the site's category tree (four levels deep), scrapes each
    product's details, downloads its image / user manual / exploded-view
    diagram, and appends one row per product to nisbets.csv.
    HTML-formatted progress messages are emitted on scrapProductData.
    """

    scrapProductData = QtCore.pyqtSignal(object)
    stopThread = QtCore.pyqtSignal(int)

    def __init__(self):
        """Set up spider, regex helper, CSV dedup list/writer and header row."""
        QtCore.QThread.__init__(self)
        self.isExiting = False  # cooperative stop flag; see stop()
        self.logger = LogManager(__name__)
        self.spider = Spider()
        self.regex = Regex()
        dupCsvReader = Csv()
        # First column (URL) of rows already in the CSV -- used for dedup.
        self.dupCsvRows = dupCsvReader.readCsvRow("nisbets.csv", 0)
        self.csvWriter = Csv("nisbets.csv")
        self.mainUrl = "http://www.nisbets.co.uk"
        csvHeaderList = [
            "URL",
            "Product Code",
            "Product Technical Specifications",
            "Product Name",
            "Brand",
            "Product Price",
            "Product Short Description",
            "Product Long Description",
            "Image File Name",
            "User Manual File Name",
            "Exploded View File Name",
            "Spares Code",
            "Accessories",
            # Fix: the original read `"Product Status" "Category1"` -- implicit
            # string-literal concatenation from a missing comma -- yielding 17
            # headers for the 18 data columns written in scrapCategory4Data.
            "Product Status",
            "Category1",
            "Category2",
            "Category3",
            "Category4",
        ]
        if "URL" not in self.dupCsvRows:
            self.csvWriter.writeCsvRow(csvHeaderList)
            self.dupCsvRows.append(csvHeaderList[0])

        self.utils = Utils()

    def run(self):
        """Thread entry point: start the scrape."""
        self.scrapData()

    def stop(self):
        """Request a cooperative stop; the scrape loops poll isExiting."""
        self.isExiting = True

    def scrapData(self):
        """Fetch the home page and fan out over level-1/level-2 categories."""
        if self.isExiting:
            return
        self.scrapProductData.emit("<font color=green><b>Main URL: </b>%s</font>" % self.mainUrl)
        self.logger.debug("===== URL [" + self.mainUrl + "] =====")
        data = self.spider.fetchData(self.mainUrl)
        if data:
            data = self.regex.reduceNewLine(data)
            data = self.regex.reduceBlankSpace(data)
            category1Chunk = self.regex.getAllSearchedData('(?i)<li id="li-id-\d+">(.*?)</ul> </li>', data)
            if category1Chunk:
                for category1Data in category1Chunk:
                    category1 = self.regex.getSearchedData('(?i)<a href="[^"]*">([^<]*)</a>', category1Data)
                    category2Chunk = self.regex.getAllSearchedData(
                        '(?i)<li><a href="([^"]*)">([^<]*)</a>', category1Data
                    )
                    if category2Chunk:
                        for category2Data in category2Chunk:
                            self.scrapCategory2Data(self.mainUrl + category2Data[0], category1, category2Data[1])
        self.scrapProductData.emit("<font color=red><b>Finish Scraping Product data from %s</b></font>" % self.mainUrl)

    def scrapCategory2Data(self, url, category1, category2):
        """Scrape a level-2 category page and recurse into level 3."""
        if self.isExiting:
            return
        self.scrapProductData.emit("<b>Category 2 URL: </b>%s" % url)
        self.logger.debug("== Category 2 URL [" + url + "] ==")
        data = self.spider.fetchData(url)
        if data:
            data = self.regex.reduceNewLine(data)
            data = self.regex.reduceBlankSpace(data)
            category3Chunks = self.regex.getSearchedData('(?i)<ul class="topCat clear-fix">(.*?)</ul>', data)
            if category3Chunks:
                category3Chunk = self.regex.getAllSearchedData('(?i)<a href="([^"]*)">([^<]*)<', category3Chunks)
                if category3Chunk:
                    for category3Data in category3Chunk:
                        self.scrapCategory3Data(self.mainUrl + category3Data[0], category1, category2, category3Data[1])

    def scrapCategory3Data(self, url, category1, category2, category3):
        """Scrape a level-3 category page and recurse into level 4."""
        if self.isExiting:
            return
        self.scrapProductData.emit("<b>Category 3 URL: </b>%s" % url)
        self.logger.debug("== Category 3 URL [" + url + "] ==")
        data = self.spider.fetchData(url)
        if data:
            data = self.regex.reduceNewLine(data)
            data = self.regex.reduceBlankSpace(data)
            category4Chunks = self.regex.getSearchedData('(?i)<ul class="topCat clear-fix">(.*?)</ul>', data)
            if category4Chunks:
                category4Chunk = self.regex.getAllSearchedData('(?i)<a href="([^"]*)">([^<]*)<', category4Chunks)
                if category4Chunk:
                    for category4Data in category4Chunk:
                        category4Url = self.mainUrl + category4Data[0]
                        self.scrapCategory4Data(category4Url, category1, category2, category3, category4Data[1])

    def scrapCategory4Data(self, url, category1, category2, category3, category4):
        """Scrape every product listed on a level-4 category page.

        For each product: fetch its detail page, download image / user
        manual / exploded-view PDF, and write one 18-column CSV row.
        """
        if self.isExiting:
            return
        self.scrapProductData.emit("<b>Category 4 URL: </b>%s" % url)
        self.logger.debug("== Category 4 URL [" + url + "] ==")
        data = self.spider.fetchData(url)
        if data:
            data = self.regex.reduceNewLine(data)
            data = self.regex.reduceBlankSpace(data)

            categoryChunk = self.regex.getAllSearchedData(
                '(?i)<div class="product-list-row clear-after">(.*?)</fieldset>', data
            )
            if categoryChunk:
                for categoryData in categoryChunk:
                    if self.isExiting:
                        return
                    productInfo = self.regex.getSearchedDataGroups(
                        '(?i)<h3 class="product-name"> <a href="([^"]*)"[^>]*?>([^<]*)</a>', categoryData
                    )
                    productUrl = self.mainUrl + productInfo.group(1)
                    productName = productInfo.group(2)
                    if productUrl not in self.dupCsvRows:
                        self.dupCsvRows.append(productUrl)
                    else:
                        self.scrapProductData.emit(
                            "<font color=green><b>Already exists this item in csv Skip it</b></font>"
                        )
                        self.logger.debug("========= Already exists this item Skip it ===========")
                        # NOTE(review): this `return` abandons the remaining
                        # products of the category on the first duplicate; the
                        # message says "skip", so `continue` may have been
                        # intended -- confirm before changing.
                        return

                    productImageInfo = self.regex.getSearchedDataGroups(
                        '(?i)<img class="primaryImage" src="([^"]*)" alt="([^"]*)"', categoryData
                    )
                    # Swap the medium-size image path for the xlarge variant.
                    image = self.regex.replaceData("(?i)medium", "xlarge", str(productImageInfo.group(1)))
                    productImageUrl = self.mainUrl + image
                    productImage = self.regex.getSearchedData("(?i)/([a-zA-Z0-9-_.]*)$", image)
                    self.utils.downloadFile(productImageUrl, "images/" + productImage)
                    productCode = productImageInfo.group(2)
                    productTechSpecs = self.regex.getSearchedData(
                        '(?i)<p class="description">([^<]*)</p>', categoryData
                    )

                    brandName = self.regex.getSearchedData(
                        '(?i)<img class="brand-image" src="[^"]*" alt="([^"]*)"', categoryData
                    )
                    price = self.regex.getSearchedData(
                        '(?i)<div class="reduced-price"> <span class="[^"]*">([^<]*)</span>', categoryData
                    )
                    if price:
                        price = price.strip()[1:]  # drop leading char (presumably the currency symbol)
                    productStatus = self.regex.getSearchedData(
                        '(?i)<div class="availibility"> <img alt="([^"]*)"', categoryData
                    )

                    productDesc = ""
                    productLongDesc = ""
                    spareCodes = ""
                    accessoryCode = ""
                    userManual = ""
                    explodedView = ""
                    self.scrapProductData.emit(
                        "<br /><font color=green><b>Product Details URL: </b>%s</font>" % productUrl
                    )
                    productChunk = self.spider.fetchData(productUrl)
                    if productChunk:
                        productChunk = self.regex.reduceNewLine(productChunk)
                        productChunk = self.regex.reduceBlankSpace(productChunk)
                        productDesc = self.regex.getSearchedData(
                            '(?i)<div class="productDesc"> <h1 class="[^"]*"[^>]*?>[^<]*?</h1>.*?<p>([^<]*)</p>',
                            productChunk,
                        )
                        productLongDesc = self.regex.getSearchedData(
                            '(?i)<div class="info-product[^>]*?>(.*?)</div>', productChunk
                        )

                        # The spares/accessories/documents AJAX endpoints live
                        # under the product URL's parent path.
                        otherUrl = self.regex.getSearchedData("(?i)(^.*?/)[a-zA-Z0-9._-]*?$", productUrl)
                        self.logger.debug("== Common Product URL [" + otherUrl + "] ==")
                        sparesUrl = otherUrl + "AjaxProductSpares.raction"
                        self.logger.debug("== Spares URL [" + sparesUrl + "] ==")
                        spares = self.spider.fetchData(sparesUrl)
                        if spares:
                            spares = self.regex.getAllSearchedData(
                                '(?i)<p class="code"><span class="bold">Code:</span>([^<]*)</p>', spares
                            )
                            if spares:
                                spareCodes = ", ".join(spares)

                        accessoriesUrl = otherUrl + "AjaxProductAccessories.raction"
                        self.logger.debug("== Accessories URL [" + accessoriesUrl + "] ==")
                        accessories = self.spider.fetchData(accessoriesUrl)
                        if accessories:
                            accessories = self.regex.getAllSearchedData(
                                '(?i)<p class="code"><span class="bold">Code:</span>([^<]*)</p>', accessories
                            )
                            if accessories:
                                accessoryCode = ", ".join(accessories)

                        docUrl = otherUrl + "AjaxProductDocuments.raction"
                        self.logger.debug("== Document URL[" + docUrl + "] ==")
                        userManuals = self.spider.fetchData(docUrl)
                        if userManuals:
                            userManual = self.regex.getSearchedData(
                                '(?i)<a class="document-icon" href="([^"]*)"[^>]*?>Download User Manual</a>',
                                userManuals,
                            )
                            self.logger.debug("Manual URL: " + userManual)
                            if userManual:
                                userManualUrl = self.mainUrl + self.regex.replaceData(" ", "%20", userManual)
                                self.logger.debug("User Manual URL: " + userManualUrl)
                                self.scrapProductData.emit("<b>User Manual PDF URL: </b>%s" % userManualUrl)
                                userManual = self.regex.getSearchedData("(?i)/([a-zA-Z0-9-_. ]*)$", userManual)
                                userManual = self.regex.replaceData("\s+", "_", userManual.strip())
                                self.scrapProductData.emit(
                                    "<font color=green><b>Downloading User Manual: </b>%s <b>Please Wait...</b>"
                                    % userManual
                                )
                                self.utils.downloadFile(userManualUrl, "user_manual/" + userManual)

                            explodedView = self.regex.getSearchedData(
                                '(?i)<a class="document-icon" href="([^"]*)"[^>]*?>Download Exploded Diagram</a>',
                                userManuals,
                            )
                            if explodedView:
                                explodedViewUrl = self.mainUrl + self.regex.replaceData(" ", "%20", explodedView)
                                self.scrapProductData.emit("<b>Exploded Diagram PDF URL: </b>%s" % explodedViewUrl)
                                explodedView = self.regex.getSearchedData("(?i)/([a-zA-Z0-9-_. ]*)$", explodedView)
                                explodedView = self.regex.replaceData("\s+", "_", explodedView.strip())
                                self.scrapProductData.emit(
                                    "<font color=green><b>Downloading Exploded Diagram: </b>%s <b>Please Wait...</b>"
                                    % explodedView
                                )
                                self.utils.downloadFile(explodedViewUrl, "exploded_view/" + explodedView)

                    # 18 columns -- must stay in sync with csvHeaderList above.
                    csvData = [
                        productUrl,
                        productCode,
                        productTechSpecs,
                        productName,
                        brandName,
                        price.strip(),
                        productDesc,
                        productLongDesc,
                        productImage,
                        userManual,
                        explodedView,
                        spareCodes,
                        accessoryCode,
                        productStatus,
                        category1,
                        category2,
                        category3,
                        category4,
                    ]
                    self.csvWriter.writeCsvRow(csvData)
                    self.logger.debug("Scraped data " + str(csvData))
                    self.scrapProductData.emit("<div><b>Scraped Data: </b>%s<br /></div>" % str(csvData))
Example #41
0
 def __init__(self):
     """Initialise both base classes explicitly.

     NOTE(review): assumes the enclosing class multiply inherits from
     `constants` and `Utils`; neither base is visible here -- confirm.
     """
     constants.__init__(self)
     Utils.__init__(self)
class CsProduct(QThread):
    """QThread that scrapes product data from cs-catering-equipment.co.uk.

    Python 2 code (uses `print` statements and `unicode`).  Walks the
    site's category tree four levels deep, scrapes each product's detail
    page, downloads product/brand images, and appends one row per product
    to cs_product.csv.  HTML progress messages are emitted on the
    notifyProduct signal.
    """

    notifyProduct = pyqtSignal(object)

    def __init__(self):
        """Set up spider, regex helper, CSV dedup list/writer and header row."""
        QThread.__init__(self)
        self.logger = LogManager(__name__)
        self.spider = Spider()
        self.regex = Regex()
        dupCsvReader = Csv()
        # First column (URL) of rows already in the CSV -- used for dedup.
        self.dupCsvRows = dupCsvReader.readCsvRow('cs_product.csv', 0)
        self.csvWriter = Csv('cs_product.csv')
        self.mainUrl = 'http://www.cs-catering-equipment.co.uk/'
        self.utils = Utils()
        self.csvWriter.writeCsvRow(
            ['URL', 'Product Code', 'Product Name', 'Manufacturer', 'List Price', 'Product Price', 'Discount',
             'Product Short Description', 'Product Long Description', 'Product Technical Specifications', 'Warranty',
             'Delivery',
             'Product Image',
             'Category 1', 'Category 2', 'Category 3', 'Category 4', 'Brand Image'])
        self.totalProducts = 0

    def run(self):
        """Thread entry point: scrape everything, then signal completion."""
        self.scrapProduct()
        self.notifyProduct.emit('<font color=red><b>Finished Scraping All products.</b></font>')

    def scrapProduct(self):
        """Fetch the home page and fan out over top-level categories."""
        self.logger.debug('Main URL: ' + self.mainUrl)
        self.notifyProduct.emit('<font color=green><b>Main URL: %s</b></font>' % self.mainUrl)
        data = self.spider.fetchData(self.mainUrl)
        data = self.regex.reduceNewLine(data)
        data = self.regex.reduceBlankSpace(data)
        self.notifyProduct.emit('<b>Try to scrap all categories.</b>')
        categories = self.regex.getAllSearchedData('(?i)<a href="([^"]*)" class="level-top" title="([^"]*)"', data)
        if categories and len(categories) > 0:
            self.notifyProduct.emit('<b>Total Categories Found: %s</b>' % str(len(categories)))
            for category in categories:
                category1Name = unicode(category[1]).strip()
                self.scrapCategory1Data(str(category[0]).strip(), category1Name)

    def scrapCategory1Data(self, url, category1Name):
        """Scrape a level-1 category page and recurse into level 2."""
        self.logger.debug('Category 1 URL: ' + url)
        self.notifyProduct.emit('<b>Try to scrap all categories under Category[%s]</b>' % category1Name)
        self.notifyProduct.emit('<font color=green><b>Category URL: %s</b></font>' % url)
        data = self.spider.fetchData(url)
        data = self.regex.reduceNewLine(data)
        data = self.regex.reduceBlankSpace(data)
        categories = self.regex.getAllSearchedData(
            '(?i)<li> <a href="([^"]*)" title="([^"]*)"[^>]*?>[^<]*?</a> </li>', data)
        if categories and len(categories) > 0:
            self.notifyProduct.emit('<b>Total Categories Found: %s</b>' % str(len(categories)))
            for category in categories:
                self.scrapCategory2Data(category[0], category1Name, category[1])

    def scrapCategory2Data(self, url, category1Name, category2Name):
        """Scrape a level-2 category page and recurse into level 3."""
        self.logger.debug('Category 2 URL: ' + url)
        self.notifyProduct.emit('<b>Try to scrap all categories under Category[%s]</b>' % category2Name)
        self.notifyProduct.emit('<font color=green><b>Category URL: %s</b></font>' % url)
        data = self.spider.fetchData(url)
        data = self.regex.reduceNewLine(data)
        data = self.regex.reduceBlankSpace(data)
        categories = self.regex.getAllSearchedData(
            '(?i)<li> <a href="([^"]*)" title="([^"]*)"[^>]*?>[^<]*?</a> </li>', data)
        if categories and len(categories) > 0:
            for category in categories:
                print 'category2: ' + category[0]
                self.scrapCategory3Data(category[0], category1Name, category2Name, category[1])

    def scrapCategory3Data(self, url, category1Name, category2Name, category3Name):
        """Scrape a level-3 category page and recurse into the product lists."""
        self.logger.debug('Category 3 URL: ' + url)
        self.notifyProduct.emit('<b>Try to scrap all categories under Category[%s]</b>' % category3Name)
        self.notifyProduct.emit('<font color=green><b>Category URL: %s</b></font>' % url)
        data = self.spider.fetchData(url)
        if data and len(data) > 0:
            data = self.regex.reduceNewLine(data)
            data = self.regex.reduceBlankSpace(data)
            categories = self.regex.getAllSearchedData(
                '(?i)<li> <a href="([^"]*)" title="([^"]*)"[^>]*?>[^<]*?</a> </li>', data)
            if categories and len(categories) > 0:
                for category in categories:
                    print [category1Name, category2Name, category3Name, category[1]]
                    self.scrapProductsDetails(category[0], category1Name, category2Name, category3Name, category[1])

    def scrapProductsDetails(self, url, category1Name, category2Name, category3Name, category4Name):
        """List all products of a level-4 category and scrape unseen ones."""
        self.logger.debug('Product Details URL: ' + url)
        self.notifyProduct.emit('<b>Try to scrap all products under Category[%s]</b>' % category4Name)
        self.notifyProduct.emit('<font color=green><b>Category URL: %s</b></font>' % url)
        # ?limit=10000&mode=list requests the whole category as one flat list.
        data = self.spider.fetchData(url + '?limit=10000&mode=list')
        if data and len(data) > 0:
            data = self.regex.reduceNewLine(data)
            data = self.regex.reduceBlankSpace(data)
            products = self.regex.getAllSearchedData('(?i)<div class="listing-item[^"]*?">(.*?)</div>', data)
            if products and len(products) > 0:
                self.totalProducts += len(products)
                self.notifyProduct.emit('<font color=green><b>Total Products Found [%s]</b></font>' % unicode(self.totalProducts))
                for product in products:
                    productDetailUrl = self.regex.getSearchedData('(?i)<a href="([^"]*)"', product)
                    if productDetailUrl not in self.dupCsvRows:
                        self.scrapProductDetails(productDetailUrl, category1Name, category2Name, category3Name,
                            category4Name)
                    else:
                        self.notifyProduct.emit(
                            '<font color=green><b>Already Exists This Product Under Category[%s]. Skip It.</b></font>' % category4Name)

    def scrapProductDetails(self, url, category1Name, category2Name, category3Name, category4Name):
        """Scrape one product detail page, download images, write a CSV row."""
        self.logger.debug('Product Detail URL: ' + url)
        self.notifyProduct.emit('<b>Try to scrap product details under Category[%s]</b>' % category4Name)
        self.notifyProduct.emit('<font color=green><b>Product Detail URL: %s</b></font>' % url)
        data = self.spider.fetchData(url)
        if data and len(data) > 0:
            data = self.regex.reduceNewLine(data)
            data = self.regex.reduceBlankSpace(data)
            manufacturer = self.regex.getSearchedData(
                '(?i)<span class="manufacturer-box-label">Manufacturer:</span>([^<]*)</p>', data)
            productCode = self.regex.getSearchedData(
                '(?i)<span class="manufacturer-box-label">Model No:</span>([^<]*)</p>',
                data)
            productName = self.regex.getSearchedData('(?i)<div class="product-name"> <h1>([^<]*)</h1>', data)
            productTechnicalDesc = self.regex.getSearchedData('(?i)<div class="product-short-description">([^<]*)</div>'
                , data)
            productDescriptions = self.regex.getSearchedData('(?i)<div class="product-specs">(.*?)</div>', data)
            productShortDesc = ''
            productFullDesc = ''
            if productDescriptions and len(productDescriptions) > 0:
                print 'desc: ' + productDescriptions
                productShortDesc = self.regex.getSearchedData('(?i)<p>(.*?)</p>', productDescriptions)
                productFullDesc = '\n'.join(
                    self.regex.getAllSearchedData('(?i)<li>([^<]*)</li>', productDescriptions))
            listPriceChunk = self.regex.getSearchedData('(?i)<div class="rrp-price regular-price">(.*?)</div>', data)
            listPrice = ''
            if listPriceChunk and len(listPriceChunk) > 0:
                listPrice = self.regex.getSearchedData('(?i)([0-9,.]+)', listPriceChunk)

            savePriceChunk = self.regex.getSearchedData('(?i)<div class="regular-price saving-price">(.*?)</div>', data)
            savePrice = ''
            if savePriceChunk and len(savePriceChunk) > 0:
                savePrice = self.regex.getSearchedData('(?i)([0-9%]+)', savePriceChunk)

            priceChunk = self.regex.getSearchedData('(?i)<div class="[^"]*" id="product-price-\d+">(.*?)</div>', data)
            price = ''
            if priceChunk and len(priceChunk) > 0:
                price = self.regex.getSearchedData('(?i)([0-9,.]+)', priceChunk)

            deliveryChunk = self.regex.getSearchedData('(?i)<div class="delivery">(.*?)</div>', data)
            delivery = ''
            if deliveryChunk and len(deliveryChunk) > 0:
                delivery = self.regex.getSearchedData('(?i)<p>([^<]*)</p>', deliveryChunk)

            warrantyChunk = self.regex.getSearchedData('(?i)<div class="warranty">(.*?)</div>', data)
            warranty = ''
            if warrantyChunk and len(warrantyChunk) > 0:
                warranty = self.regex.getSearchedData('(?i)<p>([^<]*)</p>', warrantyChunk)

            ## Download and save product images
            productImageUrl = self.regex.getSearchedData(
                '(?i)src="(http://assets.cs-catering-equipment.co.uk/media/catalog/product/cache/1/image/256x/[^"]*)"',
                data)
            print productImageUrl
            productImage = self.regex.getSearchedData('(?i)/([a-zA-Z0-9-_.]*)$', productImageUrl)
            if productImage and len(productImage) > 0:
                print productImage
                self.notifyProduct.emit('<b>Downloading Product Image [%s]. Please wait...</b>' % productImage)
                self.utils.downloadFile(productImageUrl, 'product_image/' + productImage)

            ## Download and save brand images
            brandImageUrl = self.regex.getSearchedData(
                '(?i)<div class="manufacturer-box-left"><a href="[^"]*"[^>]*?><img src="([^"]*)"', data)
            brandImage = ''
            if brandImageUrl and len(brandImageUrl) > 0:
                brandImageUrl = self.regex.replaceData('(?i)logo/', '', brandImageUrl)
                brandImage = self.regex.getSearchedData('(?i)/([a-zA-Z0-9-_.]*)$', brandImageUrl)
                if brandImage and len(brandImage) > 0:
                    self.notifyProduct.emit('<b>Downloading Brand Image [%s]. Please wait...</b>' % brandImage)
                    self.utils.downloadFile(brandImageUrl, 'brand_image/' + brandImage)

            # 18 columns, matching the header row written in __init__.
            csvData = [url, productCode, productName, manufacturer, listPrice, price, savePrice,
                       productShortDesc, productFullDesc, productTechnicalDesc, warranty, delivery,
                       productImage,
                       category1Name, category2Name, category3Name, category4Name, brandImage]

            self.csvWriter.writeCsvRow(csvData)
            self.logger.debug(unicode(csvData))
            self.notifyProduct.emit('<b>Product Details: %s</b>' % unicode(csvData))
Example #43
0
    def radioParser(self):

        pages_array = []
        all_genres = self.genresParser()

        for item in range(len(all_genres)):
            radio_url_genres = 'http://vtuner.com/setupapp/guide/asp/BrowseStations/BrowsePremiumStations.asp?sCategory=' + all_genres[item] + '&sBrowseType=Format&sViewBy=&sSortby=&sWhatList=&sNiceLang=&iCurrPage=1'
            url_clean = urllib.urlopen(radio_url_genres)
            soup = BeautifulSoup(url_clean, "lxml")
            pages = soup.findAll('div')
            for row in pages:
                y = row.findAll('a', {"class":"paging"})
                for row_1 in y:
                    k = row_1.findAll('b')
                    for row_2 in k:
                        l = row_2.getText()
                        pages_array.append(l)

            for number in range(len(pages_array)):
                # Fragment of a vTuner scraper: fetch one paginated genre listing page,
                # parse each station row, and upsert station / stream-URL / genre rows
                # into MySQL. `all_genres`, `item`, `pages_array`, `number` and `self`
                # are bound in the enclosing (not visible here) loops/method.
                radio_urls = 'http://vtuner.com/setupapp/guide/asp/BrowseStations/BrowsePremiumStations.asp?sCategory=' + all_genres[item] + '&sBrowseType=Format&sViewBy=&sSortby=&sWhatList=&sNiceLang=&iCurrPage=' + pages_array[number]
                # Base used to resolve the relative .asp playlist links found in the table.
                m3u_url = 'http://vtuner.com/setupapp/guide/asp/'
                url_ready = urllib.urlopen(radio_urls)
                soup_radios = BeautifulSoup(url_ready, "lxml")
                # The listing page nests one inner <table> per station group inside
                # the rows of table#table1.
                main_table = soup_radios.find('table', id='table1').findAll('tr')
                for tab in main_table:
                    table = tab.findAll('table')
                    for tagz in table:
                        oi = tagz.findAll('tr')
                        # Need try..catch cuz some URLs can be broken
                        # so whole for going to be try..catch
                        try:
                            for tr in oi:
                                # Per-station fields, reset for every row.
                                station_url = ''
                                station_name = ''
                                station_location = ''
                                station_country = ''
                                station_genre = ''
                                station_quality = ''
                                station_updated = datetime.datetime.now()
                                alTds = tr.findAll('td')
                                # Data rows have at least 5 cells: link, name,
                                # location, genre, quality.
                                if len(alTds) < 5:
                                    continue
                                all_td_string = ''.join([str(x) for x in alTds])
                                # Skip header / decorative rows identified by their markup.
                                bg_tag = 'bgcolor="#FFFFFF"'
                                strong_tag = '<strong>Station Name</strong>'
                                if bg_tag in all_td_string or strong_tag in all_td_string:
                                    continue
                                # Cell 0: relative playlist link -> resolve + fetch the
                                # real stream URL out of the .m3u/.asp file.
                                if len(alTds) > 0:
                                    allTdLinks = alTds[0].findAll('a')
                                    if len(allTdLinks) > 0:
                                        station_url = m3u_url + allTdLinks[0]['href']
                                        print "DEBUG URL #1 ::::> " + station_url
                                        station_url = station_url.replace('../', '')
                                        print "DEBUG URL #2 ::::> " + station_url
                                        # Utils.parse_m3u_file appears to return a
                                        # (real_stream_url, cleaned_url) pair -- TODO confirm.
                                        station_url = Utils.parse_m3u_file(station_url)
                                        real_station_url = station_url[0]
                                        clean_url = station_url[1]
                                        print "STATION URL: " + str(real_station_url)
                                        print "CLEAN URL: " + str(clean_url)
                                        logging.info('\n')
                                        logging.info('--- Radio block starts here ---')
                                        logging.info("URL of Radio: " + str(real_station_url))
                                # Cell 1: station display name.
                                if len(alTds) > 1:
                                    allTdLinks = alTds[1].findAll('a')
                                    if len(allTdLinks) > 0:
                                        station_name = allTdLinks[0].getText()
                                        logging.info("Name of Radio: " + station_name)
                                # Cell 2: free-text location, mapped to a country.
                                if len(alTds) > 2:
                                    station_location = alTds[2].getText()
                                    station_country = self.countryParseObj.get_country(station_location)
                                    logging.info("Location of Radio: " + station_location)
                                    logging.info("Country of Radio: " + station_country)
                                # Cell 3: genre link.
                                if len(alTds) > 3:
                                    allTdLinks = alTds[3].findAll('a')
                                    if len(allTdLinks) > 0:
                                        station_genre = allTdLinks[0].getText()
                                        logging.info("Genre of Radio: " + station_genre)
                                # Cell 4: bitrate/quality text.
                                if len(alTds) > 4:
                                        station_quality = alTds[4].getText()
                                        logging.info("Quality of Radio: " + station_quality)
                                        logging.info('--- Radio block ends here ---')

                                # Remove quotes for MySQL inserts
                                station_name = self.utilsObj.replace_quots(station_name)

                                ''' look IF station already EXIST in DB '''
                                # NOTE(review): clean_url is only assigned when cell 0
                                # had a link -- otherwise this raises NameError on the
                                # first row or silently reuses the previous row's value.
                                # NOTE(review): every query below is built by string
                                # concatenation from scraped text -- SQL injection risk;
                                # should use parameterized queries. clean_url is also
                                # interpolated into REGEXP unescaped.
                                check_station = "SELECT id from `radio_station_stream_urls` where url REGEXP ('" + clean_url + "') LIMIT 1;"
                                check_station_result = self.mysql_obj.make_select(check_station)
                                logging.info("Station ID is: " + str(check_station_result))

                                if not check_station_result:
                                    query_radio = "INSERT INTO `radio_stations`(`name`, `location`, `country`, `updated`) VALUES ('" + station_name + "'," + "'" + station_location + "'," + "'" + str(station_country) + "'," + "'" + str(station_updated) + "');"
                                    insert_id = self.mysql_obj.make_insert(query_radio)

                                    # make_insert apparently signals failure with -1.
                                    if insert_id != -1:
                                        # Keep digits only, e.g. "128 Kbps" -> "128".
                                        station_quality = re.sub("\D", "", station_quality)
                                        query_url_and_bitrate = "INSERT INTO `radio_station_stream_urls`(`station_id`, `url`, `bitrate`) VALUES('" + str(insert_id) + "'," + "'" + real_station_url + "'," + "'" + station_quality + "');"
                                        self.mysql_obj.make_insert(query_url_and_bitrate)

                                    # Use only the first genre of e.g. "Rock/Pop".
                                    sep = "/"
                                    genre = station_genre.split(sep, 1)[0]

                                    # Look up the genre id, creating the genre on miss.
                                    query_get_genre_id = "SELECT `id` from `music_genres` WHERE `name`= " + "'" + genre + "'" + ";"
                                    result_genre_id = self.mysql_obj.make_select(query_get_genre_id)

                                    if not result_genre_id:
                                        query_insert_genre = "INSERT INTO `music_genres` (`name`) VALUES ('" + str(genre) + "');"
                                        id_genre_is = self.mysql_obj.make_insert(query_insert_genre)
                                        logging.info("Result is NONE, Adding tnew genre!")
                                    else:
                                        print "GENRE ID: " + str(result_genre_id[0]['id']) + "\n"
                                        id_genre_is = str(result_genre_id[0]['id'])

                                    # NOTE(review): runs even when insert_id == -1 above,
                                    # linking the genre to a nonexistent station id.
                                    query_insert_id_of_genre = "INSERT into `radio_station_genres` (`station_id`, `genre_id`) VALUES ('" + str(insert_id) + "','" + str(id_genre_is) + "');"
                                    self.mysql_obj.make_insert(query_insert_id_of_genre)
                                else:
                                    print "Radio station - ALREADY EXIST!" + "\n"
                        # Python 2 except syntax; StandardError is broad -- swallows and
                        # prints any parse/DB error for the whole inner table.
                        except StandardError, e:
                            print e