Example #1
0
 def get_data_from_co2_signal(latitude, longitude, country_code_default):
     """Fetch the latest carbon intensity from CO2 Signal and cache it.

     Nothing is requested when no API key is configured, when the country
     cannot be resolved, or when the newest cached sample for the country is
     younger than ``CO2_SIGNAL_REQ_INTERVAL`` seconds.

     :param latitude: latitude handed to ``Ecomix.get_country``
     :param longitude: longitude handed to ``Ecomix.get_country``
     :param country_code_default: fallback handed to ``Ecomix.get_country``
     :return: ``True`` only when a numeric value was fetched, cached and the
         response status is ``"ok"``; ``False`` in every other case
     """
     if Ecomix.co2_signal_key is None:
         return False
     try:
         now = datetime.utcnow().replace(tzinfo=UTC)
         country_code = Ecomix.get_country(latitude, longitude,
                                           country_code_default)
         # Explicit checks instead of ``assert``: assertions vanish under -O.
         if country_code is None:
             logger.debug("ecomix: country code is unknown")
             return False
         samples = Ecomix._cache.setdefault(country_code, [])
         if samples and \
                 (now - samples[-1][0]).total_seconds() < CO2_SIGNAL_REQ_INTERVAL:
             # The newest sample is recent enough, do not query the API again.
             return False
         res = requests.get(
             CO2_SIGNAL_URL + "/v1/latest",
             headers={"auth-token": Ecomix.co2_signal_key},
             params={"countryCode": country_code})
         data = res.json()
         value = data["data"]["carbonIntensity"]
         if not isinstance(value, numbers.Number):
             logger.debug("ecomix: carbon intensity isn't a number: %s", value)
             return False
         samples.append([now, value])
         return data["status"] == "ok"
     except (AssertionError, NameError, KeyError, ValueError,
             requests.exceptions.RequestException):
         # ValueError covers an undecodable JSON body; RequestException covers
         # network failures. This lookup is best effort and must not crash
         # the caller.
         logger.debug("ecomix:", exc_info=True)
         return False
Example #2
0
    def doWork(self):
        '''Thread body overriding WorkRequest's worker function; runs in the thread pool.

        Takes url nodes from the download queue assigned to this downloader,
        downloads them and hands the wrapped pages to the html queue.
        '''
        logger.debug('Start downloader`s doWork...')
        while True:
            if self.__dlQueue.qsize() > 0:
                node = self.__dlQueue.get()
                self.__downloadingFlag += 1
                content = self.__downloadPage(node.url)
                gotPage = len(content) != 0
                if gotPage:
                    # Wrap the page in the internal format and queue it for
                    # the parser module.
                    self.__htmlQueue.put(
                        HtmlModel(node.url, content, timestamp(), node.depth))
                self.__downloadingFlag -= 1
                if not gotPage:
                    # An empty download goes straight to the next queue item,
                    # skipping the exit check and the pause below.
                    continue
            # Check the exit event.
            if self.__exitEvent.is_set():
                logger.info('Download model quit...')
                return
            # Pause between downloads.
            time.sleep(FETCH_TIME_INTERVAL)
Example #3
0
def parse(line):
    """Return the first capture group of ``pattern`` matched at the start of
    *line*, or ``None`` when the line does not match."""
    found = pattern.match(line)
    if not found:
        return None
    account = found.group(1)
    logger.debug('find vpn account {}', account)
    return account
 def refresh_remote_token(self, force=False):
     """Renew the remote access token and push it to the MQTT client.

     :param force: when true, renew even if the last renewal is younger than
         ``MQTT_TOKEN_TTL`` seconds
     :return: ``None`` when the token is still fresh or the request failed;
         otherwise the last response obtained (the refresh response, or the
         result of ``get_remote_access_token`` when the refresh was rejected)
     """
     if not force and self.remote_token_last_update is not None:
         last_update: datetime = self.remote_token_last_update
         # Skip the renewal while the current token is younger than its TTL.
         if (datetime.now() - last_update).total_seconds() < MQTT_TOKEN_TTL:
             return None
     self.refresh_token()
     try:
         if self.remote_refresh_token is None:
             logger.error("remote_refresh_token isn't defined")
             self.load_otp(force_new=True)
         res = self.manager.post(REMOTE_URL + self.client_id,
                                 json={"grant_type": "refresh_token", "refresh_token": self.remote_refresh_token},
                                 headers=self.headers)
         data = res.json()
         logger.debug("refresh_remote_token: %s", data)
         if "access_token" in data:
             self.remote_access_token = data["access_token"]
             self.remote_refresh_token = data["refresh_token"]
             self.remote_token_last_update = datetime.now()
         else:
             # The refresh was rejected: request a brand new token with an otp code.
             logger.error("can't refresh_remote_token: %s\n Create a new one", data)
             self.remote_token_last_update = datetime.now()
             otp_code = self.get_otp_code()
             res = self.get_remote_access_token(otp_code)
         # The MQTT client authenticates with the access token as its password.
         self.mqtt_client.username_pw_set("IMA_OAUTH_ACCESS_TOKEN", self.remote_access_token)
         self.save_config()
         return res
     except RequestException as e:
         logger.error("Can't refresh remote token %s", e)
         # Blocks the calling thread for a minute before giving up.
         sleep(60)
         return None
Example #5
0
 def createWorkers(self, threadNum, pollTimeout):
     '''Create worker threads and add them to the thread pool.

     At most MAX_THREAD_NUM workers are created per call.
     '''
     logger.debug('Start create work thread...')
     workerCount = min(MAX_THREAD_NUM, threadNum)
     for _ in range(workerCount):
         # Every worker polls the shared request queue.
         worker = WorkerThread(self.__requestsQueue, pollTimeout)
         self.__workers.append(worker)
Example #6
0
def read_file_trans(fn):
    """Run NER prediction on a random sample of lines and dump the results.

    The UTF-8 file *fn* is split into lines, shuffled, and the first 3000
    lines are passed to ``__pred``. The first key/value pair of each
    prediction is collected and the collection is written to
    ``./ner_train.json``. Lines whose prediction fails are skipped after
    printing the traceback.

    :param fn: path of the UTF-8 encoded input file
    :return: always ``0``
    """
    nlp = train_ner_cn.init_model_4_pred(
        '../../../guiyang_real_population_address/trunk/models/ner')

    with open(fn, 'rb') as g:
        lines = g.read().decode('utf-8').split('\n')
    np.random.shuffle(lines)

    result = {}
    cnt = 0
    for line in lines[:3000]:
        try:
            json_result = __pred(line, nlp)
            logger.debug(json_result)
            # Only the first entry of the prediction is kept.
            k, v = next(iter(json_result.items()))
            result[k] = v
            logger.debug('最终保存样本是')
            logger.debug(k)
            logger.debug(v)
            cnt += 1
            if cnt % 100 == 0:
                logger.debug(cnt)
        except Exception:
            # ``Exception`` rather than ``BaseException`` so that Ctrl-C and
            # SystemExit still stop the run.
            traceback.print_exc()
            continue
    with open("./ner_train.json", 'w') as json_file:
        json.dump(result, json_file)
    return 0
Example #7
0
 def test_extractEntities(self):
     """extractEntities should collect two entities from the sample hostd log."""
     log_path = u'/tmp/esx-w2-erqa230-2014-05-09--23.23/var/log/hostd.log'
     # Raw string: the regex escapes must reach ``re`` untouched and must not
     # be parsed as (invalid) string escape sequences.
     patterns = [r'\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\d\.\d\d\dZ.*/(?P<entity>.*\.vmx).*Failed to load virtual machine: vim.fault.FileNotFound']
     entities = set()
     extractEntities(log_path, patterns, entities)
     logger.debug(entities)
     self.assertEqual(len(entities), 2)
 def __on_mqtt_message(self, client, userdata, msg):
     """Handle an incoming MQTT message.

     The payload is decoded as JSON. Response-topic messages are inspected
     for their ``return_code``; event-topic messages provide the charging
     state directly. When a charging state reports remaining time but a zero
     rate, the vehicle is woken up after a one minute pause.

     ``client`` and ``userdata`` are not used by this method.
     A ``KeyError`` raised while reading the payload is logged, not raised.
     """
     try:
         logger.info("mqtt msg %s %s", msg.topic, msg.payload)
         data = json.loads(msg.payload)
         charge_info = None
         if msg.topic.startswith(MQTT_RESP_TOPIC):
             if "return_code" not in data:
                 logger.debug("mqtt msg hasn't return code")
             elif data["return_code"] == "400":
                 self.refresh_remote_token(force=True)
                 logger.error("retry last request, token was expired")
             elif data["return_code"] == "300":
                 logger.error('%s', data["return_code"])
             elif data["return_code"] != "0":
                 logger.error('%s : %s', data["return_code"], data["reason"])
                 # NOTE(review): the VehicleState payload is only read inside this
                 # error branch (return_code other than "0") - confirm it should
                 # not be handled for successful responses instead.
                 if msg.topic.endswith("/VehicleState"):
                     charge_info = data["resp_data"]["charging_state"]
                     self.precond_programs[data["vin"]] = data["resp_data"]["precond_state"]["programs"]
         elif msg.topic.startswith(MQTT_EVENT_TOPIC):
             charge_info = data["charging_state"]
         if charge_info is not None and charge_info['remaining_time'] != 0 and charge_info['rate'] == 0:
             # fix a psa server bug where charge beginning without status api being properly updated
             logger.warning("charge begin but API isn't updated")
             # Blocks the calling thread for a minute before waking the vehicle up.
             sleep(60)
             self.wakeup(data["vin"])
     except KeyError:
         logger.exception("mqtt message:")
    def record_info(self, car: Car):
        """Store the car's current position, levels and charging state.

        The position goes to ``Database.record_position``, the telemetry is
        forwarded through ``self.abrp`` and the charging state goes to
        ``Charging.record_charging``. A missing charging status is only logged.
        """
        mileage = car.status.timed_odometer.mileage
        electric = car.status.get_energy('Electric')
        level = electric.level
        level_fuel = car.status.get_energy('Fuel').level
        charge_date = electric.updated_at
        moving = car.status.kinetic.moving

        position = car.status.last_position
        coordinates = position.geometry.coordinates
        longitude = coordinates[0]
        latitude = coordinates[1]
        altitude = coordinates[2]
        # Fall back to the charge date when the position carries no date.
        date = position.properties.updated_at
        if date is None:
            date = charge_date
        logger.debug("vin:%s longitude:%s latitude:%s date:%s mileage:%s level:%s charge_date:%s level_fuel:"
                     "%s moving:%s", car.vin, longitude, latitude, date, mileage, level, charge_date, level_fuel,
                     moving)
        Database.record_position(self.weather_api, car.vin, mileage, latitude, longitude, altitude, date, level,
                                 level_fuel, moving)
        self.abrp.call(car, Database.get_last_temp(car.vin))
        try:
            charging = car.status.get_energy('Electric').charging
            charging_status = charging.status
            charging_mode = charging.charging_mode
            Charging.record_charging(car, charging_status, charge_date, level, latitude, longitude, self.country_code,
                                     charging_mode)
            logger.debug("charging_status:%s ", charging_status)
        except AttributeError:
            logger.error("charging status not available from api")
Example #10
0
    def doWork(self):
        '''Thread body overriding WorkRequest's worker function; runs in the thread pool.

        Initialises the database, then keeps moving records from the data
        queue into the ``zspider`` table until the exit event is set.
        '''
        logger.debug('Start storage`s doWork...')
        if not self.__initDB():
            logger.error('Storage thread is stop.')
            return

        conn = sqlite3.connect(self.__dbPath)
        cur = conn.cursor()
        # Bound parameters instead of string formatting: crawled urls are
        # untrusted and may contain quotes.
        sqlInsert = 'INSERT INTO zspider(url, time, depth) VALUES (?, ?, ?)'
        while True:
            try:
                # Take one record from the data queue and insert it.
                if self.__dataQueue.qsize() > 0:
                    data = self.__dataQueue.get()
                    # Same text / integer coercion the former '%s' / '%d'
                    # formatting applied to time and depth.
                    cur.execute(sqlInsert, (data.url, '%s' % (data.time,), int(data.depth)))
                    conn.commit()
                else:
                    time.sleep(1)
            except Exception as e:
                logger.error('Database operate exception: %s ', str(e))
                continue
            # Check the exit event.
            if self.__exitEvent.is_set():
                cur.close()
                conn.close()
                logger.info('Storage model quit...')
                return
Example #11
0
 def call(self, car: Car, ext_temp: float = None):
     """Send the car's telemetry to the ABRP endpoint.

     Returns ``True`` when the service answers with status ``"ok"``. Returns
     ``False`` when no token is set, when the car's vin is not enabled, or
     when an AttributeError, IndexError or ValueError occurs.
     """
     try:
         if self.token is None or len(self.token) == 0:
             logger.debug("No abrp token provided")
             return False
         if car.vin not in self.abrp_enable_vin:
             return False
         energy = car.status.get_energy('Electric')
         telemetry = {
             "utc": int(datetime.timestamp(energy.updated_at)),
             "soc": energy.level,
             "speed": getattr(car.status.kinetic, "speed", None),
             "car_model": car.get_abrp_name(),
             "current": car.status.battery.current,
             "is_charging": energy.charging.status == "InProgress",
             "lat": car.status.last_position.geometry.coordinates[1],
             "lon": car.status.last_position.geometry.coordinates[0],
             "power": energy.consumption,
         }
         if ext_temp is not None:
             telemetry["ext_temp"] = ext_temp
         query = {"tlm": json.dumps(telemetry), "token": self.token, "api_key": self.api_key}
         # Certificate verification is switched off whenever proxies are configured.
         response = requests.request("POST", self.url, params=query, proxies=self.proxies,
                                     verify=self.proxies is None)
         logger.debug(response.text)
         return response.json()["status"] == "ok"
     except (AttributeError, IndexError, ValueError):
         logger.exception("abrp:")
     return False
Example #12
0
def getTgzFiles(folder_path):
    """Return the urls of every link on the *folder_path* page whose href
    matches ``.*\\.tgz``, each prefixed with ``folder_path + '/'``."""
    soup = BeautifulSoup(urlopen(folder_path).read())
    logger.debug(folder_path)
    tgz_href = re.compile(r'.*\.tgz')
    return [folder_path + '/' + anchor['href']
            for anchor in soup.findAll('a', href=tgz_href)]
Example #13
0
    def doWork(self):
        '''Thread body overriding WorkRequest's worker function; runs in the thread pool.

        Takes url nodes from the download queue assigned to this downloader,
        downloads them and hands the wrapped pages to the html queue.
        '''
        logger.debug('Start downloader`s doWork...')
        while True:
            if self.__dlQueue.qsize() > 0:
                node = self.__dlQueue.get()
                self.__downloadingFlag += 1
                content = self.__downloadPage(node.url)
                gotPage = len(content) != 0
                if gotPage:
                    # Wrap the page in the internal format and queue it for
                    # the parser module.
                    self.__htmlQueue.put(
                        HtmlModel(node.url, content, timestamp(), node.depth))
                self.__downloadingFlag -= 1
                if not gotPage:
                    # An empty download goes straight to the next queue item,
                    # skipping the exit check and the pause below.
                    continue
            # Check the exit event.
            if self.__exitEvent.is_set():
                logger.info('Download model quit...')
                return
            # Pause between downloads.
            time.sleep(FETCH_TIME_INTERVAL)
Example #14
0
    def doWork(self):
        '''Thread body overriding WorkRequest's worker function; runs in the thread pool.

        Initialises the database, then keeps moving records from the data
        queue into the ``zspider`` table until the exit event is set.
        '''
        logger.debug('Start storage`s doWork...')
        if not self.__initDB():
            logger.error('Storage thread is stop.')
            return

        conn = sqlite3.connect(self.__dbPath)
        cur = conn.cursor()
        # Bound parameters instead of string formatting: crawled urls are
        # untrusted and may contain quotes.
        sqlInsert = 'INSERT INTO zspider(url, time, depth) VALUES (?, ?, ?)'
        while True:
            try:
                # Take one record from the data queue and insert it.
                if self.__dataQueue.qsize() > 0:
                    data = self.__dataQueue.get()
                    # Same text / integer coercion the former '%s' / '%d'
                    # formatting applied to time and depth.
                    cur.execute(sqlInsert, (data.url, '%s' % (data.time,), int(data.depth)))
                    conn.commit()
                else:
                    time.sleep(1)
            except Exception as e:
                logger.error('Database operate exception: %s ', str(e))
                continue
            # Check the exit event.
            if self.__exitEvent.is_set():
                cur.close()
                conn.close()
                logger.info('Storage model quit...')
                return
Example #15
0
 def publish(self, mqtt_request: MQTTRequest, store=True):
     """Publish *mqtt_request* on its topic after refreshing the remote token.

     When *store* is true the request replaces ``self.last_request``.
     """
     self._refresh_remote_token()
     payload = mqtt_request.get_message_to_json(
         self.remoteCredentials.access_token)
     logger.debug("%s %s", mqtt_request.topic, payload)
     self.mqtt_client.publish(mqtt_request.topic, payload)
     if not store:
         return
     self.last_request = [mqtt_request]
Example #16
0
def _clean_text(text, cleaner_names, verbose):
    """Apply the named cleaners from ``cleaners`` to *text*, in order.

    :param text: text to clean
    :param cleaner_names: names of callables looked up on ``cleaners``
    :param verbose: when true, log the cleaned text at debug level
    :return: the text after the last cleaner ran
    :raises Exception: when a name does not resolve to a cleaner
    """
    for name in cleaner_names:
        # A default is required here: without it ``getattr`` raises
        # AttributeError and the explicit check below is never reached.
        cleaner = getattr(cleaners, name, None)
        if not cleaner:
            raise Exception('Unknown cleaner: %s' % name)
        text = cleaner(text)
    if verbose:
        logger.debug(f"cleaned text: {text}")
    return text
 def refresh_vehicle_info(self):
     """Refresh every vehicle's info forever, once per ``info_refresh_rate`` seconds.

     Returns immediately when no refresh rate is configured. Each round
     sleeps first, refreshes every car and then runs the info callbacks.
     """
     if self.info_refresh_rate is None:
         return
     while True:
         sleep(self.info_refresh_rate)
         logger.debug("refresh_vehicle_info")
         for vehicle in self.vehicles_list:
             self.get_vehicle_info(vehicle.vin)
         for notify in self.info_callback:
             notify()
Example #18
0
 def add_table(self, src, figure):
     """Register *figure* as a table, recording its ``*_str`` datetime columns.

     A figure lacking the required attributes is skipped with a debug message.
     """
     try:
         table = Table(figure.id, src, figure)
         date_columns = []
         for column in figure.columns:
             if column["type"] == "datetime" and column["id"].endswith("_str"):
                 # Drop the trailing "_str" from the column id.
                 date_columns.append(column["id"][:-4])
         table.date_columns = date_columns
         self.tables.append(table)
     except AttributeError:
         logger.debug("figure isn't a table")
Example #19
0
def load_otp(filename="otp.bin"):
    """Load the pickled otp object stored in *filename*.

    :param filename: path of the pickle file
    :return: the unpickled object, or ``None`` when the file does not exist
    """
    # NOTE(review): unpickling executes code from the file; only load files
    # this application wrote itself.
    try:
        with open(filename, 'rb') as input_file:
            try:
                return pickle.load(input_file)
            except ModuleNotFoundError:
                # The failed attempt already consumed part of the stream;
                # rewind so the fallback unpickler starts from the beginning.
                input_file.seek(0)
                return RenameUnpickler(input_file).load()
    except FileNotFoundError:
        logger.debug("", exc_info=True)
    return None
Example #20
0
 def get_value_or_default(self, key):
     """Return attribute *key*, filling it from the mapped field's default.

     When the attribute is missing or ``None`` and the field declared in
     ``__mappings__`` has a default, the default (called first if callable)
     is stored on the instance and returned. Otherwise ``None`` is returned.
     """
     current = getattr(self, key, None)
     if current is not None:
         return current
     default = self.__mappings__[key].default
     if default is None:
         return current
     value = default() if callable(default) else default
     logger.debug('using default value for {}:{}'.format(
         key, str(value)))
     setattr(self, key, value)
     return value
Example #21
0
 def clean_position(conn):
     """Delete the middle of the three newest position rows when it is redundant.

     The row is removed only when the three newest rows share the same
     mileage and the same level.
     """
     rows = conn.execute(
         "SELECT Timestamp,mileage,level from position ORDER BY Timestamp DESC LIMIT 3;"
     ).fetchall()
     if len(rows) != 3:
         return
     newest, middle, oldest = rows
     same_mileage = newest["mileage"] == middle["mileage"] == oldest["mileage"]
     if same_mileage and newest["level"] == middle["level"] == oldest["level"]:
         logger.debug("Delete duplicate line")
         conn.execute("DELETE FROM position where Timestamp=?;",
                      (middle["Timestamp"], ))
         conn.commit()
Example #22
0
def parse_log_file(log_file):
    """Count vpn accounts found in the gb18030 encoded *log_file*.

    Each line is handed to ``parse``; the per-account counters live in
    ``summary_data``. Lines without an account are only logged.
    """
    with open(log_file, encoding='gb18030') as file:
        for line in file:
            vpn_account = parse(line)
            if not vpn_account:
                logger.debug('not find vpn account in {}', line)
                continue
            if vpn_account in summary_data:
                summary_data[vpn_account] += 1
            else:
                summary_data[vpn_account] = 1
Example #23
0
def extractFiles(paths):
    """Extract every archive in *paths* unless its target directory exists.

    The target directory is the archive path cut at its last dot. Returns
    the list of target directories in input order.
    """
    extracted = []
    for archive in paths:
        target = archive[:archive.rfind('.')]
        if not os.path.exists(target):
            logger.debug("extracting file %s" % archive)
            extractFile(archive)
        else:
            logger.debug("%s already exists, won't extract again" % target)
        extracted.append(target)
    return extracted
Example #24
0
 def gen_qrcode(self):
     """Print the login QR code for ``self.uuid`` as inverted ASCII art."""
     login_url = 'https://login.weixin.qq.com/l/' + self.uuid
     logger.debug("qrcode url:%s", login_url)
     code = qrcode.QRCode()
     code.border = 1
     code.add_data(login_url)
     code.make()
     code.print_ascii(invert=True)
Example #25
0
 def doWork(self):
     '''Thread body overriding WorkRequest's worker function; runs in the thread pool.

     Parses a page whenever the html queue is non-empty, otherwise idles for
     a second; stops once the exit event is set.
     '''
     logger.debug('Start parser`s doWork...')
     while True:
         if self.__htmlQueue.qsize() <= 0:
             time.sleep(1)
         else:
             self.__parsePage()
         # Check the exit event.
         if not self.__exitEvent.is_set():
             continue
         logger.info('Parser model quit...')
         return
Example #26
0
 def __refresh_vehicle_info(self):
     """Refresh every vehicle's info forever, pausing ``info_refresh_rate`` seconds.

     Returns immediately when no refresh rate is configured. Any error raised
     during a round is logged and the loop carries on.
     """
     if self.info_refresh_rate is None:
         return
     while True:
         try:
             logger.debug("refresh_vehicle_info")
             for vehicle in self.vehicles_list:
                 self.get_vehicle_info(vehicle.vin)
             for notify in self.info_callback:
                 notify()
         except:  # pylint: disable=bare-except
             logger.exception("refresh_vehicle_info: ")
         sleep(self.info_refresh_rate)
Example #27
0
 def doWork(self):
     '''Thread body overriding WorkRequest's worker function; runs in the thread pool.

     Parses a page whenever the html queue is non-empty, otherwise idles for
     a second; stops once the exit event is set.
     '''
     logger.debug('Start parser`s doWork...')
     while True:
         if self.__htmlQueue.qsize() <= 0:
             time.sleep(1)
         else:
             self.__parsePage()
         # Check the exit event.
         if not self.__exitEvent.is_set():
             continue
         logger.info('Parser model quit...')
         return
Example #28
0
 def load_cars(name=None):
     """Load the cars configuration from the JSON file *name*.

     Without a name, the default ``config_filename`` of a fresh ``Cars`` is
     used. The loaded configuration is saved back right away. A missing file
     or a TypeError yields an empty ``Cars``.
     """
     config_path = Cars().config_filename if name is None else name
     try:
         with open(config_path, "r", encoding="utf-8") as file:
             cars = Cars.from_json(json.load(file))
             cars.config_filename = config_path
             cars.save_cars()
             return cars
     except (FileNotFoundError, TypeError) as ex:
         logger.debug(ex)
         return Cars()
Example #29
0
def downloadSupportBundles(url):
    """Download every tgz file found for *url* into a directory named after them.

    The directory name is the SHA-1 hex digest of the concatenated file urls.

    :param url: url handed to ``processUrl``
    :return: the result of ``downloadFiles``, or an error string when
        ``processUrl`` returns ``None``
    """
    urls = processUrl(url)
    if urls is None:
        return "Error: no tgz files found!"
    hash1 = hashlib.sha1()
    for url1 in urls:
        # hashlib only accepts bytes; encode text urls first.
        hash1.update(url1 if isinstance(url1, bytes) else url1.encode('utf-8'))
    # new dir name
    newdir = hash1.hexdigest()
    logger.debug("directory name for url %s is %s" % (url, newdir))
    local_paths = downloadFiles(newdir, urls)
    return local_paths
Example #30
0
    def dismissWorkers(self, threadNum, doJoin=False):
        '''Dismiss up to threadNum worker threads.

        When doJoin is true, wait for the dismissed threads to finish before
        returning; otherwise remember them in the dismissed-workers list and
        return immediately.
        '''
        # Indentation uses spaces only: the former tab/space mix is a
        # TabError on Python 3.
        logger.debug('Dismiss worker thread, Num : %d ', threadNum)
        tmpList = []
        for _ in range(min(threadNum, len(self.__workers))):
            worker = self.__workers.pop()
            worker.dismiss()
            tmpList.append(worker)

        if doJoin:
            for worker in tmpList:
                worker.join()
        else:
            self.__dismissedWorkers.extend(tmpList)
Example #31
0
def _load_dict_from_file():
    """Load the acronym dictionary, one stripped entry per line of the file.

    The file next to this module is preferred. The historical absolute path
    is kept as a fallback so existing deployments keep working.

    :return: list of the stripped lines of the dictionary file
    """
    current_dir = Path(__file__).resolve().parent
    t1 = time.time()
    logger.debug(f'current working directory: {os.getcwd()}')
    dict_path = current_dir / 'wikipedia-acronyms-simple.json'
    if not dict_path.exists():
        # Joining ``current_dir`` with this absolute path would discard
        # ``current_dir`` anyway, so the legacy location is used directly.
        dict_path = Path('/home/shihangyu/Scripts/tacotron2-nvidia-ysh/text/cleaner/wikipedia-acronyms-simple.json')
    with open(dict_path, 'r') as f:
        acronymn_list = [line.strip() for line in f]

    logger.debug(f'load acronymn dict use time {time.time() - t1}')
    return acronymn_list
Example #32
0
 def doTransform(self):
     """Rename each entry's source picture to ``<student number>.jpg``.

     Three candidate source names are tried in order; the first existing one
     is moved to the output directory. Entries without any source file are
     reported and skipped.
     """
     sourceFiles = self.sourceFiles()
     logger.info("transform {} source files", len(sourceFiles))
     for dataEntry in self.dictData:
         logger.debug("process 学号: {}/报名号: {}", int(dataEntry['学号']),
                      dataEntry['报名号'])
         # All candidate paths are built up front, before any existence check.
         candidates = (
             ("rename from (证件号码) {} to {}",
              os.path.join(self.inputDir, str(dataEntry['证件号码']) + ".jpg")),
             ("rename from (考生编号) {} to {}",
              os.path.join(self.inputDir, str(int(dataEntry['考生编号'])) + ".jpg")),
             ("rename from (10673_18_报名号) {} to {}",
              os.path.join(self.inputDir, '10673_18_' + str(dataEntry['报名号']) + ".jpg")),
         )
         targetPath = os.path.join(self.outputDir,
                                   str(int(dataEntry['学号'])) + ".jpg")
         for message, sourcePath in candidates:
             if os.path.exists(sourcePath):
                 logger.debug(message, sourcePath, targetPath)
                 os.rename(sourcePath, targetPath)
                 break
         else:
             logger.warn("source file for {} does not exit, skip it",
                         int(dataEntry['学号']))
Example #33
0
    def dismissWorkers(self, threadNum, doJoin=False):
        '''Dismiss up to threadNum worker threads.

        When doJoin is true, wait for the dismissed threads to finish before
        returning; otherwise remember them in the dismissed-workers list and
        return immediately.
        '''
        logger.debug('Dismiss worker thread, Num : %d ', threadNum)
        stopped = []
        stopCount = min(threadNum, len(self.__workers))
        for _ in range(stopCount):
            current = self.__workers.pop()
            current.dismiss()
            stopped.append(current)

        if not doJoin:
            self.__dismissedWorkers.extend(stopped)
            return
        for current in stopped:
            current.join()
Example #34
0
 def record_charging(
         car,
         charging_status,
         charge_date: datetime,
         level,
         latitude,  # pylint: disable=too-many-locals
         longitude,
         country_code,
         charging_mode,
         charging_rate,
         autonomy):
     """Record the progress or the end of a charge in the database.

     While the status is ``"InProgress"`` a point is added to
     ``battery_curve`` and, when the latest ``battery`` row is already
     stopped (or no row exists), a new ``battery`` row is opened. For any
     other status, a still-open ``battery`` row is completed through
     ``Charging.update_chargings``.

     The connection is committed and closed at the end of the call.
     """
     conn = Database.get_db()
     # Timestamps are stored with second precision.
     charge_date = charge_date.replace(microsecond=0)
     if charging_status == "InProgress":
         # Without any row the fallback makes stop_at False, which is not None,
         # so a new battery row is inserted below.
         stop_at, start_at = conn.execute(
             "SELECT stop_at, start_at FROM battery WHERE VIN=? ORDER BY start_at "
             "DESC limit 1", (car.vin, )).fetchone() or [False, None]
         try:
             conn.execute(
                 "INSERT INTO battery_curve(start_at,VIN,date,level,rate,autonomy) VALUES(?,?,?,?,?,?)",
                 (start_at, car.vin, charge_date, level, charging_rate,
                  autonomy))
         except IntegrityError:
             logger.debug("level already stored")
         if stop_at is not None:
             conn.execute(
                 "INSERT INTO battery(start_at,start_level,charging_mode,VIN) VALUES(?,?,?,?)",
                 (charge_date, level, charging_mode, car.vin))
         Ecomix.get_data_from_co2_signal(latitude, longitude, country_code)
     else:
         try:
             start_at, stop_at, start_level = conn.execute(
                 "SELECT start_at, stop_at, start_level from battery WHERE VIN=? ORDER BY start_at DESC limit 1",
                 (car.vin, )).fetchone()
             # A row without stop_at is a charge that has not been closed yet.
             in_progress = stop_at is None
         except TypeError:
             # fetchone() returned None, which cannot be unpacked.
             logger.debug("battery table is probably empty :",
                          exc_info=True)
             in_progress = False
         if in_progress:
             co2_per_kw = Ecomix.get_co2_per_kw(start_at, charge_date,
                                                latitude, longitude,
                                                country_code)
             # Level difference in percent, scaled by car.battery_power.
             consumption_kw = (level -
                               start_level) / 100 * car.battery_power
             Charging.update_chargings(conn, start_at, charge_date, level,
                                       co2_per_kw, consumption_kw, car.vin)
     conn.commit()
     conn.close()
Example #35
0
 def _run(self, str, func, *args):
     """Log *str*, call ``func(*args)`` and exit with status 1 when it returns a falsy value."""
     logger.debug(str)
     if not func(*args):
         logger.debug('%s... 失败' % (str))
         logger.debug('[*] 退出程序')
         exit(1)
     else:
         logger.debug('%s... 成功' % (str))
Example #36
0
    def retrieve_messages(self):
        '''Poll the webwxsync endpoint and return the new message list.

        Returns None when the request yields an empty string. On a zero
        return code the sync key and its string form are updated first.
        '''
        query = '/webwxsync?sid=%s&skey=%s&pass_ticket=%s' % (self.sid, self.skey, self.pass_ticket)
        url = self.base_uri + query

        payload = {
            'BaseRequest': self.BaseRequest,
            'SyncKey': self.SyncKey,
            'rr': ~int(time.time()),
        }
        dic = self._post(url, payload)
        if dic == '':
            return None
        logger.debug("retrieve message: %s", dic)

        if dic['BaseResponse']['Ret'] == 0:
            self.SyncKey = dic['SyncKey']
            pairs = ["%s_%s" % (entry['Key'], entry['Val']) for entry in self.SyncKey['List']]
            self.synckey = '|'.join(pairs)
        return dic['AddMsgList']
Example #37
0
 def doTransform(self):
     """Rename ``<registration number>.jpg`` to ``<student number>.jpg`` per entry.

     Entries whose source picture is missing are reported and skipped.
     """
     sourceFiles = self.sourceFiles()
     logger.info("transform {} source files", len(sourceFiles))
     for dataEntry in self.dictData:
         studentNo = int(dataEntry['学号'])
         registrationNo = int(dataEntry['报名号'])
         logger.debug("process 学号: {}/报名号: {}", studentNo, registrationNo)
         sourcePath = os.path.join(self.inputDir, str(registrationNo) + ".jpg")
         targetPath = os.path.join(self.outputDir, str(studentNo) + ".jpg")
         if os.path.exists(sourcePath):
             logger.debug("rename {} to {}", sourcePath, targetPath)
             os.rename(sourcePath, targetPath)
         else:
             logger.warn("{} does not exit, skip it", sourcePath)
Example #38
0
 def get_otp_code(self):
     """Run the activation sequence and return a fresh otp code.

     When finalisation reports ``Otp.OTP_TWICE`` the activation is run a
     second time before the code is requested.

     :raises ConfigException: when no otp code could be obtained
     """
     self.mode = Otp.OTP_MODE
     code = None
     if self.activation_start():
         outcome = self.activation_finalyze()
         if outcome != Otp.NOK:
             if outcome == Otp.OTP_TWICE:
                 self.mode = Otp.OTP_MODE
                 self.activation_start()
                 self.activation_finalyze()
             code = self._get_otp_code()
             logger.debug("otp code: %s", code)
     if code is not None:
         return code
     raise ConfigException("Can't get otp code")
Example #39
0
    def start(self):
        '''Create and start every module, then run the dispatch loop.

        Downloaders, the parser and the storage module are put into the
        thread pool. The main loop then feeds urls to idle download queues,
        periodically logs the queue sizes and shuts everything down once all
        queues are drained.
        '''
        # Indentation uses spaces only: the former tab/space mix is a
        # TabError on Python 3.
        logger.debug('Init start urls...')
        self.__initUrlQueue(self.__startUrls)

        # Start threadNum downloaders and give each its own download queue.
        logger.debug('Put downloader to thread pool...')
        for i in range(self.__threadNum):
            dlQueue = Queue.Queue()
            self.__downloadQueueList.append(dlQueue)
            downloadReq = Downloader(dlQueue, self.__downloadMode, self.__htmlQueue, self.__exitEvent, self.__downloadingFlag)
            self.__threadPool.putRequest(downloadReq)

        # Create the parser module and run it in the thread pool.
        logger.debug('Put parser to thread pool...')
        parserReq = Parser(self.__depth, self.__startUrls, self.__keyword, self.__htmlQueue, self.__dataQueue, self.__urlQueue, self.__exitEvent)
        self.__threadPool.putRequest(parserReq)

        # Create the storage module and run it in the thread pool.
        logger.debug('Put storage to thread pool...')
        storageReq = Storage(self.__dbName, self.__dataQueue, self.__exitEvent)
        self.__threadPool.putRequest(storageReq)

        # The main loop hands urls to the download queues and logs progress.
        # NOTE(review): this loop never sleeps, so it spins a CPU core while
        # waiting - consider adding a short pause.
        logger.debug('start main loop...')
        lastTime = time.time()
        while True:
            for dlQueue in self.__downloadQueueList:
                if self.__urlQueue.qsize() > 0 and dlQueue.qsize() < 1:
                    node = self.__urlQueue.get()
                    dlQueue.put(node)

            now = time.time()
            if now - lastTime > PRINT_TIME_INTERVAL:
                logger.info('URL QUEUE SIZE : %d', self.__urlQueue.qsize())
                logger.info('HTML QUEUE SIZE : %d', self.__htmlQueue.qsize())
                logger.info('DATA QUEUE SIZE : %d', self.__dataQueue.qsize())
                logger.info('REPEAT SET SIZE : %d', parserReq.getRepeatSetSize())
                # Delay the exit check so the program does not quit right
                # after start-up.
                if now - lastTime > 30:
                    if self.__urlQueue.qsize() < 1 and self.__htmlQueue.qsize() < 1 and \
                            self.__dataQueue.qsize() < 1 and self.__downloadingFlag < 1:
                        self.__exitEvent.set()
                        self.__threadPool.close(True)
                        return
                lastTime = now
Example #40
0
    def __initDB(self):
        '''Set up the database file path and create the table if needed.

        The database lives in a ``db`` directory under the current working
        directory. Returns True on success and False on any error, which is
        logged.
        '''
        try:
            dbDir = os.getcwd() + '/db/'
            if not os.path.exists(dbDir):
                os.makedirs(dbDir)
            self.__dbPath = dbDir + self.__dbName

            conn = sqlite3.connect(self.__dbPath)
            try:
                sqlCreateTable = '''CREATE TABLE IF NOT EXISTS zspider(
                         id integer primary key, url text, html text, time text, depth integer)'''
                conn.execute(sqlCreateTable)
            finally:
                # Close the connection even when the statement fails.
                conn.close()
            logger.debug('Create database success.')
            return True
        except Exception as e:
            logger.error('Init database error : %s', str(e))
            return False
Example #41
0
def downloadFile(local_new_dir, url):
    """Download *url* into *local_new_dir* with axel and return the local path.

    An already existing file is not downloaded again. Raises ``Exception``
    when the download command exits with a non-zero status.
    """
    filename = os.path.basename(url)
    logger.debug("Downloading " + filename)
    local_path = os.path.abspath(os.path.join(local_new_dir, filename))
    if not os.path.exists(local_path):
        command = '/usr/local/bin/axel -q -n 8 -o %s %s' % (local_path, url)
        ret, out, err = RunCmd(command)
        if ret != 0:
            raise Exception('Cannot download file: %s, out: %s, err: %s' % (url, out, err))
        return local_path
    logger.debug("%s already exists, won't download again" % local_path)
    return local_path
Example #42
0
    def run(self):
        '''Thread function: poll the request queue and execute the requests found.

        The loop ends once the thread is dismissed. A request fetched right
        before the dismissal is put back on the queue.
        '''
        # Indentation uses spaces only: the former tab/space mix is a
        # TabError on Python 3.
        logger.debug('Work thread is running...')
        while True:
            # Check the dismiss event.
            if self.__dismissed.is_set():
                break
            try:
                # Fetch a WorkRequest instance from the request queue.
                request = self.__requestsQueue.get(True, self.__pollTimeout)
            except Queue.Empty:
                continue

            if self.__dismissed.is_set():
                self.__requestsQueue.put(request)
                break
            self.__isWorking = True
            try:
                # Run the request's doWork method.
                request.doWork()
            except Exception as e:
                # Report the failure instead of swallowing it silently.
                logger.error('Work request raised an exception: %s', str(e))
            finally:
                # Reset the flag even when doWork raised.
                self.__isWorking = False
Example #43
0
    def joinAllDismissedWorkers(self):
        '''Wait for every dismissed thread to finish, then forget them.'''
        for worker in self.__dismissedWorkers:
            # Join the loop variable; the former ``Worker.join()`` referenced
            # a name that is not the loop variable.
            worker.join()
        logger.debug('All dismissed woker is quit...')
        self.__dismissedWorkers = []
Example #44
0
 def test_downloadUrl(self):
     """The downloaded tgz file must have the expected size on disk."""
     expected_size = 162180210
     url = "http://engweb.eng.vmware.com/bugs/files/0/1/2/4/9/0/9/7/esx-w2-erqa230-2014-05-09--23.23.tgz"
     local_path = downloadFile("/tmp/", url)
     logger.debug(local_path)
     actual_size = os.path.getsize(local_path)
     self.assertEqual(actual_size, expected_size)
Example #45
0
    def createWorkers(self, threadNum, pollTimeout):
        '''Create worker threads and add them to the thread pool.

        At most MAX_THREAD_NUM workers are created per call.
        '''
        # Indentation uses spaces only: the former tab/space mix is a
        # TabError on Python 3.
        logger.debug('Start create work thread...')
        for _ in range(min(MAX_THREAD_NUM, threadNum)):
            # Every worker receives the shared request queue.
            self.__workers.append(WorkerThread(self.__requestsQueue, pollTimeout))
Example #46
0
    def dismiss(self):
        '''Dismiss this thread by setting its dismiss event.'''
        # Indentation uses spaces only: the former tab/space mix is a
        # TabError on Python 3.
        logger.debug('work thread is dismiss...')
        self.__dismissed.set()
Example #47
0
def dumpLogRecords(records):
    """Log every record of every entity at debug level."""
    for entity, entries in records.items():
        for entry in entries:
            logger.debug(("%s, %s,%s") % (entity, entry.timestamp, entry.log))
Example #48
0
    def close(self, doJoin=False):
        '''Dismiss every worker thread.

        When doJoin is true, wait for all threads to finish before returning;
        otherwise return immediately.
        '''
        # Indentation uses spaces only: the former tab/space mix is a
        # TabError on Python 3.
        self.dismissWorkers(len(self.__workers), doJoin)
        if doJoin:
            self.joinAllDismissedWorkers()
        logger.debug('All thread is quit...')