def handle(self):
    """Serve one search request received on the client socket.

    Reads up to 1024 bytes, decodes them as JSON and takes ``data['url']``
    as the search URL. The search results are expanded into
    ``(link, parent)`` pairs (one extra crawl level when
    ``config.crawl_level == 2``) and sent back as a JSON document with the
    keys ``request`` and ``response``. Every failure is logged; nothing is
    raised to the caller.
    """
    pool = self.server.pool
    data = self.request.recv(1024)
    if not data:
        logger.debug('self.request.recv null')
        return
    try:
        data = json.loads(data)
        search_url = data['url']
    except Exception:
        # Malformed JSON or missing key; do not swallow SystemExit and
        # KeyboardInterrupt the way a bare except would.
        logger.debug('data error')
        return
    try:
        self.req_num = self.get_num()
        logger.debug('[begin]req_num: %d, search url: %s' % (self.req_num, search_url))
    except Exception as e:
        logger.debug('search url: %s, error %s' % (search_url, str(e)))
        logger.exception(e)
        # Keep the later '%d' log lines usable when get_num() failed before
        # req_num was ever assigned.
        self.req_num = getattr(self, 'req_num', -1)
    try:
        if not search_url:
            logger.debug('search_url null')
            logger.debug('[end]req_num: %d, search url: %s' % (self.req_num, search_url))
            self.request.send('')
            return
        link_url_list = Search().search(search_url)
        logger.debug('req_num: %d, google搜索返回的记录数 %d' % (self.req_num, len(link_url_list)))
        # Protocol-relative links get an explicit scheme.
        link_url_list = ['http:' + link if link[:2] == '//' else link
                         for link in link_url_list]
        _results = []
        for link in link_url_list:
            list_url = self.get_page_link_list(link)
            if config.crawl_level == 2:
                # A distinct comprehension variable is used on purpose: on
                # Python 2 the comprehension variable leaks and would
                # overwrite the outer loop variable.
                second_level = pool.map(self.get_page_link_list,
                                        [item[0] for item in list_url])
                result = []
                for sub_links in second_level:
                    result.extend(sub_links)
                for j in list_url:
                    result.append((j[0], link))
                _results.extend(result)
            else:
                for j in list_url:
                    _results.append((j[0], link))
        d = {}
        d['request'] = data
        d['response'] = _results
        for link in link_url_list:
            d['response'].append((link, search_url))
        logger.debug('[end]req_num: %d, search url: %s, 搜索到的记录数 %d' % (self.req_num, search_url, len(d['response'])))
        out_str = json.dumps(d)
        self.request.send(out_str)
    except Exception as e:
        logger.debug('[end]req_num: %d, search url: %s, error %s' % (self.req_num, search_url, str(e)))
        logger.exception(e)
def get_data(self):
    """Take a fresh snapshot of the host by running commands over SSH.

    Runs ``top -b -d3 -n5`` and ``users`` on ``self.url`` and passes the
    output lines to ``process_output`` and ``process_output_users``.
    Errors are logged with their traceback and never propagated; the SSH
    client is closed in every case.
    """
    self.snapshot = Snapshot(self, datetime.now())
    client = None
    try:
        # get an SSH client to communicate with the host
        client = paramiko.SSHClient()
        client.load_system_host_keys()
        client.set_missing_host_key_policy(paramiko.WarningPolicy)
        client.connect(self.url, username=self.username, password=self.password)
        logger.info("Grabbing data from %s, should take 15 seconds" % self.name)
        _stdin, stdout, _stderr = client.exec_command('top -b -d3 -n5')
        self.process_output(stdout.readlines())
        _stdin, stdout, _stderr = client.exec_command('users')
        self.process_output_users(stdout.readlines())
        logger.info("Got data from %s" % self.name)
    except Exception:
        # The exception is attached by logger.exception itself; passing it
        # as an extra argument without a placeholder breaks formatting.
        logger.exception("Error in SSH on host %s", self.name)
    finally:
        if client is not None:
            try:
                client.close()
            except Exception:
                # Best effort: a failing close must not hide the real error.
                pass
    return
def DoInstall(self, tmpDir, source):
    """Install a module from a directory or a zip archive.

    When ``source`` is not a directory it is treated as a zip file and
    extracted into ``tmpDir``; the first archive entry names the directory
    to install from. The tree is copied into ``adm.loaddir`` (for "Core")
    or ``adm.loaddir/<modid>``. For "Core" the start script is also made
    executable.

    Returns True on success, False when the archive cannot be extracted
    (``self.ModuleInfo`` then holds the message).
    """
    if not os.path.isdir(source):
        try:
            # The context manager closes the archive even when extraction
            # fails part-way.
            with zipfile.ZipFile(source) as archive:
                zipDir = archive.namelist()[0]
                archive.extractall(tmpDir)
        except Exception:
            self.ModuleInfo = xlt("Error extracting\n%s") % source
            logger.exception("Error extracting %s", source)
            return False
        source = os.path.join(tmpDir, zipDir)

    if self.modname == "Core":
        destination = adm.loaddir
    else:
        destination = os.path.join(adm.loaddir, self.modid)

    copytree(source, destination)
    try:
        shutil.rmtree(tmpDir)
    except Exception:
        # Best effort: leftover temp files must not fail the install.
        pass

    if self.modname == "Core":
        try:
            # Make start file executable for unpackaged files
            startFile = sys.argv[0]  # usually admin4.py
            st = os.stat(startFile)
            os.chmod(startFile, st.st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
        except Exception:
            # Best effort: the install itself already succeeded.
            pass
    return True
def uptime():
    """Return the machine uptime in seconds.

    On Mac the value comes from ``mac.get_current_uptime()``; on Linux it
    is the first field of ``/proc/uptime`` with the decimals truncated.
    Any other platform, or any failure (which is logged), yields 0.

    Returns:
        (long) The uptime in seconds.
    """
    seconds_up = 0
    platform_code = code()

    try:
        if platform_code == OSCode.Mac:
            seconds_up = mac.get_current_uptime()
        elif platform_code == OSCode.Linux:
            proc = subprocess.Popen(['cat', '/proc/uptime'],
                                    stdout=subprocess.PIPE)
            stdout_data = proc.communicate()[0]
            first_field = stdout_data.split()[0]
            # Truncate the decimals for Linux.
            seconds_up = long(float(first_field))
    except Exception as e:
        logger.error("Could not determine uptime.")
        logger.exception(e)

    return seconds_up
def computer_name():
    """Return the host name of this machine.

    On Mac the value printed by ``sysctl kern.hostname`` (the text after
    the first colon) is used when available. In every other case,
    including any failure of that command, ``socket.getfqdn()`` is
    returned.
    """
    if code() == OSCode.Mac:
        try:
            proc = subprocess.Popen(
                ['sysctl', 'kern.hostname'],
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
            )
            stdout_data, _stderr_data = proc.communicate()
            fields = stdout_data.split(':')
            if len(fields) >= 2:
                return fields[1].strip()
        except Exception as e:
            logger.error('Unable to process "sysctl kern.hostname".')
            logger.error('Falling back to socket for hostname.')
            logger.exception(e)

    return socket.getfqdn()
def queue_transform(self, job, page, *args, **kwds):
    """Decide which queue ``page`` goes to next and store it in ``page.out_queue``.

    ``page.out_queue`` is reset to None and then set to "download", "store"
    or "parse" depending on whether the page is an image, whether its URL
    is already in ``download_records``, whether it is empty and whether it
    has a ``parsed`` attribute. It stays None when no branch matches.

    ``job``, ``*args`` and ``**kwds`` are accepted but not used here.
    Returns ``page`` in every case; on an exception the error is logged and
    the call sleeps 10 seconds before returning.
    """
    global download_records
    page.out_queue = None
    img_download_queue, parse_queue, img_store_queue = "download", "parse", "store"
    download_queue = img_download_queue
    try:
        if page.is_img() and page.url not in download_records:
            # Image not downloaded yet.
            page.out_queue = download_queue
            # NOTE(review): this branch calls db.save while the branch below
            # calls db_save -- confirm both names exist and are intended.
            db.save("img_download", page.url)
        elif page.is_img() and page.url in download_records:
            # Image already downloaded: hand it to the store queue.
            page.out_queue = img_store_queue
        elif not page.is_empty() and not hasattr(page, 'parsed'):
            # Non-empty page that has not been parsed yet.
            page.out_queue = parse_queue
        # elif not page.is_empty() and hasattr(page, 'parsed'):
        #     page.out_queue = download_queue
        elif not page.is_empty() and not page.is_img() and hasattr(page, "parsed"):
            # Parsed page: record it and each of its children.
            logger.info("add page child into download_queue")
            page.out_queue = download_queue
            db_save("parse", page.url)
            page.find_all_child()
            for child in page.childs:
                db_save("childs", child.url)
        return page
    except Exception as exc:
        logger.exception(exc)
        time.sleep(10)
        return page
def handle_error(self):
    """Log the current error and pass it to the response context.

    The third item of ``asyncore.compact_traceback()`` is given to
    ``self.response.context.on_error`` when a context is set. The response
    is closed afterwards if ``self.response.will_close`` is true, even when
    the context callback raises.
    """
    logger.exception('Handle response error')
    traceback_info = asyncore.compact_traceback()
    error = traceback_info[2]
    try:
        context = self.response.context
        if context:
            self.response.context.on_error(error)
    finally:
        must_close = self.response.will_close
        if must_close:
            self.response.close()
def run(self):
    """Start the TCP server on 127.0.0.1:50005 and serve until it stops.

    A thread pool of ``config.thread_num`` workers is attached to the
    server as ``server.pool``. Any exception is logged and swallowed.
    """
    logger.info('server thread start!!!')
    listen_address = ('127.0.0.1', 50005)
    try:
        self.server = ThreadingTCPServer(listen_address, self.Handler)
        worker_pool = ThreadPool(config.thread_num)
        self.server.pool = worker_pool
        self.server.serve_forever()
    except Exception as e:
        logger.error(str(e) + ' 具体栈回溯信息查看crit.log ')
        logger.exception(e)
def _send(template_name, data):
    """Email ``data`` rendered through the template ``template_name``.

    The subject is "FeedbackDecryptor: " followed by the capitalised
    template name; the plain-text body is the YAML dump of ``data`` and the
    rich body is the rendered template. SMTP failures are logged and
    swallowed.
    """
    rendered = _render_email(template_name, data)

    try:
        recipients = config['statsEmailRecipients']
        from_address = config['emailUsername']
        subject = u'FeedbackDecryptor: ' + template_name.capitalize()
        plain_body = yaml.safe_dump(data, default_flow_style=False)
        sender.send(recipients, from_address, subject, plain_body, rendered)
    except smtplib.SMTPException:
        logger.exception()
def parse(*args, **kwds):
    """Mark the ``page`` keyword argument as parsed and collect its children.

    ``page.parsed`` is set to True first. When ``page.fill_html()`` is
    truthy, ``page.find_all_child()`` is called. Exceptions are logged.
    The page is returned in every case.
    """
    page = kwds.get("page")
    page.parsed = True
    try:
        logger.info("at parsing %r url deepth:%r" % (page.url, page.deepth))
        html_ready = page.fill_html()
        if html_ready:
            page.find_all_child()
    except Exception as e:
        logger.exception(e)
    return page
def main():
    """Run ``statschecker.go()`` forever.

    Installs ``_do_exit`` as the SIGTERM handler. When ``go()`` raises, the
    error is logged and the loop waits 60 seconds before retrying; when it
    returns normally it is called again immediately.
    """
    logger.log('Starting up')
    signal.signal(signal.SIGTERM, _do_exit)

    retry_delay_seconds = 60
    while True:
        try:
            statschecker.go()
        except Exception:
            logger.exception()
            time.sleep(retry_delay_seconds)
def go():
    """Process every record yielded by ``_diagnostic_record_iter()``.

    A failure on one record is logged and processing continues with the
    next record, unless ``_DEBUG`` is set, in which case the exception is
    re-raised.
    """
    # Note that `_diagnostic_record_iter` throttles itself if/when there are
    # no records to process.
    for record in _diagnostic_record_iter():
        try:
            _process_diagnostic_info(record)
        except Exception as e:
            if not _DEBUG:
                logger.exception()
                logger.error(str(e))
                continue
            raise
def main():
    """Run ``maildecryptor.go()`` forever.

    Installs ``_do_exit`` as the SIGTERM handler. When ``go()`` raises, the
    error is logged and the loop waits 60 seconds before retrying; when it
    returns normally it is called again immediately.
    """
    logger.log("Starting up")
    signal.signal(signal.SIGTERM, _do_exit)

    retry_delay_seconds = 60
    while True:
        try:
            maildecryptor.go()
        except Exception:
            logger.exception()
            time.sleep(retry_delay_seconds)
def go():
    """Email the decrypted diagnostic info for each pending email record.

    For each record from ``_email_diagnostic_info_records_iterator()`` that
    already has a matching diagnostic info entry, the entry is cleaned,
    dumped to YAML, formatted to HTML (None when formatting fails) and
    sent to ``config['decryptedEmailRecipient']``. The record is removed
    afterwards whether or not sending succeeded. Records without a
    matching entry are skipped and left in place.
    """
    # Note that `_email_diagnostic_info_records` throttles itself if/when
    # there are no records immediately available.
    for email_record in _email_diagnostic_info_records_iterator():
        # Check if there is (yet) a corresponding diagnostic info record
        record_id = email_record['diagnostic_info_record_id']
        diagnostic_info = datastore.find_diagnostic_info(record_id)
        if not diagnostic_info:
            continue

        # Modifies diagnostic_info in place
        _clean_diagnostic_info_for_yaml_dumping(diagnostic_info)

        # Convert the modified YAML back into a string for emailing.
        text_body = yaml.safe_dump(diagnostic_info,
                                   default_flow_style=False,
                                   width=75)

        try:
            html_body = mailformatter.format(diagnostic_info)
        except Exception as e:
            logger.error('format failed: %s' % str(e))
            html_body = None

        # A reply reuses the original subject; otherwise one is built from
        # the metadata.
        if email_record.get('email_id') is None:
            metadata = diagnostic_info['Metadata']
            platform = metadata.get('platform', '[NO_PLATFORM]').capitalize()
            subject = u'DiagnosticInfo: %s (%s)' % (platform,
                                                    diagnostic_info['Metadata'].get('id', '[NO_ID]'))
        else:
            subject = u'Re: %s' % (email_record['email_subject'] or '')

        try:
            sender.send_response(config['decryptedEmailRecipient'],
                                 config['emailUsername'],
                                 subject,
                                 text_body,
                                 html_body,
                                 email_record.get('email_id'),  # may be None
                                 None)  # no attachment
            logger.log('decrypted formatted email sent')
        except smtplib.SMTPException as e:
            logger.exception()
            logger.error(str(e))

        # Delete the processed record. (Note that sending the email might have
        # failed, but we're deleting it anyway. This is a debatable decision.)
        datastore.remove_email_diagnostic_info(email_record)
def comment(to_username, from_username, id, amount=None, months=None):
    """Reply to the Reddit thing ``id`` announcing a tip and/or a gilding.

    Waits for the remaining TTL of the redis key ``comment`` before
    posting, then sets that key again with a 600 second expiry. A reply is
    posted for ``amount`` (tip) and another for ``months`` (gold) when they
    are given.

    Returns True when everything succeeded, False after any exception
    (which is logged).
    """
    logger.log("Logging in...")
    credentials = config['reddit']
    r = praw.Reddit(client_id=credentials['client_id'],
                    client_secret=credentials['client_secret'],
                    username=credentials['username'],
                    password=credentials['password'],
                    user_agent='bot')

    # Links shared by both reply templates.
    links = dict(
        github='https://github.com/DecenterApps/Extend',
        webstore='https://chrome.google.com/webstore/detail/extend/babconedajpngaajmlnnhpahcladpcna',
        blog='https://blog.decenter.com/2017/11/14/extend/',
    )

    try:
        commentable_thing = thing.find_thing(id, r)

        expire = redis_client.ttl('comment')
        if expire > 0:
            logger.log('Current expire on comment: ' + str(expire))
            time.sleep(expire)

        redis_client.set('comment', 1)
        redis_client.expire('comment', 600)

        if from_username:
            from_username = '******' + from_username + '](https://reddit.com/user/' + from_username + ')'
        else:
            from_username = 'anonymous user'

        if amount:
            tip_text = comment_template.USER_TIPPED.format(
                to_username='******' + to_username + '](https://reddit.com/user/' + to_username + ')',
                from_username=from_username,
                ethAmount=amount,
                **links)
            commentable_thing.reply(tip_text)
            logger.log("Commented: " + id + ", " + from_username + " tipped " + amount + " " + to_username, slack=True)

        if months:
            if months == '1':
                months_label = months + ' month'
            else:
                months_label = months + ' months'
            gold_text = comment_template.BOUGHT_GOLD.format(
                to_username='******' + to_username + '](https://reddit.com/user/' + to_username + ')',
                from_username=from_username,
                months=months_label,
                **links)
            commentable_thing.reply(gold_text)
            logger.log("Commented: " + id + ", " + from_username + " gilded " + months + " month(s) " + to_username, slack=True)

        return True
    except requests.exceptions.ConnectionError as e:
        logger.exception(e.response)
        return False
    except Exception as e:
        logger.exception(e)
        return False
def handle_error(error_type, exception="", message=""):
    """Log an error and terminate the process with exit status 1.

    Args:
        error_type: object with ``name`` and ``value`` attributes, logged
            as ``name:value``.
        exception: optional exception (or text); logged when not empty.
        message: optional extra message; logged when not empty.

    Raises:
        SystemExit: always, with code 1.
    """
    logger.error("%s:%s" % (error_type.name, error_type.value))

    # Log the exception only when one was supplied (the original test was
    # inverted and logged only empty values).
    if exception:
        logger.exception(exception)

    # Log the message only when one was supplied.
    if message:
        logger.error(message)

    # Terminate with a failure exit code.
    sys.exit(1)
def load(filepath, appname=''):
    """Load the settings from the file, creating it first if it does not exist.

    The file content is executed as Python code and every name it defines
    becomes a key of the returned dict.

    Args:
        filepath: path of the preferences file.
        appname: passed to ``save`` when the file has to be created.

    Returns:
        dict of the names defined by the preferences file. When the file
        raises part-way, the error is logged and the names defined so far
        are returned.
    """
    if not os.path.exists(filepath):
        save(filepath, appname=appname)

    with open(filepath, 'r') as f:
        preferences_string = f.read()

    # Parse the preferences string.
    # SECURITY NOTE(review): exec runs arbitrary code from the file. This is
    # only acceptable while the preferences file is fully trusted.
    preferences = {}
    try:
        exec(preferences_string, {}, preferences)
    except Exception:
        log.exception("An exception occurred in the user preferences file.")

    return preferences
def __create_elements_matrix(board_size, response_dict):
    """Build an occupancy matrix for the board.

    The matrix has ``board_size['sizeY']`` rows and ``board_size['sizeX']``
    columns. Each cell covered by an element of
    ``response_dict['elements']`` is set to 1; other cells stay 0. The
    index arithmetic treats ``posX``/``posY`` as 1-based.

    If an element reaches outside the matrix (IndexError) the failure is
    logged and a matrix filled entirely with 1 is returned.
    """
    width = board_size['sizeX']
    height = board_size['sizeY']
    elements_matrix = [[0] * width for _ in range(height)]

    for element in response_dict['elements']:
        for row_offset in range(element['sizeY']):
            for col_offset in range(element['sizeX']):
                try:
                    elements_matrix[element['posY'] + row_offset - 1][element['posX'] + col_offset - 1] = 1
                except IndexError as e:
                    logger.exception('own_adapter', 'Failed to create elements matrix. Error message: {}'.format(str(e)))
                    # Treat the whole board as occupied.
                    return [[1] * board_size['sizeX'] for _ in range(board_size['sizeY'])]

    return elements_matrix
def download(*args, **kwds):
    """Download the ``page`` keyword argument and record its URL.

    A failing download is logged and followed by a 100 second sleep. The
    page URL is added to ``download_records`` and the page is returned
    whether or not the download succeeded.
    """
    logger.debug("at download... ")
    page = kwds.get("page")

    try:
        page.download()
        logger.info("download status: %r url: %r " % (page.status_code, page.url))
    except Exception as exc:
        logger.exception(exc)
        time.sleep(100)

    download_records.add(page.url)
    return page
def store(kls, *args, **kwds):
    """Store the ``page`` keyword argument and register its URL.

    Returns ``(True, None)`` on success. On failure the exception is
    logged and ``(False, page)`` is returned.
    """
    page = kwds.get("page")
    logger.info("store page: %r deepth :%r" % (page.url, page.deepth))

    try:
        page.store()
        kls.add_img(page.url)
    except Exception as error:
        logger.exception(error)
        return False, page

    # The page reference is dropped on success.
    return True, None
async def _open_connection(self):
    """Connect to ``self._ip:self._port`` and serve, reconnecting on errors.

    Loops forever: opens a connection and hands it to
    ``self._serve_client``. On TimeoutError, ConnectionError or OSError the
    error is logged and, unless ``self._close`` is set (then it is
    re-raised), a new attempt is made after 5 seconds.
    """
    while True:
        try:
            logger.info(
                f"{type(self).__name__}: Trying to connect to {self._ip}:{self._port}..."
            )
            connection = await curio.open_connection(self._ip, self._port)
            await self._serve_client(connection, (self._ip, self._port))
        except (TimeoutError, ConnectionError, OSError) as e:
            # Reconnect if these errors occur. The exception text is put
            # into the message itself: an extra positional argument with no
            # placeholder makes the logging call fail to format.
            logger.exception(f"{type(self).__name__}: Exception: {e}")
            if self._close:
                raise
            await curio.sleep(5)
def fetchPosts(subredditName: str, limit: int, categ: str):
    """Fetch a listing of posts from a subreddit.

    ``categ`` selects the listing: "top", "new", "controversial", "rising"
    or "best". Any other value yields None. When fetching raises, the
    exception is logged and ``NULL`` is returned.
    """
    known_listings = ("top", "new", "controversial", "rising", "best")
    try:
        if categ in known_listings:
            subreddit = reddit.subreddit(subredditName)
            # The category name matches the listing method name.
            listing = getattr(subreddit, categ)
            return listing(limit=limit)
    except Exception as e:
        logger.exception(e)
        return NULL
    return None
def load_configuration(self):
    """Load ``self.data`` from the pickled configuration file.

    When the file is missing or cannot be read, the default configuration
    is loaded through ``_load_default_configuration`` instead.
    """
    config_path = self._get_config_path().as_posix()
    logger.info("Loading configuration from {}".format(config_path))

    try:
        with open(config_path, 'rb') as config_file:
            self.data = pickle.load(config_file)
    except FileNotFoundError:
        logger.warning("Configuration file not found.")
        self._load_default_configuration()
        return
    except IOError:
        logger.exception("Opening configuration failed")
        self._load_default_configuration()
        return

    logger.info("Configuration loaded successfully from the file.")
    logger.debug("Loaded config: {}".format(self.data))
def get_routes_api_call(filters):
    """Request the routes matching ``filters`` from the API.

    Args:
        filters: value for the ``filter[type]`` query parameter.

    Returns:
        The ``requests`` response, or None when the connection fails (the
        failure and the elapsed time are logged).
    """
    start = time.time()
    try:
        resp = requests.get(url=f'{API_URL_BASE}routes?filter[type]={filters}')
    except requests.exceptions.ConnectionError:
        elapsed = time.time() - start
        # logger.exception attaches the active exception itself; the message
        # should describe the event rather than be an exception class.
        logger.exception("API connection error while requesting routes.")
        logger.error(
            f"API connection refused after {elapsed:.5f} seconds.")
        return None

    elapsed = time.time() - start
    logger.debug(
        f"API response received after {elapsed:.5f} seconds.")
    return resp
def _recv(self):
    """Receive one item from ``self.recver`` while flagging the call as blocking.

    ``self.blocking`` is set to True (under ``Lock(self)``) before the
    receive and reset to False afterwards. If the flag was cleared by
    someone else while the receive was in progress, EOFError is raised
    from the ``finally`` clause, replacing any return value or exception.

    Returns the received item, or None when the receive timed out or
    failed with an exception other than EOFError (such failures are
    logged).

    Raises:
        EOFError: from the receiver, or when ``self.blocking`` was cleared
            during the call.
    """
    with Lock(self):
        self.blocking = True
    try:
        return self.recver.recv()
    except TimeoutError:
        # A timeout is not an error: fall through and return None.
        pass
    except EOFError:
        raise
    except BaseException as e:
        # Everything else, including non-Exception errors, is logged and
        # swallowed.
        logger.exception(e)
    finally:
        with Lock(self):
            if not self.blocking:
                # NOTE(review): presumably the flag is cleared elsewhere to
                # signal a shutdown while blocked -- confirm.
                raise EOFError
            self.blocking = False
def scan_port(ip, port):
    """Connect to ``ip:port`` over TCP and read up to 1024 bytes of banner.

    A connection failure is logged and ends the call. A banner of two
    bytes or fewer, or a failure while reading, is recorded as ``'NULL'``.
    The socket is closed in every case.

    NOTE(review): in the visible code the banner is computed but neither
    returned nor stored -- confirm whether more of the function exists.
    """
    banner = ''
    sock = None
    try:
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        sock.connect((ip, port))
    except Exception as e:
        # `e.message` does not exist on Python 3; log the exception itself.
        logger.exception(e)
        if sock is not None:
            # Do not leak the descriptor when connecting failed.
            sock.close()
        return

    try:
        banner = sock.recv(1024)
        if len(banner) <= 2:
            banner = 'NULL'
    except Exception:
        banner = 'NULL'
    finally:
        sock.close()
def get():
    """Fetch the CSV at the ``url`` query parameter and return it as JSON.

    Responds with 400 when ``url`` is missing, 200 with the converted JSON
    on success, and 500 with the exception text on any failure.
    """
    try:
        source_url = request.args.get("url")
        if not source_url:
            return service_response(400, "missing mandatory variable")

        json_data, shape = csv_to_json(source_url, request.args)
        logger.debug("Fetched (rows, columns)=%s amount of data from %s" % (shape, source_url))

        json_headers = {"Content-Type": "application/json"}
        return Response(response=json_data, status=200, headers=json_headers)
    except Exception as e:
        logger.exception(e)
        return service_response(500, str(e))
def setup(M):
    '''
    Initialise curses and configure the GPIO.

    The pages are built through buildPages() and the welcome screen is
    shown. Sets the module globals ``machines``, ``machineManager``,
    ``stdscr`` and ``DB``.

    Raises whatever buildPages() raises, after logging it and calling
    cleanup().
    '''
    global machines, machineManager, stdscr, DB
    logger.info(IFACE,'setup()')
    machines = M
    machineManager = M.machineManager

    stdscr = curses.initscr()
    h, w = stdscr.getmaxyx()
    if not (H, W) == (h, w):
        logger.warning(IFACE,'curses ha inizializzato una finestra di dimensione diversa rispetto al terminale')
        logger.warning(IFACE,'per riferimento, terminale:'+str(H)+'x'+str(W)+', curses:'+str(h)+'x'+str(w))
    logger.info(IFACE,'inizializzata finestra di H='+str(H)+' righe e W='+str(W)+' colonne')

    curses.noecho()
    curses.cbreak()
    curses.curs_set(0)
    stdscr.nodelay(1)
    stdscr.keypad(True)
    curses.start_color()
    curses.use_default_colors()
    try:
        # (pair number, foreground, background); -1 is the terminal default.
        for pair_number, foreground, background in (
                (1, 245, -1),
                (2, 51, -1),
                (3, 214, -1),
                (10, -1, 39),
                (11, -1, 245)):
            curses.init_pair(pair_number, foreground, background)
    except Exception:
        # Colours are optional; do not swallow KeyboardInterrupt/SystemExit
        # the way a bare except would.
        logger.warning(IFACE,'non sono riuscito a inizializzare i colori')

    GPIO.setmode(GPIO.BCM)
    GPIO.setwarnings(False)

    DB = dbhandler.DataBase('mdp.sqlite')

    try:
        buildPages(M)
    except Exception:
        logger.exception(IFACE,'errore nella costruzione delle pagine:')
        # NOTE(review): quitTrigger is not declared global in this function,
        # so this binds a local name only -- confirm the intent.
        quitTrigger = True
        cleanup()
        # Bare raise keeps the original traceback.
        raise
    setActivePage('welcome')
def remove_element(self, element_url):
    """Remove an element from the board with an HTTP DELETE request.

    Returns the HTTP status code of the response, or the code carried by
    the HTTPError when the request fails (the failure is logged).
    """
    try:
        http_method = 'DELETE'
        headers = self.__platform_access.get_headers(http_method, element_url, {}, 'element')
        elements_request = request.Request(element_url, headers=headers)
        # urllib picks the verb through get_method.
        elements_request.get_method = lambda: http_method
        return request.urlopen(elements_request).getcode()
    except urllib.error.HTTPError as e:
        logger.exception('own_adapter', 'Error: remove element {} from {} failed. Error type: {}'.format(element_url, self.get_name(), str(e)))
        return e.code
def get_scan_lists(pid):
    """Return a shuffled list of ``ip:port`` strings for the scan ``pid``.

    The open ports come from the ``data`` field of the masscan documents
    stored for ``pid``; when several documents match, the last one wins.

    Returns:
        list of ``"ip:port"`` strings in random order, or None when an
        exception occurred (it is logged).
    """
    open_ip_ports = {}
    try:
        masscan_result = mongo[Config.MONGODB_C_MSCAN].find({'pid': pid}, {'data': 1, '_id': 0})
        for data in masscan_result:
            open_ip_ports = data['data']

        # shuffle scan ip:port for bypassing firewall
        scan_list = [ip + ':' + port
                     for ip, ports in open_ip_ports.items()
                     for port in ports]
        random.shuffle(scan_list)
        return scan_list
    except Exception as ex:
        # `ex.message` is not available on Python 3 and would make the
        # handler itself fail; log the exception object instead.
        logger.exception(ex)
def update_flow(flow_id, flow, correlation_id):
    """Replace a stored flow with ``flow`` and reinstall its rules.

    Steps: remove the old flow, build the rules for the new one, store the
    new flow, send the install commands, send the delete commands for the
    old path, then publish an INFO message carrying the flow.

    Returns:
        True on success.

    Raises:
        Any exception from the steps above, after logging it and sending
        an UPDATE_FAILURE error message.
    """
    try:
        old_flow = flow_utils.get_old_flow(flow)
        old_flow_path = json.loads(old_flow['flowpath'])['path']

        logger.info('Flow path remove: %s', old_flow_path)

        flow_utils.remove_flow(old_flow, old_flow_path)

        logger.info('Flow was removed: correlation_id=%s, flow_id=%s',
                    correlation_id, flow_id)

        rules = flow_utils.build_rules(flow)

        logger.info('Flow rules were built: correlation_id=%s, flow_id=%s',
                    correlation_id, flow_id)

        flow_utils.store_flow(flow)

        logger.info('Flow was stored: correlation_id=%s, flow_id=%s',
                    correlation_id, flow_id)

        message_utils.send_install_commands(rules, correlation_id)

        logger.info('Flow rules installed: correlation_id=%s, flow_id=%s',
                    correlation_id, flow_id)

        message_utils.send_delete_commands(old_flow_path, old_flow['flowid'],
                                           correlation_id,
                                           int(old_flow['cookie']))

        logger.info('Flow rules removed: correlation_id=%s, flow_id=%s',
                    correlation_id, flow_id)

        payload = {'payload': flow, 'message_type': "flow"}
        message_utils.send_message(payload, correlation_id, "INFO")

    except Exception as e:
        # `e.message` is deprecated and missing on Python 3; str(e) works on
        # both major versions.
        error_text = str(e)
        logger.exception('Can not update flow: %s', error_text)
        message_utils.send_error_message(
            correlation_id, "UPDATE_FAILURE", error_text, flow_id)
        raise

    return True
def upload_to_s3(s3_uploader_obj, file_info, dir_s3_key, current_datetime):
    """Upload one file to the configured S3 bucket.

    The object key is
    ``<S3_BUCKET_UPLOAD_DIR_ROOT>/<dir_s3_key>/<date>/<file name>``,
    e.g. ``QA/tomcat/20-2-2020/logmanager.txt``.

    Returns True when the uploader reports success, False when it reports
    failure or raises (the exception is logged).
    """
    try:
        file_s3_upload_key = f"{settings.S3_BUCKET_UPLOAD_DIR_ROOT}/{dir_s3_key}/{current_datetime.date()}/{file_info.name}"
        uploaded = s3_uploader_obj.upload_file(file_info.path, settings.S3_BUCKET_NAME, file_s3_upload_key)
        if not uploaded:
            logger.info(
                f"[upload_to_s3]:[Upload failed][{file_info.path}][{settings.S3_BUCKET_NAME}][{file_s3_upload_key}]"
            )
        else:
            logger.info(
                f"[upload_to_s3]:[Uploaded][{file_info.path}][{settings.S3_BUCKET_NAME}][{file_s3_upload_key}]"
            )
            return True
    except Exception as e:
        logger.exception(e)
    return False
def send(apiUrl, data, method=None):
    """Send ``data`` as JSON to the internal API at ``apiUrl``.

    Args:
        apiUrl: target URL.
        data: JSON-serialisable payload.
        method: optional value assigned to the request's ``get_method``.
            NOTE(review): urllib2 calls ``get_method()``, so this is
            presumably expected to be a callable returning the verb --
            confirm against callers.

    Returns:
        dict with ``code`` (HTTP status) and ``content`` (response body),
        or None when anything fails (the failure is logged).
    """
    logger.debug("调用内部系统[%s],data[%r]", apiUrl, data)
    try:
        data_json = json.dumps(data)
        # The payload is JSON, so the content type must say so.
        headers = {'Content-Type': 'application/json'}
        request = urllib2.Request(url=apiUrl, headers=headers, data=data_json)
        if method is not None:
            request.get_method = method
        response = urllib2.urlopen(request)
        result = {'code': response.getcode(), 'content': response.read()}
        logger.debug("调用[%s]返回结果:%r", apiUrl, result)
        return result
    except Exception as e:
        # The format string must come first; the original passed the
        # exception object in its place.
        logger.exception("调用内部系统[%s],data[%r],发生错误[%r]", apiUrl, data, e)
        return None
def finish(self, data):
    '''
    Send ``data`` to the client as the body of a 200 response and close.

    Every entry of HTTP_HEADERS plus a Content-Length header is sent
    first. Socket errors are logged and swallowed.
    '''
    try:
        self.send_response(200)
        for header_name, header_value in HTTP_HEADERS.iteritems():
            self.send_header(header_name, header_value)
        body_length = len(data)
        self.send_header('Content-Length', str(body_length))
        self.end_headers()
        self.wfile.write(data)
        self.close()
    except socket.error:
        logger.exception('Send response error')
    finally:
        # Help garbage collection
        sys.exc_traceback = None
def update_element(self, pos_x, pos_y, new_element_link, size_x=1, size_y=1, caption=''):
    """Update a board element with an HTTP PUT request.

    Args:
        pos_x, pos_y: element position; sent as strings.
        new_element_link: path appended to the platform URL.
        size_x, size_y: element size; sent as strings.
        caption: element caption.

    Returns:
        The HTTP status code of the response, or the code carried by the
        HTTPError when the request fails (the failure is logged).
    """
    try:
        http_method = 'PUT'
        detail = 'element'
        url = self.__platform_access.get_platform_url() + new_element_link
        # Build the payload from a dict so a caption containing quotes or
        # backslashes is escaped correctly instead of breaking hand-built
        # JSON text. Numeric fields stay strings, as before.
        element = {
            "posX": str(pos_x),
            "posY": str(pos_y),
            "sizeX": str(size_x),
            "sizeY": str(size_y),
            "type": "MultiInput",
            "caption": caption,
        }
        payload = json.dumps({"element": element}, separators=(',', ':'))
        values = {}
        headers = self.__platform_access.get_headers(http_method, url, values, detail, payload=payload)
        add_name_request = request.Request(url, headers=headers, data=payload.encode())
        # urllib picks the verb through get_method.
        add_name_request.get_method = lambda: http_method
        response = request.urlopen(add_name_request)
        return response.getcode()
    except urllib.error.HTTPError as e:
        logger.exception(
            'own_adapter',
            'Error: add element name {} to {} failed. Error type: {}'.
            format(caption, self.get_name(), str(e)))
        return e.code
def func_wrap2(*args, **kargs):
    """Return the result of ``func``, served from the cache when possible.

    The cache key is ``"<func name>:<argument signature>"``. A hit is
    rebuilt into a fresh ``obj_type`` instance through ``from_dict``. On a
    miss ``func`` is called and a non-None result is stored as JSON with
    the expiry ``ttl``; a failure to store is logged and ignored.
    """
    key = "%s:%s" % (func.__name__, _mk_cache_sig(*args, **kargs))

    cached_json = rs.get(key)
    if cached_json is not None:
        cached_obj = obj_type()
        cached_obj.from_dict(json.loads(cached_json))
        return cached_obj

    obj_inst = func(*args, **kargs)
    if obj_inst is None:
        return obj_inst

    data_json = json.dumps(obj_inst.to_dict())
    try:
        rs.set(key, data_json, ex=ttl)
    except:
        exception(
            "Exception while trying to set {key} to {data_json}".
            format(key=key, data_json=data_json))
    return obj_inst
async def _receive_datagram(self):
    """Read one NMEA message from the I/O device.

    Characters are discarded until a start marker (``$`` or ``!``) is
    seen, then collected up to and including the next newline.

    Returns:
        The message including its start marker and trailing newline, or
        ``""`` when a TypeError occurs while reading (logged together with
        the data received so far).
    """
    received = ""
    try:
        # First receive start of nmea-message (either '$' or '!')
        while received not in ("$", "!"):
            received = await self._io_device.read(1)

        while True:
            received += await self._io_device.read(1)
            if received[-1] == "\n":
                self._logger.info(received, ingoing=True)
                return received
    except TypeError as e:
        # The exception text is put into the message itself: an extra
        # positional argument with no placeholder makes the logging call
        # fail to format.
        logger.exception(f"{self.get_name()}: Error when reading. Wrong encoding? {e}")
        self._logger.error(received, ingoing=True)
        return ""
async def send(message: discord.Message, *args):
    """Post up to five embeds from a subreddit into the message's channel.

    ``args`` must be ``(category, subreddit)`` with ``category`` in
    ``categories``; otherwise "Invalid Syntax" is sent. Posts or embeds
    equal to ``NULL`` are skipped. Any exception is logged and reported to
    the channel.
    """
    syntax_ok = len(args) == 2 and args[0] in categories
    if not syntax_ok:
        logger.log("invalid Syntax")
        await message.channel.send("Invalid Syntax")
        return

    try:
        for post in fetchPosts(args[1], 5, args[0]):
            if post == NULL:
                continue
            embedd = getEmbed(post)
            if embedd == NULL:
                continue
            await message.channel.send(embed=embedd)
        logger.log("Posted " + args[0] + " from r/" + args[1] + " for " +
                   str(message.author))
    except Exception as e:
        logger.exception(e)
        await message.channel.send("Error occured! Check the log file.")
def setRecipe(self, recipe):
    """Store ``recipe`` for this machine and reset its progress stats.

    With a truthy recipe the block/step/progress/blockprogress stats are
    set to ``(1, 1, 0.0, 0.0)``; otherwise they are all set to None.
    Failures are logged and swallowed.
    """
    logger.debug(IFACE,'setRecipe('+str(recipe)+')')
    progress_stats = ('block','step','progress','blockprogress')
    try:
        DB.setMachineStat( self.machinename, 'recipe', recipe )
        if recipe:
            progress_values = (1,1,0.0,0.0)
        else:
            progress_values = (None,None,None,None)
        DB.setMachineStat( self.machinename, progress_stats, progress_values )
    except:
        logger.exception(IFACE,'errore nell\'impostare la ricetta')
def give(to_username, from_address, months, id, reply, block_number):
    """Gild the Reddit thing ``id`` and optionally queue a comment for it.

    The thing is gilded once; when ``months`` is not ``'1'`` the remaining
    months are gilded on the user. With ``reply`` set, a message
    describing the gilding is published on the local ``tip`` queue after
    a 3 second pause.

    Returns False after any exception (which is logged); otherwise sleeps
    one second and returns None.
    """
    logger.log("Logging in...")
    credentials = config['redditGold']
    r = praw.Reddit(client_id=credentials['client_id'],
                    client_secret=credentials['client_secret'],
                    username=credentials['username'],
                    password=credentials['password'],
                    user_agent='bot')

    try:
        gildable_thing = thing.find_thing(id, r)
        gildable_thing.gild()

        if months != '1':
            extra_months = int(months) - 1
            r.redditor(to_username).gild(months=extra_months)

        logger.log("Gilded: " + id + " " + from_address + " gilded " + months + " month(s) " + to_username, slack=True)

        if reply:
            time.sleep(3)
            connection = pika.BlockingConnection(pika.ConnectionParameters('localhost'))
            channel = connection.channel()
            channel.queue_declare(queue='tip')
            tip_message = {'username': to_username,
                           'fromAddress': from_address,
                           'months': months,
                           'id': id,
                           'blockNumber': block_number}
            channel.basic_publish(exchange='',
                                  routing_key='tip',
                                  body=json.dumps(tip_message))
            logger.log("Queued for commenting: " + id)
            connection.close()
    except requests.exceptions.ConnectionError as e:
        logger.exception(e.response)
        return False
    except Exception as e:
        logger.exception(e)
        return False

    time.sleep(1)
def _sched(self): ''' different queue masterd through this schedu ''' logger.info(" at sched ..") job, i, func, args, kwds, result = self.task g = gevent.getcurrent() g.working = job g.page = kwds.get("page") and kwds.get("page").url logger.info("got job %r %r page url = %r" % (job, func, kwds["page"].url)) try: status, page = (True, func(*args, **kwds)) except Exception as e: result = (False, e) logger.exception(e) return if not page: return current_job, current_page = job, page if current_job == "store": return try: page = self.queue_transform(job=current_job, page=page) next_step = page.out_queue except Exception as e: logger.exception(e) try: logger.info("at sched .. current_job: %r next_job: %r with page: %r" % (current_job, next_step, page.url) ) if next_step == "store": try: task = ( "store", None, self.store, (), {"page": page}, result) self.store_queue.put(task) del task except Exception, e: logger.exception("exception : %s " % (sys.exc_info()[1])) elif next_step == "parse": logger.debug( "generate next task %s to queue %r" % (next_step, page.url)) try: task = ( "parse", None, self.parse, (), {"page": page}, result) self.parse_queue.put(task) del task except Exception, e: logger.exception(e)
def updateList(self):
    """Redraw the page with the numbered list of configured machines.

    Shows a prompt and then either one numbered line per machine
    (``"<n>. <name> (<template>)"``) or, when the machine list equals
    ``[]``, the two "none configured" lines. Failures are logged and
    swallowed.
    """
    logger.debug(IFACE,self.title+' sta eseguendo updateList(self)')
    self.page.erase()
    self.page.box()
    self.addstr(2, 2, l10n.machinesMain.selectPrompt)
    try:
        machines = DB.getMachines()
        if machines == []:
            self.addstr(4, 4, l10n.machinesMain.noneConfigured)
            self.addstr(5, 4, l10n.machinesMain.pleaseConfigure)
        else:
            # Entries are numbered from 1 and start on row 4.
            for number, (name, template) in enumerate(machines, 1):
                self.addstr(3+number, 4, str(number)+'. '+name+' ('+template+')')
    except:
        logger.exception(IFACE,'updateList() di '+self.title+' non è andato a buon fine.')
def create_radius_session(radiusnas, radiususer, accnt_sessionid, framed_ip_address):
    """Create and persist a RADIUS session row.

    Args:
        radiusnas: NAS record; its ``id`` is stored.
        radiususer: user record; its ``id`` and ``mac`` are stored.
        accnt_sessionid: accounting session id.
        framed_ip_address: IP address assigned to the session.

    Returns:
        The new ``Radiussessions`` object, or None when the commit fails
        with an SQLAlchemyError (rolled back and logged).
    """
    radiussession = models.Radiussessions(nas_id=radiusnas.id,
                                          radiususer_id=radiususer.id,
                                          mac=radiususer.mac,
                                          accnt_sessionid=accnt_sessionid,
                                          framed_ip_address=framed_ip_address)
    session = get_db_session()
    try:
        session.add(radiussession)
        session.commit()
    except SQLAlchemyError:
        session.rollback()
        exception('Exception while trying to create session')
        return None
    finally:
        # Close on every path, including unexpected exceptions.
        session.close()
    return radiussession
def connection_keep_recv(sx, addr):
    """Receive and dispatch data from ``sx`` until the connection is dead.

    The loop ends when ``sx`` appears in ``deadConnection`` or the peer
    sends an empty read. Parse failures are logged and the data is
    dropped; a socket error terminates the connection; any other
    exception is logged.
    """
    while True:
        if sx in deadConnection:
            break
        try:
            data = sx.recv(1024)
            if data == b'':
                # Empty read: the peer closed the connection.
                terminate_connection(sx, addr)
                logger.log('Exit thread', address=addr)
                break
            try:
                parseData(sx, addr, data)
            except Exception as e:
                logger.handled_exception(
                    'Unable to parse data, dropped...', address=addr, exception =e)
        except socket.error:
            terminate_connection(sx,addr)
        except Exception as e:
            logger.exception(e)
def __init__(self, address, interface, timeout=None, encoding=None, logging=None):
    """Create the server socket and start listening on ``address``.

    Args:
        address: address to bind to.
        interface: a ``JsonRpcIface`` subclass (the class itself).
        timeout: stored as ``self.timeout``.
        encoding: text encoding; 'utf-8' when empty or None.
        logging: passed to ``logger.setup``.

    Raises:
        TypeError: when ``interface`` is not a JsonRpcIface subclass.
        Exception: any error while creating the socket, re-raised after
            being logged.
    """
    is_valid_interface = (isinstance(interface, type)
                          and issubclass(interface, JsonRpcIface))
    if not is_valid_interface:
        raise TypeError('Interface must be JsonRpcIface subclass')

    self.interface = interface
    self.timeout = timeout
    if encoding:
        self.encoding = encoding
    else:
        self.encoding = 'utf-8'
    logger.setup(logging)

    try:
        asyncore.dispatcher.__init__(self)
        self.create_socket(socket.AF_INET, socket.SOCK_STREAM)
        self.bind(address)
        self.listen(0)
    except Exception:
        logger.exception('Server run error')
        raise
def get_isls():
    """Return every inter-switch link stored in the graph.

    Each link becomes a dict with ``id``, ``speed``, ``latency_ns``,
    ``available_bandwidth``, ``state`` ("DISCOVERED"), a two-node ``path``
    (source then destination) and ``message_type`` ("isl").

    Raises:
        Any exception from the query or the conversion, after logging it.
    """
    try:
        query = "MATCH (a:switch)-[r:isl]->(b:switch) RETURN r"
        result = graph.run(query).data()

        isls = []
        for data in result:
            link = data['r']
            source_node = {
                'switch_id': str(link['src_switch']),
                'port_no': int(link['src_port']),
                'seq_id': 0,
                'segment_latency': int(link['latency'])
            }
            destination_node = {
                'switch_id': str(link['dst_switch']),
                'port_no': int(link['dst_port']),
                'seq_id': 1,
                'segment_latency': 0
            }
            isls.append({
                'id': str(link['src_switch'] + '_' + str(link['src_port'])),
                'speed': int(link['speed']),
                'latency_ns': int(link['latency']),
                'available_bandwidth': int(link['available_bandwidth']),
                'state': "DISCOVERED",
                'path': [source_node, destination_node],
                'message_type': 'isl'
            })

        logger.info('Got isls: %s', isls)

    except Exception as e:
        # The original passed an argument with no placeholder (and the
        # Python 2-only `e.message`), so the log record failed to format.
        logger.exception('Can not get isls: %s', e)
        raise

    return isls
def post():
    """Convert the JSON request body to CSV and POST it to ``url``.

    Responds with 400 when the ``url`` query parameter is missing,
    otherwise with the status code and text returned by the target, or
    with 500 and the exception text on any failure.
    """
    try:
        url = request.args.get("url")
        if not url:
            return service_response(400, "missing mandatory variable")

        csv_data, shape, csv_encoding = json_to_csv(request.get_json(), request.args)
        logger.debug("POSTing (rows, columns)=%s amount of data to %s" % (shape, url))

        # Media type parameters are written name=value; the original
        # "charset: x" form is not a valid Content-Type parameter.
        headers = {
            "Content-Type": "text/csv; charset={}".format(csv_encoding)
        }
        r = requests.post(url, data=csv_data.encode(csv_encoding), headers=headers)
        return service_response(r.status_code, r.text)
    except Exception as e:
        logger.exception(e)
        return service_response(500, str(e))
def update_radiussession(id, data_used=None, time_used=None, disassoc=None):
    """Update usage figures and timestamps of a RADIUS session.

    ``lastseen_time`` is always set to the current UTC time. ``data_used``
    and ``time_used`` are stored only when truthy; a truthy ``disassoc``
    stamps ``disassoc_time``.

    Returns:
        The updated session object, or None when the commit fails with an
        SQLAlchemyError (rolled back and logged).
    """
    session = get_db_session()
    try:
        radiussession = session.query(models.Radiussessions).get(id)
        radiussession.lastseen_time = arrow.utcnow().naive
        if data_used:
            radiussession.data_used = data_used
        if time_used:
            radiussession.duration = int(time_used)
        if disassoc:
            radiussession.disassoc_time = arrow.utcnow().naive
        try:
            session.commit()
        except SQLAlchemyError:
            session.rollback()
            # The original message said "create", copied from the creation
            # helper; this function updates.
            exception('Exception while trying to update session')
            return None
        return radiussession
    finally:
        # Close on every path, including unexpected exceptions.
        session.close()
def __init__(self, parent):
    """Build the gauges box as a sub-window of ``parent.page``.

    The box holds a motor gauge, a heater gauge (one or two pins), an
    optional rotation gauge (when the template has a ``rotpin``), a
    temperature reading and a resistance reading. On any failure the
    error is logged, ``cleanup()`` is called and the exception is
    re-raised.
    """
    logger.debug(IFACE,'creazione di un GaugesBox')
    self.template = parent.template
    templatename = self.template['name']

    ncols = 19
    # One extra row when the rotation gauge is present.
    nrows = 7 if self.template['rotpin'] else 6
    prow = 3
    pcol = parent.w-ncols-2

    try:
        self.win = parent.page.derwin(nrows,ncols,prow,pcol)
        self.win.box()
        self.gaugesDict = {}
        gauge_col = ncols-2

        self.addstr(1, 1, l10n.machineDials['motor'])
        self.gaugesDict['MOT'] = Gauge(
            parentwindow=self.win, prow=1, pcol=gauge_col,
            pins=[self.template['motpin']])

        last = nrows-2
        heater_row = last-2
        self.addstr(heater_row, 1, l10n.machineDials['heater'])
        if self.template['th2pin']:
            self.gaugesDict['THM'] = Gauge(
                parentwindow=self.win, prow=heater_row, pcol=gauge_col,
                pins=[self.template['th1pin'],self.template['th2pin']])
        else:
            self.gaugesDict['THM'] = Gauge(
                parentwindow=self.win, prow=heater_row, pcol=gauge_col,
                pins=[self.template['th1pin']])

        if self.template['rotpin']:
            self.addstr(2, 1, l10n.machineDials['rotation'])
            self.gaugesDict['ROT'] = Gauge(
                parentwindow=self.win, prow=2, pcol=gauge_col,
                pins=[self.template['rotpin']])

        temperature_row = last-1
        self.addstr(temperature_row, 1, l10n.machineDials['temperature'])
        self.addstr(temperature_row, ncols-3, '°C')
        self.tempReading = TempReading(
            parentwindow=self.win, prow=temperature_row, pcol=ncols-6,
            thermometer=parent.thermometer )

        self.addstr(last, 1, 'R')
        self.addstr(last, ncols-3, 'kΩ')
        self.resistReading = ResistReading(
            parentwindow=self.win, prow=last, pcol=ncols-10,
            thermometer=parent.thermometer )
    except Exception as e:
        logger.exception(IFACE,'qualcosa è andato storto:')
        cleanup()
        raise e
def ReadStatistics(self):
    """Fetch and parse the XML statistics of the server.

    Returns:
        The parsed XML root element, or None when no ``statsport`` is
        configured, when the HTTP request fails, or when the response is
        not valid XML. In the last case the raw response is saved to a
        timestamped file under ``adm.loaddir`` and the error is logged.
    """
    settings = self.server.settings
    if not settings.get('statsport'):
        return None

    try:
        response = requests.get("http://%s:%d" % (settings['host'], settings['statsport']),
                                timeout=settings.get('timeout', 1.))
        response.raise_for_status()
        txt = response.text
    except Exception:
        return None

    try:
        root = xmltree.fromstring(txt)
    except Exception:
        import logger
        import adm
        import time

        timestamp = time.strftime("%Y%m%d%H%M%S", time.localtime(time.time()))
        fname = "%s/xml-%s_%s.xml" % (adm.loaddir, settings['host'], timestamp)
        logger.exception("Error parsing BIND response %s", fname)
        # Keep the unparsable response for later inspection; the context
        # manager closes the file even when the write fails.
        with open(fname, "w") as f:
            f.write(txt)
        return None
    return root
def _process_command(self, cmd, sender):
    """Parse one command string and dispatch it (runs on a worker thread).

    The command is split on whitespace and handed to the sub-command
    parser; the handler selected by the parser is then called as
    ``args.proc(args, sender)``.

    If the parser exits with code 2 (no sub command matched), the raw
    command is offered to the parser's direct-command handling instead, and
    the sender is told when nothing matches at all.

    Args:
        cmd: raw command text.
        sender: object passed through to the handlers; its
            ``send_message`` is used for the "unknown command" reply.
    """
    log = self._logger.getChild('worker')
    try:
        tokens = cmd.split()
        log.debug('get cmd: ' + str(tokens))
        parsed, leftovers = self._cmd_parser.parse_known_args(tokens)
        if leftovers:
            log.debug('unknown args: ' + str(leftovers))  # pragma: no cover
        parsed.proc(parsed, sender)
    except SystemExit as exit_request:
        # TODO maybe this handling could be hidden inside the cmd parser
        if exit_request.code != 2:
            log.exception('Unexpected SystemExit')  # pragma: no cover
            return

        # Exit code 2: no sub command was found in cmd, so try the direct
        # commands before giving up.
        if self._cmd_parser.process_direct_commands(cmd, sender):
            return

        # Nothing matched: tell the user the command is not recognised.
        log.debug('no known args found.')
        sender.send_message('Unknown commands.')
def DoInstall(self, tmpDir, source):
    """Install a module from a directory or a zip archive.

    When ``source`` is not a directory it is opened as a zip archive and
    extracted into ``tmpDir``; the first archive entry is then used as the
    directory to copy. The tree is copied into ``adm.loaddir`` (for the
    "Core" module) or ``adm.loaddir/<modid>``, and ``tmpDir`` is removed on
    a best-effort basis.

    Args:
        tmpDir: scratch directory used for extraction; removed afterwards.
        source: path to a module directory or to a zip archive.

    Returns:
        True when the tree was copied, False when the archive could not be
        extracted (``self.ModuleInfo`` then holds the user-facing message).
    """
    if not os.path.isdir(source):
        try:
            # The context manager closes the archive even when namelist()
            # or extractall() raises.
            with zipfile.ZipFile(source) as archive:
                # presumably the first entry is the archive's top-level
                # directory -- verify against how packages are built
                zipDir = archive.namelist()[0]
                # NOTE(review): if archives can come from untrusted
                # sources, member paths should be validated before this.
                archive.extractall(tmpDir)
        except Exception:
            self.ModuleInfo = xlt("Error extracting\n%s") % source
            logger.exception("Error extracting %s", source)
            return False
        source = os.path.join(tmpDir, zipDir)

    if self.modname == "Core":
        destination = adm.loaddir
    else:
        destination = os.path.join(adm.loaddir, self.modid)

    copytree(source, destination)
    # Best-effort cleanup: a leftover temp dir must not fail the install.
    shutil.rmtree(tmpDir, ignore_errors=True)
    return True
def search(self, url):
    """Fetch a search-result page through a proxy and return its result links.

    The page is requested via ``self.mc`` after switching proxy with
    ``self.reset_proxy_ip()``. Fetch errors and HTTP 403 responses are
    retried with a fresh proxy. An HTTP 503 is treated as a captcha page:
    the captcha image is downloaded, sent to the recognition service, and
    the answer is submitted, again with a bounded number of retries.

    Args:
        url: search URL to fetch.

    Returns:
        list: ``href`` values of all ``.r>a`` anchors on the final page, or
        an empty list when retries are exhausted or captcha handling fails.
    """
    # Separate client, built with the captcha wait timeout; it is only used
    # for the upload to the recognition service below.
    mc = MyCurl(accept_encoding='gzip, deflate', time_out=config.vc_wait_time)
    _count = 0
    while True:
        try:
            self.reset_proxy_ip()
            m, v = self.mc.get_page(url)
        except Exception as e:
            if _count > 3:
                logger.debug('获取页面错误 超过 3次 返回null')
                return []
            logger.exception(e)
            _count += 1
            logger.error('proxy not working..' + self.proxy + str(e))
            continue

        if m['http-code'] == 403:
            if _count > 3:
                logger.debug('获取页面错误 超过 3次 返回null')
                return []
            _count += 1
            logger.error('proxy not working..' + self.proxy + ' : 403')
            continue

        try:
            if m['http-code'] == 503:
                # 503 means a captcha is required.
                logger.debug('http_code: 503 需要验证码')
                count = 0
                while True:
                    page = gzdecode(v).decode('utf-8', 'ignore')
                    d = pq(page)
                    href = 'https://ipv4.google.com' + d('img').attr('src')
                    logger.debug('img url ' + href)

                    # Download the captcha image under a unique local name.
                    _uuid = uuid.uuid1().get_hex()
                    localtime = time.localtime()
                    tmp_name = str(localtime[0]) + str(localtime[1]) + str(localtime[2]) + _uuid[0:8] + _uuid[16:20]
                    file_path = tmp_name
                    logger.debug('file_path ' + file_path)
                    self.mc.down_file(href, file_path)

                    upload_file_list = [('image', 'imagecode', file_path)]
                    m, v = mc.upload_file('http://192.168.60.240:8780/verificationCodeFromInputStream', upload_file_list)
                    # NOTE(review): the downloaded captcha file is never
                    # deleted (the removal below is disabled) -- confirm
                    # whether that is intentional.
                    # os.remove(file_path)
                    result = json.loads(v)
                    logger.debug(result)
                    verificationCode = result['verificationCode']
                    logger.debug('识别出的验证码: %s' % verificationCode)

                    # Collect the hidden form fields needed for submission.
                    # Distinct names are used so the response body `v` is
                    # not shadowed.
                    _d = {}
                    for field in d('input').items():
                        if field.attr('type') == 'hidden':
                            name, value = field.attr('name'), field.attr('value')
                            logger.debug(value)
                            _d[name] = value

                    url = 'https://ipv4.google.com/sorry/CaptchaRedirect?q=' + _d['q'] + '&continue=' + urllib.quote_plus(_d['continue']) + '&id=' + _d['id'] \
                        + '&captcha=' + result['verificationCode'] + '&submit=' + urllib.quote('提交')
                    # Log the submit URL itself (previously the image URL
                    # was logged here by mistake).
                    logger.debug('submit url ' + url)
                    m, v = self.mc.get_page(url)
                    logger.debug(m)
                    logger.debug(v)

                    if m['http-code'] != 503:
                        break
                    # Still 503: the captcha answer was rejected.
                    logger.debug('验证码识别错误')
                    if count > 3:
                        logger.debug('验证码识别错误 超过 3次 返回null')
                        return []
                    count += 1
            break
        except Exception as e:
            logger.error(str(e) + ' 具体栈回溯信息查看crit.log ')
            logger.exception(e)
            # if file_path:
            #     os.remove(file_path)
            return []

    page = gzdecode(v).decode('utf-8', 'ignore')
    logger.debug(page)
    d = pq(page)
    return [anchor.attr('href') for anchor in d('.r>a').items()]
def initialize(config_file):
    """Load the configuration and run the one-time start-up initialisation.

    Under ``INIT_LOCK`` this (re)loads ``CONFIG`` from ``config_file``,
    then, unless initialisation already happened, creates the log, output
    and backup directories, starts the logger, determines the current and
    latest versions, and records the process umask.

    Note that ``CONFIG`` and ``CONFIG_FILE`` are replaced on every call,
    before the ``_INITIALIZED`` check.

    Args:
        config_file: path handed to ``pacvert.config.Config``.

    Returns:
        bool: True when initialisation ran, False when it had already run.
    """
    with INIT_LOCK:

        global CONFIG
        global CONFIG_FILE
        global _INITIALIZED
        global CURRENT_VERSION
        global LATEST_VERSION
        global UMASK
        global POLLING_FAILOVER

        CONFIG = pacvert.config.Config(config_file)
        CONFIG_FILE = config_file

        assert CONFIG is not None

        if _INITIALIZED:
            return False

        #if CONFIG.HTTP_PORT < 21 or CONFIG.HTTP_PORT > 65535:
        #    pacvert.logger.warn(
        #        'HTTP_PORT out of bounds: 21 < %s < 65535', CONFIG.HTTP_PORT)
        #    CONFIG.HTTP_PORT = 8181

        #if not CONFIG.HTTPS_CERT:
        #    CONFIG.HTTPS_CERT = os.path.join(DATA_DIR, 'server.crt')
        #if not CONFIG.HTTPS_KEY:
        #    CONFIG.HTTPS_KEY = os.path.join(DATA_DIR, 'server.key')

        if not CONFIG.LOG_DIR:
            CONFIG.LOG_DIR = os.path.join(DATA_DIR, 'logs')

        if not os.path.exists(CONFIG.LOG_DIR):
            try:
                os.makedirs(CONFIG.LOG_DIR)
            except OSError:
                # Clear the log dir so the logger is started without one.
                CONFIG.LOG_DIR = None

                if not QUIET:
                    sys.stderr.write("Unable to create the log directory. "
                                     "Logging to screen only.\n")

        if not CONFIG.OUTPUT_DIRECTORY:
            CONFIG.OUTPUT_DIRECTORY = os.path.join(DATA_DIR, 'output')

        if not os.path.exists(CONFIG.OUTPUT_DIRECTORY):
            try:
                os.makedirs(CONFIG.OUTPUT_DIRECTORY)
            except OSError:
                if not QUIET:
                    # Trailing newline added so the message does not run
                    # into whatever is printed next.
                    sys.stderr.write("Unable to create the output directory.\n")

        # Start the logger, disable console if needed
        logger.initLogger(console=not QUIET, log_dir=CONFIG.LOG_DIR,
                          verbose=VERBOSE)

        if not CONFIG.BACKUP_DIR:
            CONFIG.BACKUP_DIR = os.path.join(DATA_DIR, 'backups')
        if not os.path.exists(CONFIG.BACKUP_DIR):
            try:
                os.makedirs(CONFIG.BACKUP_DIR)
            except OSError as e:
                logger.error("Could not create backup dir '%s': %s" % (CONFIG.BACKUP_DIR, e))

        #if not CONFIG.CACHE_DIR:
        #    CONFIG.CACHE_DIR = os.path.join(DATA_DIR, 'cache')
        #if not os.path.exists(CONFIG.CACHE_DIR):
        #    try:
        #        os.makedirs(CONFIG.CACHE_DIR)
        #    except OSError as e:
        #        logger.error("Could not create cache dir '%s': %s" % (CONFIG.CACHE_DIR, e))

        # Initialize the database
        #logger.info('Checking to see if the database has all tables....')
        #try:
        #    dbcheck()
        #except Exception as e:
        #    logger.error("Can't connect to the database: %s" % e)

        # Check if pacvert has a uuid
        #if CONFIG.PMS_UUID == '' or not CONFIG.PMS_UUID:
        #    my_uuid = generate_uuid()
        #    CONFIG.__setattr__('PMS_UUID', my_uuid)
        #    CONFIG.write()

        # Get the currently installed version. Returns None, 'win32' or the git
        # hash.
        try:
            CURRENT_VERSION, CONFIG.GIT_BRANCH = versioncheck.getVersion()
        except TypeError as e:
            logger.error("Something went terribly wrong by checking for the current version: "+str(e))

        # Write current version to a file, so we know which version did work.
        # This allows one to restore to that version. The idea is that if we
        # arrive here, most parts of pacvert seem to work.
        if CURRENT_VERSION:
            version_lock_file = os.path.join(DATA_DIR, "version.lock")

            try:
                with open(version_lock_file, "w") as fp:
                    fp.write(CURRENT_VERSION)
            except IOError as e:
                logger.error("Unable to write current version to file '%s': %s" %
                             (version_lock_file, e))

        # Check for new versions
        if CONFIG.CHECK_GITHUB_ON_STARTUP and CONFIG.CHECK_GITHUB:
            try:
                LATEST_VERSION = versioncheck.checkGithub()
            except Exception:
                # Narrowed from a bare except so KeyboardInterrupt and
                # SystemExit still propagate during start-up.
                logger.exception("Unhandled exception")
                LATEST_VERSION = CURRENT_VERSION
        else:
            LATEST_VERSION = CURRENT_VERSION

        # Store the original umask: os.umask() can only be read by setting
        # it, so set a dummy value and restore the original immediately.
        UMASK = os.umask(0)
        os.umask(UMASK)

        _INITIALIZED = True
        return True
def go():
    """Send email responses for every record the autoresponder iterator yields.

    For each record, the reply target is resolved, the address is checked
    against (and added to) the blacklist, the diagnostic info is analysed
    into a list of response ids, and one email is sent per response that
    has content. A failure to send one response is logged and does not stop
    the remaining responses or records.
    """
    logger.debug_log('go: enter')

    # Per the original note, the record iterator throttles itself if/when
    # there are no records to process.
    for autoresponder_info in _autoresponder_record_iter():
        diagnostic_info = autoresponder_info.get('diagnostic_info')

        logger.debug_log('go: got autoresponder record')

        reply_info = _get_email_reply_info(autoresponder_info)

        if not reply_info or not reply_info.address:
            # If we don't have any reply info, we can't reply
            logger.debug_log('go: no reply_info or address')
            continue

        # Check if the address is blacklisted
        if _check_and_add_address_blacklist(reply_info.address):
            logger.debug_log('go: blacklisted')
            continue

        responses = _analyze_diagnostic_info(diagnostic_info, reply_info)

        if not responses:
            logger.debug_log('go: no response')
            continue

        logger.log('Sending feedback response')

        for response_id in responses:
            response_content = _get_response_content(response_id, diagnostic_info)

            if not response_content:
                logger.debug_log('go: no response_content')
                continue

            # The original diagnostic info may have originated from an email,
            # in which case we have a subject to reply to. Or it may have
            # originated from an uploaded data package, in which case we need
            # to set our own subject.
            if isinstance(reply_info.subject, dict):
                subject = u'Re: %s' % reply_info.subject.get('text', '')
            else:
                subject = response_content['subject']

            try:
                sender.send_response(reply_info.address,
                                     config['reponseEmailAddress'],
                                     subject,
                                     response_content['body_text'],
                                     response_content['body_html'],
                                     reply_info.message_id,
                                     response_content['attachments'])
            except Exception as e:
                # Best effort: log and carry on with the next response.
                logger.debug_log('go: send_response excepted')
                logger.exception()
                logger.error(str(e))
# Entry-point script template. The `{{package}}` placeholders are not valid
# Python as written; presumably a scaffolding tool substitutes the real
# package name when the project is generated -- TODO confirm.
import os
from {{package}}.bootstrap import app
from {{package}} import logger

""" Alias to be compatible w/ AWS or heroku (beanstalk) """
application = app

if __name__ == '__main__':
    try:
        # The listening port comes from the `{{package}}_PORT` environment
        # variable and defaults to 8000.
        port = int(os.environ.get('{{package}}_PORT', 8000))
        # NOTE(review): debug=True together with host='0.0.0.0' serves the
        # app in debug mode on every interface -- confirm this script is
        # only ever used for local development.
        application.run(debug=True, host='0.0.0.0', port=port)
    except Exception:
        """ don't ever catch SystemExit's here this could bug the reload in flasks webserver """
        logger.exception("caught in run.py")
        # RESTART
        # Exit with status 3 after logging; presumably a supervising process
        # interprets that status as "restart" -- verify against deployment.
        raise SystemExit(3)
def go():
    """Fetch emails, decrypt diagnostic attachments, and store the results.

    For every message yielded by the email getter, each attachment is tried
    in turn: it is JSON-decoded, decrypted, YAML-loaded, converted, sanity
    checked and transformed. The first attachment that passes all steps
    becomes the message's diagnostic info. Decryption failures are reported
    by email to ``config['decryptedEmailRecipient']``.

    The diagnostic info (if any) is stored together with the email info,
    and an autoresponder entry is always stored for the message.
    """
    logger.debug_log('maildecryptor.go start')

    emailgetter = EmailGetter(config['popServer'],
                              config['popPort'],
                              config['emailUsername'],
                              config['emailPassword'])

    # Retrieve and process email.
    # Note that `emailgetter.get` throttles itself if/when there are no emails
    # immediately available.
    for msg in emailgetter.get():
        logger.debug_log('maildecryptor.go: msg has %d attachments' % len(msg['attachments']))

        diagnostic_info = None

        #
        # First try to process attachments.
        #
        for attachment in msg['attachments']:
            # Not all attachments will be in our format, so expect exceptions.
            try:
                encrypted_info = attachment.getvalue()
                encrypted_info = json.loads(encrypted_info)

                # Work on a local candidate so that an attachment which
                # fails part-way, or fails the sanity check, can never leak
                # a half-processed value into `diagnostic_info` and be
                # stored below.
                candidate = decryptor.decrypt(encrypted_info)
                candidate = candidate.strip()
                candidate = _load_yaml(candidate)

                # Modifies candidate
                utils.convert_psinet_values(config, candidate)

                if not utils.is_diagnostic_info_sane(candidate):
                    # Something is wrong. Skip and continue.
                    continue

                # Modifies candidate
                datatransformer.transform(candidate)

                diagnostic_info = candidate
                logger.log('email attachment decrypted')
                break

            except decryptor.DecryptorException as e:
                # Something bad happened while decrypting. Report it via email.
                logger.exception()
                try:
                    sender.send(config['decryptedEmailRecipient'],
                                config['emailUsername'],
                                u'Re: %s' % (msg['subject'] or ''),
                                'Decrypt failed: %s' % e,
                                msg['msgobj']['Message-ID'])
                except smtplib.SMTPException as send_err:
                    # Something went wrong with the sending of the response. Log it.
                    logger.exception()
                    logger.error(str(send_err))

            except (ValueError, TypeError) as e:
                # Try the next attachment/message
                logger.exception()
                logger.error(str(e))

        #
        # Store what info we have
        #
        email_info = _get_email_info(msg)
        diagnostic_info_record_id = None

        if diagnostic_info:
            # Add the user's email information to diagnostic_info.
            # This will allow us to later auto-respond, or act as a
            # remailer between the user and the Psiphon support team.
            diagnostic_info['EmailInfo'] = email_info

            # Store the diagnostic info
            diagnostic_info_record_id = datastore.insert_diagnostic_info(diagnostic_info)

            # Store the association between the diagnostic info and the email
            datastore.insert_email_diagnostic_info(diagnostic_info_record_id,
                                                   msg['msgobj']['Message-ID'],
                                                   msg['subject'])

        # Store autoresponder info regardless of whether there was a diagnostic info
        datastore.insert_autoresponder_entry(email_info, diagnostic_info_record_id)

    logger.debug_log('maildecryptor.go end')
def initialize(config_file):
    """Load the configuration and run the one-time start-up initialisation.

    Under ``INIT_LOCK`` this (re)loads ``CONFIG`` from ``config_file``,
    then, unless initialisation already happened, validates the HTTP
    settings, creates the log, backup and cache directories, starts the
    logger, checks the database, ensures a UUID exists, determines the
    current and latest versions, refreshes server, user and library data
    when configured, and records the process umask.

    Note that ``CONFIG`` and ``CONFIG_FILE`` are replaced on every call,
    before the ``_INITIALIZED`` check.

    Args:
        config_file: path handed to ``plexpy.config.Config``.

    Returns:
        bool: True when initialisation ran, False when it had already run.
    """
    with INIT_LOCK:

        global CONFIG
        global CONFIG_FILE
        global _INITIALIZED
        global CURRENT_VERSION
        global LATEST_VERSION
        global UMASK
        global POLLING_FAILOVER

        CONFIG = plexpy.config.Config(config_file)
        CONFIG_FILE = config_file

        assert CONFIG is not None

        if _INITIALIZED:
            return False

        if CONFIG.HTTP_PORT < 21 or CONFIG.HTTP_PORT > 65535:
            plexpy.logger.warn(
                'HTTP_PORT out of bounds: 21 < %s < 65535', CONFIG.HTTP_PORT)
            CONFIG.HTTP_PORT = 8181

        if not CONFIG.HTTPS_CERT:
            CONFIG.HTTPS_CERT = os.path.join(DATA_DIR, 'server.crt')
        if not CONFIG.HTTPS_KEY:
            CONFIG.HTTPS_KEY = os.path.join(DATA_DIR, 'server.key')

        if not CONFIG.LOG_DIR:
            CONFIG.LOG_DIR = os.path.join(DATA_DIR, 'logs')

        if not os.path.exists(CONFIG.LOG_DIR):
            try:
                os.makedirs(CONFIG.LOG_DIR)
            except OSError:
                # Clear the log dir so the logger is started without one.
                CONFIG.LOG_DIR = None

                if not QUIET:
                    sys.stderr.write("Unable to create the log directory. "
                                     "Logging to screen only.\n")

        # Start the logger, disable console if needed
        logger.initLogger(console=not QUIET, log_dir=CONFIG.LOG_DIR,
                          verbose=VERBOSE)

        if not CONFIG.BACKUP_DIR:
            CONFIG.BACKUP_DIR = os.path.join(DATA_DIR, 'backups')
        if not os.path.exists(CONFIG.BACKUP_DIR):
            try:
                os.makedirs(CONFIG.BACKUP_DIR)
            except OSError as e:
                logger.error("Could not create backup dir '%s': %s" % (CONFIG.BACKUP_DIR, e))

        if not CONFIG.CACHE_DIR:
            CONFIG.CACHE_DIR = os.path.join(DATA_DIR, 'cache')
        if not os.path.exists(CONFIG.CACHE_DIR):
            try:
                os.makedirs(CONFIG.CACHE_DIR)
            except OSError as e:
                logger.error("Could not create cache dir '%s': %s" % (CONFIG.CACHE_DIR, e))

        # Initialize the database
        logger.info('Checking to see if the database has all tables....')
        try:
            dbcheck()
        except Exception as e:
            logger.error("Can't connect to the database: %s" % e)

        # Check if PlexPy has a uuid (an empty string is already falsy, so
        # one truthiness test covers both '' and None).
        if not CONFIG.PMS_UUID:
            my_uuid = generate_uuid()
            CONFIG.__setattr__('PMS_UUID', my_uuid)
            CONFIG.write()

        # Get the currently installed version. Returns None, 'win32' or the git
        # hash.
        CURRENT_VERSION, CONFIG.GIT_BRANCH = versioncheck.getVersion()

        # Write current version to a file, so we know which version did work.
        # This allows one to restore to that version. The idea is that if we
        # arrive here, most parts of PlexPy seem to work.
        if CURRENT_VERSION:
            version_lock_file = os.path.join(DATA_DIR, "version.lock")

            try:
                with open(version_lock_file, "w") as fp:
                    fp.write(CURRENT_VERSION)
            except IOError as e:
                logger.error("Unable to write current version to file '%s': %s" %
                             (version_lock_file, e))

        # Check for new versions
        if CONFIG.CHECK_GITHUB_ON_STARTUP and CONFIG.CHECK_GITHUB:
            try:
                LATEST_VERSION = versioncheck.checkGithub()
            except Exception:
                # Narrowed from a bare except so KeyboardInterrupt and
                # SystemExit still propagate during start-up.
                logger.exception("Unhandled exception")
                LATEST_VERSION = CURRENT_VERSION
        else:
            LATEST_VERSION = CURRENT_VERSION

        # Get the real PMS urls for SSL and remote access
        if CONFIG.PMS_TOKEN and CONFIG.PMS_IP and CONFIG.PMS_PORT:
            plextv.get_real_pms_url()
            pmsconnect.get_server_friendly_name()

        # Refresh the users list on startup
        if CONFIG.PMS_TOKEN and CONFIG.REFRESH_USERS_ON_STARTUP:
            plextv.refresh_users()

        # Refresh the libraries list on startup
        if CONFIG.PMS_IP and CONFIG.PMS_TOKEN and CONFIG.REFRESH_LIBRARIES_ON_STARTUP:
            pmsconnect.refresh_libraries()

        # Store the original umask: os.umask() can only be read by setting
        # it, so set a dummy value and restore the original immediately.
        UMASK = os.umask(0)
        os.umask(UMASK)

        _INITIALIZED = True
        return True
def get_page_link_list(self, url):
    """Download an HTML page and return the links found in it.

    Links are taken from the ``href`` of ``<a>`` elements and the ``src``
    of ``<iframe>`` elements. ``javascript`` links, links ending in one of
    the excluded file extensions, and links with a scheme other than
    http/https are skipped. Protocol-relative links get an ``http:``
    prefix, and links with neither scheme nor host are prefixed with the
    scheme and host of ``url``.

    Args:
        url: page to download; anything but http/https yields ``[]``.

    Returns:
        list: ``(link, url)`` tuples with duplicates removed, in no
        particular order. An empty list is returned when the page keeps
        answering 503 or is not ``text/html``. If an error occurs while
        processing, it is logged and whatever was collected so far is
        returned.
    """
    #mc = MyCurl(proxy_ip='192.168.200.253:3128', accept_encoding='gzip, deflate')
    #mc = MyCurl(proxy_ip='127.0.0.1:8888', accept_encoding='gzip, deflate')
    mc = MyCurl(accept_encoding='gzip, deflate')
    pic_regex = re.compile(r'.+\.(jpg|jpeg|gif|png|bmp|xml|json|swf|zip)$')
    _a_list = []

    scheme, netloc = urlparse.urlparse(url)[:2]
    if scheme not in ('http', 'https'):
        return []

    try:
        count = 0
        while True:
            h, page = mc.get_page(url)
            # For searches such as `q=xxx site:domain.xxx` with ~100
            # results, hitting domain.xxx this often can trigger 503s,
            # especially when the site sits behind something like a CDN
            # ("cf" in the original note, presumably Cloudflare). Back off
            # and retry a few times.
            if h['http-code'] != 503:
                break
            if count > 2:
                return []
            time.sleep(3)
            count += 1

        page = gzdecode(page)
        content_type = h['content-type']
        if 'text/html' not in content_type:
            # Uses the logger instead of a bare print statement, matching
            # the error reporting in this function.
            logger.debug('content_type: %s' % content_type)
            return []

        # Use the charset announced in the Content-Type header, else UTF-8.
        r = re.search(r'charset=(.+)', content_type)
        charset = r.group(1) if r else 'utf-8'
        page = page.decode(charset, 'ignore')

        d = pq(page)
        for l in d('a, iframe').items():
            # iframes carry their target in `src`, anchors in `href`.
            href = l.attr('src') if l[0].tag == 'iframe' else l.attr('href')
            if not href:
                continue
            if 'javascript' in href:
                continue
            if pic_regex.search(href.lower()):
                continue

            href = 'http:' + href if href[:2] == '//' else href
            _scheme, _netloc = urlparse.urlparse(href)[:2]
            if _scheme and (_scheme not in ('http', 'https')):
                continue
            if not _scheme and not _netloc:
                # NOTE(review): relative links are resolved against the
                # site root, not the page's directory -- confirm whether
                # urljoin semantics are wanted here.
                href = scheme + '://' + netloc + ('' if href[0] == '/' else '/') + href
            _a_list.append(href)
    except Exception as e:
        logger.error('get_page_link_list %s %s' % (url, str(e)))
        logger.exception(e)

    # try:
    #     if not _a_list:
    #         _uuid = uuid.uuid1().get_hex()
    #         localtime = time.localtime()
    #         tmp_name = str(localtime[0]) + str(localtime[1]) + str(localtime[2]) + _uuid[0:8] + _uuid[16:20]
    #         with open(tmp_name + '.html', 'w+') as f:
    #             f.write(url + '\n')
    #             f.write(page)
    #         with open(tmp_name + '2.html', 'w+') as f2:
    #             f2.write(url + '\n')
    #             f2.write(str(d))
    # except Exception as e:
    #     logger.error('if not _a_list %s %s' % (url, str(e)))
    #     logger.exception(e)

    return [(l, url) for l in set(_a_list)]