def crawl_community_by_district(self, args):
    """Crawl a single result page of community (xiaoqu) listings for one district.

    ``args`` is a ``(district, page)`` tuple so the method can be submitted
    directly to an executor pool. Each parsed record is upserted into
    ``CommunityInfo`` keyed by id; a failed item is rolled back and logged,
    then crawling continues with the next item.
    """
    district, page = args
    page_url = self.base_url + f"xiaoqu/{district}/pg{page}/"
    soup = BeautifulSoup(self.request_fn(page_url), self.bs4_parser)
    logging.debug('@crawl_community_by_district: {0} - page - {1}: {2}'.format(district, page, page_url))
    session = DBSession()
    # Flatten the nested <ul class="listContent"><li> structure into one stream.
    items = (li for ul in soup.find_all("ul", class_="listContent")
             for li in ul.find_all("li"))
    for li in items:
        try:
            record = self.parse_community_content(li)
            existing = session.query(CommunityInfo).filter(CommunityInfo.id == record['id'])
            if existing.first():
                existing.update(record)  # refresh an already-seen community
            else:
                session.add(CommunityInfo(**record))
            session.commit()
            logging.debug('@crawl_community_by_district: {0} - page - {1}: {2}'.format(district, page, record))
        except Exception as exc:
            session.rollback()
            logging.exception('@crawl_community_by_district: {0} - page - {1}: {2}'.format(district, page, exc))
    time.sleep(3)  # throttle between pages
    session.close()
    logging.info('@crawl_community_by_district: {0} - page - {1} complete.'.format(district, page))
def init_database(app):
    """Ensure the site-settings row exists and the schema is current.

    Loads DBSiteSettings id=1; if it is missing, not yet initialised, or its
    version is older than the code's DBSiteSettings.VERSION, run an alembic
    auto-revision plus upgrade, then create or refresh the settings row.
    Finally mirror the display settings into ``app.config``.
    """
    settings = None
    try:
        settings = DBSiteSettings.get_by_id(1)
        if not settings or not settings.inited:
            raise Exception("Can not get site settings")
        if settings.version < DBSiteSettings.VERSION:
            raise Exception("Database expired")
    except Exception:  # fix: was a bare except, which also trapped SystemExit/KeyboardInterrupt
        from alembic import command
        # auto generate alembic version in local
        try:
            command.revision(app.config["MIGRATE_CFG"],
                             "database v%s" % DBSiteSettings.VERSION, True)
        except Exception:  # fix: narrowed from bare except; revision may already exist
            logging.exception("migrate revision error")
        command.upgrade(app.config["MIGRATE_CFG"], "head")
        if not settings:
            settings = create_default_settings(app)
        else:
            settings.inited = True
            settings.version = DBSiteSettings.VERSION
            settings.save()
    app.config["SiteTitle"] = settings.title
    app.config["SiteSubTitle"] = settings.subtitle
    app.config["OwnerEmail"] = settings.owner
def delete_file(cls, real_file, failsafe=True):
    """Delete *real_file* via the module-level ``delete_file`` helper.

    When ``failsafe`` is True (default), errors are logged and swallowed;
    otherwise the exception is logged and re-raised.
    """
    try:
        # Resolves to the module-level helper, not this classmethod.
        delete_file(real_file)
    except Exception:  # fix: narrowed from bare except so Ctrl-C still propagates
        logging.exception("delete file error")
        if not failsafe:
            raise
def crawl_transaction_by_search(self, args):
    """Crawl one page of historical transaction listings for a search key
    (a business circle or community name).

    ``args`` is a ``(search_key, page)`` tuple. Records are upserted into
    ``TransactionInfo`` keyed by id; failed items are rolled back and logged.
    """
    search_key, page = args
    url_page = self.base_url + f"chengjiao/pg{page}rs{search_key}/"
    content = self.request_fn(url_page)
    soup = BeautifulSoup(content, self.bs4_parser)
    logging.debug('@crawl_transaction_by_search: {0} - page - {1}: {2}'.format(search_key, page, url_page))
    session = DBSession()
    for ul_tag in soup.find_all("ul", class_="listContent"):
        for item_tag in ul_tag.find_all("li"):
            try:
                info_dict = self.parse_transaction_content(item_tag)
                query = session.query(TransactionInfo).filter(TransactionInfo.id == info_dict['id'])
                if query.first():
                    query.update(info_dict)
                else:
                    session.add(TransactionInfo(**info_dict))
                session.commit()
                logging.debug('@crawl_transaction_by_search: {0} - page - {1}: {2}'.format(
                    search_key, page, info_dict))
            except Exception as e:
                # fix: roll back the failed transaction (the sibling crawlers do);
                # without it the session stays in a failed state for later items.
                session.rollback()
                logging.exception('@crawl_transaction_by_search: {0} - page - {1}: {2}'.format(
                    search_key, page, e))
    time.sleep(3)  # throttle between pages
    # fix: the session was leaked; close it like crawl_community_by_district does.
    session.close()
    logging.info('@crawl_transaction_by_search: {0} - page - {1} complete.'.format(search_key, page))
def fail_safe(*args, **kwargs):
    # Wrapper that forwards to the decorated ``func`` (from the enclosing
    # scope) and logs any exception instead of letting it escape.
    try:
        return func(*args, **kwargs)
    except:  # NOTE(review): bare except also traps SystemExit/KeyboardInterrupt
        logging.exception("error in func call [%s]" % func.__name__)
        # NOTE(review): inside this body the name ``fail_safe`` may resolve to
        # this wrapper function itself (always truthy) rather than an
        # enclosing boolean flag of the same name, which would make ``raise``
        # unreachable — confirm the enclosing decorator's variable names.
        if not fail_safe:
            raise
def save_photo(binary):
    """Persist an uploaded image plus a thumbnail.

    Returns ``(url, real_file, url_thumb, real_file_thumb, mime)``.
    Python 2 code (uses the ``StringIO`` module). Thumbnail generation is
    best-effort: on any failure the full-size image is still saved and the
    thumbnail fields stay "".
    """
    from tools import save_file
    mime, ext = get_img_type(binary)
    if mime == ImageMime.UNKNOWN:
        raise Exception("unsupported image format")
    # uuid1-based name avoids collisions between concurrent uploads
    filename = str(uuid.uuid1()) + ext
    url, real_file = save_file(binary, filename, mime_type=mime)
    url_thumb = real_file_thumb = ""
    try:
        import StringIO
        from PIL import Image
        from settings import THUMB_SIZE
        im = Image.open(StringIO.StringIO(binary))
        thumb = StringIO.StringIO()
        thumb_filename = str(uuid.uuid1()) + "_thumb" + ext
        im.thumbnail(THUMB_SIZE)  # in-place resize, preserves aspect ratio
        thumb.name = thumb_filename  # PIL infers the output format from the name
        im.save(thumb)
        url_thumb, real_file_thumb = save_file(thumb.getvalue(), thumb_filename, mime_type=mime)
    except:  # NOTE(review): bare except — thumbnailing is deliberately best-effort
        logging.exception("save thumb error")
    return url, real_file, url_thumb, real_file_thumb, mime
def crawl_sale_by_search(self, args):
    """Crawl one page of for-sale (ershoufang) listings for a search key
    (business circle or community name).

    ``args`` is a ``(search_key, page)`` tuple. Valid records are collected
    into the session and committed once at the end of the page.
    """
    search_key, page = args
    page_url = self.base_url + f"ershoufang/pg{page}rs{search_key}/"
    soup = BeautifulSoup(self.request_fn(page_url), self.bs4_parser)
    logging.debug('@crawl_sale_by_search: {0} - page - {1}: {2}'.format(search_key, page, page_url))
    session = DBSession()
    # Flatten the nested <ul class="sellListContent"><li> structure.
    listings = (li for ul in soup.find_all("ul", class_="sellListContent")
                for li in ul.find_all("li"))
    for li in listings:
        try:
            record = self.parse_sale_content(li)
            logging.debug('@crawl_sale_by_search: {0} - page - {1}: {2}'.format(search_key, page, record))
            sale = SaleInfo(**record)
            # Skip listings missing any identifying field.
            if not (sale.house_id and sale.community_id and sale.district):
                continue
            session.add(sale)
        except Exception as exc:
            session.rollback()
            logging.exception('@crawl_sale_by_search: {0} - page - {1}: {2}'.format(search_key, page, exc))
    time.sleep(3)  # throttle between pages
    session.commit()
    session.close()
    logging.info('@crawl_sale_by_search: {0} - page - {1} complete.'.format(search_key, page))
def get_total_num_pages(sector_url):
    """This func gets a total number of pages to parse in each sector.

    sector_url is a link from list sectors_search_url presented in settings.py
    For example, https://5karmanov.ru/cat/aksessuary-muzhskie
    It is need to created a sector page url via create_sector_page_url
    (for example, https://5karmanov.ru/cat/aksessuary-muzhskie?&page=3)

    :return sector_pages_qt int, or None when the page or the page count
            could not be obtained
    """
    sector_page_html = get_html(sector_url)
    if not sector_page_html:
        # fix: logging.exception outside an except block logs "NoneType: None";
        # use a plain error record instead.
        logging.error(f"Failed to get {sector_url} html")
        print("Не удалось загрузить секцию", sector_url)
        return None
    soup = BeautifulSoup(sector_page_html, "html.parser")
    try:
        last_page_url = soup.find("ul", class_="pagination").find_next(
            "li", class_="more").find_next('a')["href"]
    except AttributeError:
        # No "more" item on short pagers: take the second-to-last pager link.
        last_page_url = soup.find("ul", class_="pagination").find_all("a")
        last_page_url = last_page_url[-2]["href"]
    sector_pages_qt = last_page_url.partition("page=")[-1].strip('"')
    try:
        return int(sector_pages_qt)
    except ValueError as e:
        # fix: the original logged the bad value, then fell through to
        # `return int(sector_pages_qt)` and crashed with the same ValueError.
        logging.exception(
            f"Fail to get page_num at {sector_url}.Received data is not integer {e}"
        )
        print("Не удалось загрузить секцию", sector_url)
        return None
def post(postid):
    """Look up a post by id; abort with 404 when the lookup fails."""
    try:
        post = apis.Post.get_by_id(postid)
    except Exception:  # fix: was py2-only "except Exception, e"; e was unused
        from settings import logging
        logging.exception("post not found")
        abort(404)
def dispatch_action(parameters, action):
    """Dispatch an AJAX *action* to its handler registered in AJAX_METHODS.

    Every request parameter except ``file_name`` is JSON-decoded (and decoded
    a second time when the first pass yields a string, for doubly-encoded
    payloads) and passed to the handler as a keyword argument.

    Returns a status dict with "status" plus "response" or "error".
    """
    result = ERROR_RES.copy()
    try:
        parameters = parameters.to_dict()
        method = AJAX_METHODS.get(action)
        print(parameters)  # fix: was py2-only "print parameters"; this form works on py2 and py3
        if method:
            for key in parameters:
                if key == 'file_name':
                    continue  # raw upload field, not JSON
                val = json.loads(parameters[key])
                if isinstance(val, basestring):
                    # doubly-encoded payloads arrive as JSON strings
                    val = json_loads(val)
                parameters[key] = val
            res = method(**parameters)
            result["status"] = "ok"
            result["response"] = res
        else:
            result["status"] = "error"
            result["error"] = format_key("unsupported method [%s]", action)
    except Exception as e:  # fix: py2.6+/py3-compatible (was "except Exception, e")
        logging.exception("Error in dispatch_action:")
        result["error"] = unicode(e)
        result["status"] = "error"
    return result  # fix: result was built but never returned
def post(postid):
    """Look up a post by id; abort with 404 when the lookup fails."""
    try:
        post = apis.Post.get_by_id(postid)
    except Exception:  # fix: was py2-only "except Exception,e"; e was unused
        from settings import logging
        logging.exception("post not found")
        abort(404)
def multi_kpi_vs_time(self, label1, label2):
    """ Void -> save plot to .graphs/

    Plot two KPI columns (self.columns[0] and self.columns[1]) against the
    timestamp column (self.columns[2]) and save the figure as
    graphs/<self.image_name>. Errors are logged, never raised; the figure
    is always cleaned up.
    """
    figure = None  # fix: bound before the try so the finally block cannot NameError
    try:
        new = Reader(self.csvfile, self.columns)
        logging.info(
            f'{__class__.__name__ } [{new.__class__.__name__} Processing {self.columns}'
        )
        param = new.read_single_col(self.columns[0])
        param2 = new.read_single_col(self.columns[1])
        logging.info(
            f'{__class__.__name__ } [Target KPIs [{self.columns[0]}] [{self.columns[1]}] '
        )
        logging.info(
            f'{__class__.__name__ } [Number of KPIs to be drawn - {(len(param)+len(param2))}'
        )
        time = new.read_single_col(self.columns[2])
        timestampt = [dateutil.parser.parse(s) for s in time]
        figure = plt.figure()
        plt.gca().xaxis.set_major_locator(md.HourLocator(interval=5))
        figure, ax = plt.subplots(figsize=(15, 4))
        ax.xaxis.set_major_formatter(md.DateFormatter('%d-%m-%Y-%H:%M'))
        plt.plot_date(
            x=(timestampt),
            y=(param),
            xdate=True,
            fmt='r',
            label=label1,
        )
        plt.plot_date(
            x=(timestampt),
            y=(param2),
            xdate=True,
            fmt='b',
            label=label2,
        )
        plt.xticks(rotation=40)
        plt.xticks(timestampt)
        plt.tight_layout()
        plt.legend(loc="upper left")
        plt.subplots_adjust(wspace=1, bottom=0.2)
        plt.title(self.title, loc='center')
        ax.tick_params(direction='out', length=1, width=0.5, color='b')
        # fix: set_major_locator() returns None; the original rebound `figure`
        # to it, so finally closed None instead of the real figure.
        plt.gca().xaxis.set_major_locator(md.HourLocator(interval=5))
        plt.savefig(f'graphs/{self.image_name}', bbox_inches='tight')
        logging.info(
            f'{__class__.__name__ } [Successfully created {self.image_name} graph'
        )
    except Exception as e:
        logging.exception(
            f'{__class__.__name__ } [Exception during creation of {self.image_name} graph'
        )
        logging.exception(f'{__class__.__name__ } [Exception: {e}', exc_info=1)
    finally:
        plt.clf()
        if figure is not None:
            plt.close(figure)
def acs_stats_multi(self):
    """Generate the paired-KPI graphs (port pairs plus RAM) for every CSV
    report in self.list_of_csv; logs an error when it is not a list.
    All exceptions are logged, never raised.
    """
    try:
        if not isinstance(self.list_of_csv, list):
            logging.error(
                f'{__class__.__name__ } [{self.list_of_csv} is not type of list!'
            )
            return
        logging.info(
            f'{__class__.__name__ } [Processing CSVs: \n [{self.list_of_csv}]'
        )
        for single_csv in self.list_of_csv:
            logging.info(
                f'{__class__.__name__ } [Start - Processing report - {single_csv}]'
            )
            server = str(single_csv).replace('.csv', '')
            # (columns, title, image file, left label, right label)
            graph_specs = [
                (['acs_port_8080', 'acs_port_80', 'timestampt'],
                 f'{server.upper()} port 80 vs 8080',
                 f'{server}_8080_vs_80.png', '8080', '80'),
                (['acs_port_8181', 'acs_port_8182', 'timestampt'],
                 f'{server.upper()} port 8181 vs 8182',
                 f'{server}_8181_vs_8182.png', '8181', '8182'),
                (['acs_port_443', 'acs_port_8443', 'timestampt'],
                 f'{server.upper()} port 443 vs 8443',
                 f'{server}_443_vs_8443.png', '443', '8443'),
                (['total_ram', 'used_ram', 'timestampt'],
                 f'{server.upper()} Used RAM',
                 f'{server}_used_ram.png', 'total', 'used'),
            ]
            for columns, title, image, left, right in graph_specs:
                Analysis(single_csv, columns, title, image).multi_kpi_vs_time(left, right)
            logging.info(
                f'{__class__.__name__ } [Finish - Processing report - {single_csv}]'
            )
    except Exception as e:
        logging.exception(f'{__class__.__name__ } [Exception: {e}', exc_info=1)
def get_html(search_url):
    """Fetch *search_url* and return the response body as text.

    Returns False (and logs the error) on any request or status failure.
    """
    try:
        response = requests.get(search_url)
        response.raise_for_status()  # turn HTTP error statuses into exceptions
    except (requests.RequestException, ValueError) as e:
        print("Connection error!")
        logging.exception(f"Error {e}")
        return False
    return response.text
def compare_acs(self):
    """Log creation of self.image_name; any error is logged, never raised."""
    try:
        message = f'{__class__.__name__ } [Successfully created {self.image_name} graph'
        logging.info(message)
    except Exception as e:
        logging.exception(
            f'{__class__.__name__ } [Exception during creation of {self.image_name} graph'
        )
        logging.exception(f'{__class__.__name__ } [Exception: {e}', exc_info=1)
def create_sector_page_url(sector_url, page_number):
    """This func creates a url for single page in each sector.

    https://5karmanov.ru/cat/aksessuary-muzhskie?&page=5
    sector_url: str, url of a sector (presented in sectors_search_url in settings.py)
    page_number: int, serial number of page (received via get_total_num_pages)
    Returns the page URL, or None when validation fails.
    """
    sector_page_url = sector_url + "?&page=" + str(page_number)
    if url_validator(sector_page_url):
        return sector_page_url
    # fix: logging.exception outside an except block appends "NoneType: None";
    # there is no active exception here, so log a plain error.
    logging.error(
        "Ошибка формирования ссылки страницы галереи товаров секции")
    return None
def upload_handler():
    """Create Photo records for every uploaded blob in the request.

    Returns a status dict: {"status": "ok", "photos": [...]} on success,
    {"status": "error", "error": ...} on failure.
    """
    result = {}
    try:
        blob_infos = get_blob_info_list(request)
        if not blob_infos:
            raise Exception("can not get blob info")
        photos = [apis.Photo.create_photo_with_url(**info) for info in blob_infos]
        result["status"] = "ok"
        result["photos"] = photos
    except Exception as e:  # fix: py2.6+/py3-compatible (was "except Exception, e")
        from settings import logging
        logging.exception("error in upload_handler:")
        result["error"] = unicode(e)
        result["status"] = "error"
    return result  # fix: result was built but never returned
def post(postid):
    """Fetch a post by id and enforce visibility; abort 404 on any failure
    (missing post, private post viewed by a non-admin, or lookup error)."""
    try:
        post = apis.Post.get_by_id(postid)
        if not post:
            from ajax import MSG_NO_POST
            raise Exception(MSG_NO_POST % {"id": postid})
        if not post.public:
            # private posts are visible to admins only
            user = apis.User.get_current_user()
            if not user.is_admin():
                raise Exception("not auth post %s" % postid)
    except Exception:  # fix: was py2-only "except Exception, e"; e was unused
        from settings import logging
        logging.exception("post not found")
        abort(404)
def post(postid):
    """Fetch a post by id and enforce visibility; abort 404 on any failure
    (missing post, private post viewed by a non-admin, or lookup error)."""
    try:
        post = apis.Post.get_by_id(postid)
        if not post:
            from ajax import MSG_NO_POST
            raise Exception(MSG_NO_POST % {"id": postid})
        if not post.public:
            # private posts are visible to admins only
            user = apis.User.get_current_user()
            if not user.is_admin():
                raise Exception("not auth post %s" % postid)
    except Exception:  # fix: was py2-only "except Exception,e"; e was unused
        from settings import logging
        logging.exception("post not found")
        abort(404)
def upload_handler():
    """Create Photo records for every uploaded blob in the request.

    Returns a status dict: {"status": "ok", "photos": [...]} on success,
    {"status": "error", "error": ...} on failure.
    """
    result = {}
    try:
        blob_infos = get_blob_info_list(request)
        if not blob_infos:
            raise Exception("can not get blob info")
        photos = [
            apis.Photo.create_photo_with_url(**info) for info in blob_infos
        ]
        result["status"] = "ok"
        result["photos"] = photos
    except Exception as e:  # fix: py2.6+/py3-compatible (was "except Exception, e")
        from settings import logging
        logging.exception("error in upload_handler:")
        result["error"] = unicode(e)
        result["status"] = "error"
    return result  # fix: result was built but never returned
def save_photo(binary):
    """Persist an image (qiniu CDN or local disk) plus a thumbnail.

    Returns ``(url, real_file, url_thumb, real_file_thumb, mime)``. On the
    qiniu path there are no separate local files, so the urls double as the
    file fields. Python 2 code (uses the ``StringIO`` module). Thumbnail
    generation on the local path is best-effort.
    """
    mime, ext = get_img_type(binary)
    if mime == ImageMime.UNKNOWN:
        raise Exception("unsupported image format")
    rand_str = str(uuid.uuid1())  # collision-safe base name for both files
    filename = rand_str + ext
    if QINIU_SETTINGS.Enabled:
        # CDN path: qiniu produces the thumbnail, no local files involved
        url, url_thumb = save_file_qiniu(binary, filename, mime)
        return url, url, url_thumb, url_thumb, mime
    else:
        from tools import save_file
        url, real_file = save_file(binary, filename, mime_type=mime)
        url_thumb = real_file_thumb = ""
        try:
            import StringIO
            from PIL import Image
            from settings import THUMB_SIZE
            im = Image.open(StringIO.StringIO(binary))
            im.thumbnail(THUMB_SIZE, Image.ANTIALIAS)  # in-place resize
            thumb = StringIO.StringIO()
            thumb.name = filename  # PIL infers the output format from the name
            im.save(thumb)
            binary = thumb.getvalue()
            thumb.close()
            # re-detect: PIL may have re-encoded to a different format
            mime, ext = get_img_type(binary)
            thumb_filename = rand_str + "_thumb" + ext
            url_thumb, real_file_thumb = save_file(binary, thumb_filename, mime_type=mime)
        except:  # NOTE(review): bare except — thumbnailing is deliberately best-effort
            logging.exception("save thumb error")
        return url, real_file, url_thumb, real_file_thumb, mime
def query_community(cls, districts=None, biz_circle=None):
    """Return the sorted, de-duplicated community names matching either a
    list of districts or a list of business circles.

    Returns [] (with an error logged) when neither filter is supplied.
    """
    session = DBSession()
    if districts:
        query = session.query(CommunityInfo.community) \
            .filter(CommunityInfo.district.in_(districts)) \
            .all()
    elif biz_circle:
        query = session.query(CommunityInfo.community) \
            .filter(CommunityInfo.biz_circle.in_(biz_circle)) \
            .all()
    else:
        # fix: was `query = [[]]`, which made x[0] below raise IndexError;
        # also logging.exception outside an except block logs "NoneType: None".
        query = []
        logging.error("@query_community: query condition un-defined.")
    session.commit()
    session.close()
    result = list(set(x[0] for x in query))
    result.sort()
    return result
def text_stats(self):
    """Append a MAX/MIN/AVG summary for each column of self.csvfile to
    reports/<server>-kpis-summury.txt. Errors are logged, never raised."""
    try:
        logging.info(
            f'{__class__.__name__ } [Start - TXT stats generation')
        data = pd.read_csv(self.csvfile, usecols=self.columns)
        server = str(self.csvfile).replace('.csv', '')
        # fix: the report was re-opened in append mode inside the loop while
        # the outer handle was still open; open it once for all writes.
        with open(f'reports/{server}-kpis-summury.txt', 'a') as stats:
            stats.write(f'KPIs Summury\n')
            for i in self.columns:
                max_v = data[i].max()
                min_v = data[i].min()
                avg = round(sum(data[i]) / len(data[i]), 2)
                MAX = f'MAX value for {i} - {max_v}'
                MIN = f'MIN value for {i} - {min_v}'
                AVG = f'AVG value for {i} - {avg}'
                stats.write(f'\n{MAX}\n{MIN}\n{AVG}\n')
        logging.info(f'{__class__.__name__ } [End - TXT stats generation')
    except Exception as e:
        logging.exception(f'{__class__.__name__ } [Exception: {e}', exc_info=1)
def dispatch_action(parameters, action):
    """Dispatch an AJAX *action* to its handler registered in AJAX_METHODS.

    Every request parameter is JSON-decoded (and decoded a second time when
    the first pass yields a string, for doubly-encoded payloads) and passed
    to the handler as a keyword argument.

    Returns a status dict with "status" plus "response" or "error".
    """
    result = ERROR_RES.copy()
    try:
        parameters = parameters.to_dict()
        method = AJAX_METHODS.get(action)
        if method:
            for key in parameters:
                val = json.loads(parameters[key])
                if isinstance(val, basestring):
                    # doubly-encoded payloads arrive as JSON strings
                    val = json_loads(val)
                parameters[key] = val
            res = method(**parameters)
            result["status"] = "ok"
            result["response"] = res
        else:
            result["status"] = "error"
            result["error"] = "unsupported method [%s]" % action
    except Exception as e:  # fix: py2.6+/py3-compatible (was "except Exception, e")
        logging.exception("Error in dispatch_action:")
        result["error"] = unicode(e)
        result["status"] = "error"
    return result  # fix: result was built but never returned
def crawl_district_pool(self, module, max_pages=100):
    """Crawl every configured district for *module* ('sale_info' or
    'community_info'), fetching up to ``max_pages`` pages per district with
    a thread pool.
    """
    crawl_mapper = {
        'sale_info': {
            'url': 'ershoufang',
            'func': self.crawl_sale_by_district
        },
        'community_info': {
            'url': 'xiaoqu',
            'func': self.crawl_community_by_district
        },
    }
    url_prefix = crawl_mapper[module]['url']
    crawl_function = crawl_mapper[module]['func']
    for district in self.districts:
        url = self.base_url + f"{url_prefix}/{district}/"
        total_pages = self.get_total_pages(url)
        total_pages = min(total_pages, max_pages)
        logging.info("@crawl_{0}: total {1} pages found for {2}".format(
            module, total_pages, district))
        if not total_pages:
            # fix: logging.exception outside an except block logs
            # "NoneType: None"; there is no exception here, just a warning.
            logging.warning("@crawl_{0}: no pages found for {1}".format(
                module, district))
            continue
        # fix: use the context manager so workers are joined and the executor
        # is shut down (it was leaked once per district).
        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
            args = [(district, page + 1) for page in range(total_pages)]
            all_task = [executor.submit(crawl_function, arg) for arg in args]
            for future in as_completed(all_task):
                future.result()  # re-raise any worker exception
        logging.info("@crawl_{0}: {1} - all {2} pages complete.".format(
            module, district, total_pages))
        time.sleep(1)  # pause between districts
def mng_stats(self):
    """Generate per-server MNG graphs (ports 8080/8181, DB connections,
    IIS RAM and CPU) plus the text KPI summary for every CSV report in
    self.list_of_csv; logs an error when it is not a list."""
    try:
        if isinstance(self.list_of_csv, list):
            logging.info(
                f'{__class__.__name__ } [Processing CSVs: \n [{self.list_of_csv}]'
            )
            for single_csv in self.list_of_csv:
                logging.info(
                    f'{__class__.__name__ } [Start - Processing report - {single_csv}]'
                )
                server = str(single_csv).replace('.csv', '')
                acs_port1 = ['acs_8080', 'timestampt']
                acs_8080 = Analysis(
                    single_csv, acs_port1,
                    f'{server.upper()} Connections on port 8080',
                    f'{server}_port_8080.png', '8080', 10)
                acs_8080.single_kpi_vs_time()
                acs_port2 = ['acs_8181', 'timestampt']
                acs_8181 = Analysis(
                    single_csv, acs_port2,
                    f'{server.upper()} Connections on port 8181',
                    f'{server}_port_8181.png', '8181', 10)
                acs_8181.single_kpi_vs_time()
                db_c = ['oracle', 'timestampt']
                db = Analysis(single_csv, db_c,
                              f'{server.upper()} DB connections',
                              f'{server}_db_connections.png',
                              'estb. connecitons', 10)
                db.single_kpi_vs_time()
                iisr = ['iis_ram', 'timestampt']
                iisram = Analysis(single_csv, iisr,
                                  f'{server.upper()} RAM usage by IIS',
                                  f'{server}_iis_ram.png', 'GB', 10)
                iisram.single_kpi_vs_time()
                iisc = ['iis_cpu', 'timestampt']
                # fix: was Analysis(single_csv, iisr, ...) — the CPU graph was
                # plotting the RAM column; use the iis_cpu column list.
                iiscpu = Analysis(single_csv, iisc,
                                  f'{server.upper()} CPU usage by IIS',
                                  f'{server}_iis_cpu.png', '%', 10)
                iiscpu.single_kpi_vs_time()
                txt = [
                    "used_ram", "cpu_loadavg", "acs_8080", "acs_8181",
                    "acs_8443", "oracle", "iis_ram", "iis_cpu"
                ]
                text = Analysis(single_csv, txt)
                text.text_stats()
                logging.info(
                    f'{__class__.__name__ } [Finish - Processing report - {single_csv}]'
                )
        else:
            logging.error(
                f'{__class__.__name__ } [{self.list_of_csv} is not type of list!'
            )
    except Exception as e:
        logging.exception(f'{__class__.__name__ } [Exception: {e}', exc_info=1)
def acs_stats_single(self):
    """Generate every single-KPI graph plus the text summary for each CSV
    report in self.list_of_csv; logs an error when it is not a list.
    All exceptions are logged, never raised.
    """
    try:
        if not isinstance(self.list_of_csv, list):
            logging.error(
                f'{__class__.__name__ } [{self.list_of_csv} is not type of list!'
            )
            return
        logging.info(
            f'{__class__.__name__ } [Processing CSVs: \n [{self.list_of_csv}]'
        )
        for single_csv in self.list_of_csv:
            server = str(single_csv).replace('.csv', '')
            # (KPI column, title, image file, y-axis label)
            graph_specs = [
                ('javathreads', f'{server.upper()} Java threads',
                 f'{server}_javathreads.png', 'threads'),
                ('oracle_1521', f'{server.upper()} DB connections',
                 f'{server}_db_connections.png', 'estb. connecitons'),
                ('acs_port_8080', f'{server.upper()} Connections on port 8080',
                 f'{server}_port_8080.png', '8080'),
                ('acs_port_80', f'{server.upper()} Connections on port 80',
                 f'{server}_port_80.png', '80'),
                ('acs_port_8181', f'{server.upper()} Connections on port 8181',
                 f'{server}_port_8181.png', '8181'),
                ('acs_port_8443', f'{server.upper()} Connections on port 8443',
                 f'{server}_port_8443.png', '8443'),
                ('acs_port_443', f'{server.upper()} Connections on port 443',
                 f'{server}_port_443.png', '443'),
                ('cpu_load', f'{server.upper()} CPU load',
                 f'{server}_cpu_load.png', '%'),
            ]
            for column, title, image, unit in graph_specs:
                graph = Analysis(single_csv, [column, 'timestampt'],
                                 title, image, unit)
                graph.single_kpi_vs_time()
            txt = [
                "javathreads", "oracle_1521", "acs_port_8080",
                "acs_port_8181", "acs_port_8443", "acs_port_80",
                "acs_port_443", "acs_port_8182", "cpu_load", "used_ram"
            ]
            Analysis(single_csv, txt).text_stats()
    except Exception as e:
        logging.exception(f'{__class__.__name__ } [Exception: {e}', exc_info=1)
def send_email(smtp_port, smtp_server, smtp_user, smtp_pass, receiver_email,
               outcome, attach=None):
    """Build and send a multipart (plain-text + HTML) job-result email with
    the log file *attach* attached, over SMTP-SSL.

    All failures are logged and swallowed.
    """
    try:
        host = socket.gethostname()
        project = os.path.join(os.path.dirname(__file__))
        header = "Email from HOST: {} PROJECT: {}".format(host, project)
        message = MIMEMultipart("alternative")
        message["Subject"] = header
        message["From"] = smtp_user
        message["To"] = receiver_email
        now = datetime.datetime.now()
        timestampt = now.strftime("%Y-%m-%d-%H-%M")
        # NOTE(review): shutil.copy below assumes attach is a real file path;
        # the attach=None default would fail here — confirm callers.
        attach_file_name = f'{attach}-{timestampt}.txt'
        # Create the plain-text and HTML version of your message
        TEXT = """ Subject: {} Greetings! \n Job finished. \n Result: {} \n Log {} attached. \n Best Regards, \n AutomationAgentPy \n """.format(header, outcome, attach_file_name)
        HTML = """ <!DOCTYPE html> <html lang="en"> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1"> <body> <div> <p>Greetings!</p> <p>Job finished.</p> <p>Result: {}</p> <p>Log {} attached</p> <p>Best Regards,</p> <p>AutomationAgentPy</p> </div> </body> </html> """.format(outcome, attach_file_name)
        shutil.copy(attach, attach_file_name)
        # fix: close the attachment file handle (was opened and never closed)
        with open(attach_file_name, 'r') as f:
            attachment = MIMEText(f.read())
        attachment.add_header('Content-Disposition', 'attachment',
                              filename=attach_file_name)
        message.attach(attachment)
        # Turn these into plain/html MIMEText objects.
        part1 = MIMEText(TEXT, "plain")
        part2 = MIMEText(HTML, "html")
        # Add HTML/plain-text parts to MIMEMultipart message.
        # The email client will try to render the last part first.
        message.attach(part1)
        message.attach(part2)
    except Exception as e:
        # fix: the handler logged {e}, but the original "except Exception:"
        # never bound e, so the handler itself raised NameError.
        logging.exception(f'{__class__.__name__ } [Exception: {e}', exc_info=1)
    # Create secure connection with server and send email
    try:
        context = ssl.create_default_context()
        with smtplib.SMTP_SSL(smtp_server, smtp_port, context=context) as server:
            server.login(smtp_user, smtp_pass)
            logging.info(f'{__class__.__name__ } [SMTP session ARGS: \n Server: {smtp_server} \n Sender: {smtp_user} \n Attachment: {attach_file_name} \n Receiver: {receiver_email}')
            server.sendmail(smtp_user, receiver_email, message.as_string())
            logging.info(f'{__class__.__name__ }] [Email sent ')
            # fix: dropped the explicit server.quit(); SMTP_SSL's context
            # manager already issues QUIT on exit.
    except Exception as e:
        logging.exception(f'{__class__.__name__ } [Exception: {e}', exc_info=1)