def write_log(deploy_task_info, log):
    submit_uuid = deploy_task_info[0]["submit_uuid"]
    host_ip = deploy_task_info[0]["host_ip"]
    port = deploy_task_info[0]["port"]
    insert_sql = "insert into deploy_mysql_instance_log(submit_uuid,host_ip,port,deploy_log) values('{}','{}','{}','{}')".format(
        submit_uuid, host_ip, port, log)
    DbHelper.dml(insert_sql)
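# A hedged alternative sketch: the INSERT above interpolates the log text directly
# into the SQL string, which breaks (and is injectable) if the log contains quotes.
# write_log_parameterized and its conn argument are illustrative, not part of the
# original code; the sketch assumes a raw pymysql connection and uses placeholder
# binding instead of string formatting.
def write_log_parameterized(conn, deploy_task_info, log):
    sql = ("insert into deploy_mysql_instance_log"
           "(submit_uuid,host_ip,port,deploy_log) values(%s,%s,%s,%s)")
    with conn.cursor() as cursor:
        # pymysql escapes each bound parameter, so quotes in `log` are safe
        cursor.execute(sql, (deploy_task_info[0]["submit_uuid"],
                             deploy_task_info[0]["host_ip"],
                             deploy_task_info[0]["port"], log))
    conn.commit()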
def report_mysql_port(host_ip):
    # Collect ports from mysqld processes that are currently running
    running_port_list = []
    pids = [pid for pid in os.listdir('/proc') if pid.isdigit()]
    for pid in pids:
        try:
            # /proc/<pid>/cmdline is NUL-separated; read it via a context manager
            # so the file handle is closed promptly
            with open(os.path.join('/proc', pid, 'cmdline'), 'rb') as cmd_file:
                process_cmd_info_list = cmd_file.read().split(b'\0')
            match_cmd = process_cmd_info_list[0].decode('utf-8')
            if re.findall('(mysqld)', match_cmd) and re.findall(
                    '(--port)', process_cmd_info_list[-2].decode('utf-8')):
                running_port = int(
                    process_cmd_info_list[-2].decode('utf-8').split('=')[-1])
                if running_port not in running_port_list:
                    running_port_list.append(running_port)
        except Exception as e:
            print(e)  # Not written to the log: some PIDs are transient and would be misleading
    for running_port in running_port_list:
        sql = "replace into deployed_mysql_port(host_ip,port) values('{}',{})".format(
            host_ip, running_port)
        DbHelper.dml(sql)
    # Collect ports from known installation directories
    port_list = [3306, 3307, 3308, 3309, 3310, 3311, 3312, 3313, 3314, 3315]
    for check_port in port_list:
        if os.path.exists('/data/{}'.format(check_port)) or os.path.exists(
                '/data/mysql/multi/{}'.format(check_port)):
            sql = "replace into deployed_mysql_port(host_ip,port) values('{}',{})".format(
                host_ip, check_port)
            DbHelper.dml(sql)
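# A hedged alternative sketch: psutil can enumerate mysqld processes without
# parsing /proc by hand. list_mysqld_ports is illustrative and not part of the
# original agent code; it assumes psutil is installed (a later example in this
# file imports it as `ps`).
def list_mysqld_ports():
    import psutil  # local import so the sketch stands alone
    ports = set()
    for proc in psutil.process_iter(['name', 'cmdline']):
        try:
            if proc.info['name'] == 'mysqld':
                for arg in proc.info['cmdline'] or []:
                    # pick up --port=3306 style arguments
                    if arg.startswith('--port='):
                        ports.add(int(arg.split('=', 1)[1]))
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            continue  # transient PIDs can vanish between listing and inspection
    return sorted(ports)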
class CommentsPipeline(object):
    def open_spider(self, spider):
        self.data = []
        self.dbUtils = DbHelper()
        self.logger = get_logger(self.__class__.__name__)

    def close_spider(self, spider):
        try:
            if len(self.data) > 0:
                latest_comment_id = self.__get_latest_comment_id(
                    self.data[0]['news_id'])
                # Handle data (update, filter, clean)
                columns = ['comment_id', 'news_id', 'comment_time', 'comment']
                df = pd.DataFrame(self.data, columns=columns)
                df = df.dropna().drop_duplicates().query(
                    f'comment_id > {latest_comment_id}')
                df['sentiment'] = df['comment'].apply(self.__sentiment)
                data = [
                    Comments(comment=item['comment'],
                             news_id=item['news_id'],
                             comment_id=item['comment_id'],
                             comment_time=item['comment_time'].to_pydatetime(),
                             sentiment=item['sentiment'])
                    for item in df.to_dict('records')
                ]
                self.dbUtils.insert(data)
        except Exception as ex:
            self.logger.error(
                "Exception occurred handling data when spider is closed: %s", ex)

    def process_item(self, item, spider):
        if item:
            self.data.append(dict(item))
        return item

    def __get_latest_comment_id(self, news_id):
        session = self.dbUtils.Session()
        try:
            res = session.query(func.max(
                Comments.comment_id)).filter(Comments.news_id == news_id)
            return res[0][0] if res[0] and res[0][0] and res[0][0] > 0 else 0
        except Exception as ex:
            raise ex
        finally:
            session.close()

    def __sentiment(self, text):
        return SnowNLP(text).sentiments
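# For reference, SnowNLP's `sentiments` property returns a probability in [0, 1]:
# values near 1.0 read as positive, values near 0.0 as negative. A standalone
# sanity check (illustrative only, not part of the pipeline):
def _sentiment_demo():
    from snownlp import SnowNLP
    positive = SnowNLP('这部电影真好看').sentiments  # expected to be close to 1.0
    negative = SnowNLP('太糟糕了').sentiments        # expected to be close to 0.0
    return positive, negative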
class SentimentService:
    def __init__(self):
        self.db_helper = DbHelper()

    def __init_data(self):
        session = self.db_helper.Session()
        query = session.query(Sentiment)
        ret = self.db_helper.query(query, page_size=1).count()
        if ret == 0:
            print("No data found, loading data...")
            # insert data
            items = []
            for data_item in self.__load_data():
                item_name = '嫌疑人'
                sentiment = self.__sentiment(data_item['comment'])
                item = Sentiment(item_name=item_name,
                                 score=data_item['score'],
                                 trend=data_item['trend'],
                                 comment=data_item['comment'],
                                 sentiment=sentiment)
                items.append(item)
            session.add_all(items)
            session.commit()
        session.close()

    def top_sentiments(self, page_size=10):
        self.__init_data()
        session = self.db_helper.Session()
        query = session.query(Sentiment).filter(
            Sentiment.sentiment < 0.99).order_by(Sentiment.sentiment.desc())
        results = self.db_helper.query(query, page_size=page_size)
        session.close()
        return [result.to_dict() for result in results]

    def __sentiment(self, text):
        return SnowNLP(text).sentiments

    def __load_data(self):
        backend_dir = Path(os.path.dirname(os.path.abspath(__file__))).parent
        source = backend_dir.joinpath("resources/data.csv")
        df = pd.read_csv(source)
        return df.to_dict('records')
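# Illustrative usage sketch (not part of the original module): top_sentiments()
# lazily seeds the Sentiment table from resources/data.csv on first call, then
# returns the highest-scoring comments below the 0.99 cutoff as plain dicts.
def _top_sentiments_demo(page_size=5):
    service = SentimentService()
    return service.top_sentiments(page_size=page_size)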
def get_task_info(host_ip):
    # Fetch a pending deployment task (queued within the last 24 hours) for this host
    deploy_info_sql = "select submit_uuid,host_ip,port,deploy_status,deploy_archit,deploy_env,deploy_other_param from deploy_mysql_instance where host_ip='{}' and deploy_status=0 and timestampdiff(second,ctime,now())<86400 limit 1".format(
        host_ip)
    ret = DbHelper.find_all(deploy_info_sql)
    if ret['status'] != "ok": return False
    elif len(ret['data']) == 0: return False
    else:
        deploy_task_info = ret['data']
        log = "获取到部署任务"
        write_log(deploy_task_info, log)
        package_info_sql = "select pacakage_url,package_name,package_md5 from deploy_package_info where package_name='nucc_mysql.tar.gz'"
        ret = DbHelper.find_all(package_info_sql)
        if ret['status'] == "ok":
            if len(ret['data']) <= 0: raise Exception("No deployment package found")
            package_info = ret['data']
            return {
                "task": "yes",
                "deploy_task_info": deploy_task_info,
                'package_info': package_info
            }
def update_status(deploy_task_info, deploy_status):
    submit_uuid = deploy_task_info[0]["submit_uuid"]
    update_sql = "update deploy_mysql_instance set deploy_status={} where submit_uuid='{}'".format(
        deploy_status, submit_uuid)
    # Only the known status codes are written back
    if deploy_status in (1, 2, 3):
        DbHelper.dml(update_sql)
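# A hedged sketch of how these helpers fit together on the agent side: poll for a
# pending task, mark it in progress, and record the outcome. run_deploy_cycle and
# the meaning of the status codes (1 = running, 2 = success, 3 = failed) are
# assumptions for illustration; only get_task_info/write_log/update_status come
# from the original code.
def run_deploy_cycle(host_ip):
    task = get_task_info(host_ip)
    if not task:
        return  # nothing queued for this host in the last 24 hours
    deploy_task_info = task["deploy_task_info"]
    update_status(deploy_task_info, 1)   # assumed: mark task as running
    try:
        # ... download task["package_info"] and install the instance here ...
        write_log(deploy_task_info, "deploy finished")
        update_status(deploy_task_info, 2)  # assumed: mark success
    except Exception as e:
        write_log(deploy_task_info, str(e))
        update_status(deploy_task_info, 3)  # assumed: mark failure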
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# gaochao
import time
import os
import collections
import platform as pf
import psutil as ps
import pymysql as db
import logging
logger = logging.getLogger('agent_logger')
from utils.db_helper import DbHelper
db_op_obj = DbHelper()


def conn_mysql_instance(host, port, user, password, database):
    try:
        return db.connect(host=host,
                          port=port,
                          user=user,
                          passwd=password,
                          db=database,
                          charset='utf8mb4',
                          cursorclass=db.cursors.DictCursor)
    except Exception as e:
        # str(e) is required here: concatenating the exception object itself raises a TypeError
        raise Exception('Can not build available connection! ' + str(e))

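# Illustrative usage of the helper above (host/credential values are placeholders,
# not real configuration): DictCursor makes each row a dict keyed by column name.
def _show_server_version(host, port, user, password):
    conn = conn_mysql_instance(host, port, user, password, 'information_schema')
    try:
        with conn.cursor() as cursor:
            cursor.execute('select version() as version')
            return cursor.fetchone()['version']
    finally:
        conn.close()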

def domain_is_valid(domain):
    # The original listing is truncated here; a minimal completion based on the
    # surviving condition: a domain must at least contain a dot.
    return '.' in domain
class NewsCommentsSpider(scrapy.Spider):

    url = os.getenv("NEWS_URL")
    comments_url = os.getenv("COMMENTS_URL")
    name = "news_comments"
    start_urls = [url]
    comments_per_page = int(os.getenv("COMMENTS_PER_PAGE", "20"))
    dbUtils = DbHelper()
    logger = get_logger('news_comments_spider')
    max_pages = int(os.getenv("MAX_PAGES", "10"))

    def parse(self, response):
        try:
            news_name = response.xpath(
                "//div[@id='wrapper']/h1[1]/span[1]/text()").extract_first(
                ).strip()
            news_id = Helper.md5(news_name)
            self.__add_news(
                News(news_id=news_id, news_name=news_name, source=self.url))
            item = CommentsItem()
            item['news_id'] = news_id
            total_comments = int(
                re.findall(
                    r'\d+',
                    response.xpath(
                        "//div[@id='content']/div/div[@class='article']/div[@class='related_info']/div[@class='mod-hd']/h2[1]/span[@class='pl']/a/text()"
                    ).extract_first().strip())[0])
            pages = int(
                total_comments / self.comments_per_page
            ) if total_comments % self.comments_per_page == 0 else int(
                total_comments / self.comments_per_page) + 1
            # Get all comments in pages, but crawl up to max_pages
            if pages > self.max_pages:
                pages = self.max_pages
            urls = [f'{self.comments_url}?p={p+1}' for p in range(pages)]
            for c_url in urls:
                yield scrapy.Request(c_url,
                                     meta={'item': item},
                                     callback=self.__parse_comments)
        except Exception as ex:
            self.logger.error(
                f"Exception occurred when parsing page {self.url}: {ex}")

    def __parse_comments(self, response):
        for sel in response.xpath("//div[@id='comments']/ul/li"):
            try:
                item = response.meta['item']
                item['comment_id'] = int(
                    sel.xpath("@data-cid").extract_first().strip())
                item['comment'] = sel.xpath(
                    "div[@class='comment']/p[1]/span[1]//text()"
                ).extract_first().strip()
                item['comment_time'] = Helper.parse_comment_time(
                    sel.xpath(
                        "div[@class='comment']/h3[1]/span[2]/span[2]/text()").
                    extract_first().strip())
                yield item
            except Exception as ex:
                self.logger.error(
                    f"Exception occurred when parsing comment with response {response}: {ex}")
                yield None

    def __add_news(self, news_item):
        if self.__check_news(news_item) == 0:
            self.dbUtils.insert([news_item])

    def __check_news(self, news_item):
        session = self.dbUtils.Session()
        try:
            result = session.query(News).filter(
                News.news_id == news_item.news_id).count()
            return result
        except Exception as ex:
            raise ex
        finally:
            session.close()
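# Illustrative way to run the spider outside `scrapy crawl` (assumes NEWS_URL and
# COMMENTS_URL are set in the environment; CrawlerProcess is standard Scrapy API):
def _run_spider():
    from scrapy.crawler import CrawlerProcess
    process = CrawlerProcess()
    process.crawl(NewsCommentsSpider)
    process.start()  # blocks until crawling is finished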
class NewsCommentsService:

    DEFAULT_PAGE_SIZE = int(os.getenv("PAGE_SIZE", "25"))
    DEFAULT_POSITIVE_THRESHOLD = float(os.getenv("POSITIVE_THRESHOLD", "0.6"))

    def __init__(self):
        self.db_helper = DbHelper()
        self.news_id = self.__get_news_id()
        self.logger = get_logger(self.__class__.__name__)

    def __get_news_id(self):
        session = self.db_helper.Session()
        try:
            res = session.query(News.news_id).first()
            return res[0]
        except Exception as ex:
            raise ex
        finally:
            session.close()

    def search_comments(self,
                        q,
                        page=0,
                        page_size=DEFAULT_PAGE_SIZE,
                        startdate=None,
                        enddate=None):
        session = self.db_helper.Session()
        try:
            news = session.query(News).filter(
                News.news_id == self.news_id).first()
            query = session.query(Comments).filter(
                Comments.news_id == self.news_id).filter(
                    Comments.comment.contains(q))
            t_query = session.query(func.count('*').label('total')).filter(
                Comments.news_id == self.news_id).filter(
                    Comments.comment.contains(q))
            if startdate and startdate != '':
                query = query.filter(
                    Comments.comment_time >= Helper.get_date(startdate))
                t_query = t_query.filter(
                    Comments.comment_time >= Helper.get_date(startdate))
            if enddate and enddate != '':
                query = query.filter(
                    Comments.comment_time <= Helper.get_date(enddate))
                t_query = t_query.filter(
                    Comments.comment_time <= Helper.get_date(enddate))
            query = query.order_by(Comments.comment_time.desc())
            results = self.db_helper.query(query,
                                           page=page,
                                           page_size=page_size)
            total_comments = int(t_query[0][0])
            pages = int(total_comments /
                        page_size) if total_comments % page_size == 0 else int(
                            total_comments / page_size) + 1
            return {
                'dates': {
                    'start': (Helper.get_date(startdate).strftime('%Y-%m-%d')
                              if startdate and startdate != '' else ''),
                    'end': (Helper.get_date(enddate).strftime('%Y-%m-%d')
                            if enddate and enddate != '' else '')
                },
                'pages': pages,
                'comments': [result.to_dict() for result in results],
                'news': news.to_dict()
            }
        except Exception as ex:
            self.logger.error("Exception occurred when searching comments. ",
                              ex)
            return {'pages': 0, 'comments': []}
        finally:
            session.close()

    def get_data(self, page=0, page_size=DEFAULT_PAGE_SIZE):
        session = self.db_helper.Session()
        try:
            news = session.query(News).filter(
                News.news_id == self.news_id).first()
            comment_query = session.query(Comments).filter(
                Comments.news_id == self.news_id)
            comments = self.db_helper.query(comment_query,
                                            page=page,
                                            page_size=page_size).order_by(
                                                Comments.comment_id.desc())
            comment_nums = session.query(
                Comments.comment_time,
                func.count('*').label('comments_num')).filter(
                    Comments.news_id == self.news_id).group_by(
                        Comments.comment_time)
            dates = [result[0].strftime("%Y-%m-%d") for result in comment_nums]
            total_comments = int(
                session.query(func.count('*').label('total')).filter(
                    Comments.news_id == self.news_id)[0][0])
            pages = int(total_comments /
                        page_size) if total_comments % page_size == 0 else int(
                            total_comments / page_size) + 1
            return {
                'news':
                news.to_dict(),
                'dates':
                dates,
                'comments': [comment.to_dict() for comment in comments],
                'comment_nums': [{
                    'date': result[0].strftime("%Y-%m-%d"),
                    'count': result[1]
                } for result in comment_nums],
                'pages':
                pages
            }
        except Exception as ex:
            self.logger.error("Exception occurred when getting data. ", ex)
            return {
                'news': {},
                'dates': [],
                'comments': [],
                'comment_nums': []
            }
        finally:
            session.close()

    def get_data_by_date(self,
                         page=0,
                         page_size=DEFAULT_PAGE_SIZE,
                         datestr=None):
        session = self.db_helper.Session()
        try:
            news = session.query(News).filter(
                News.news_id == self.news_id).first()
            comment_query = session.query(Comments).filter(
                Comments.news_id == self.news_id).filter(
                    Comments.comment_time == Helper.get_date(
                        datestr)).order_by(Comments.comment_id.desc())
            comments = self.db_helper.query(comment_query,
                                            page=page,
                                            page_size=page_size)
            sentiment_nums = session.query(
                Comments.comment_time,
                func.sum(
                    sql.case([(sql.column('sentiment') >=
                               self.DEFAULT_POSITIVE_THRESHOLD, 1)],
                             else_=0)).label('positive'),
                func.sum(
                    sql.case([(sql.column('sentiment') <
                               self.DEFAULT_POSITIVE_THRESHOLD, 1)],
                             else_=0)).label('negative')
            ).filter(Comments.news_id == self.news_id).filter(
                Comments.comment_time == Helper.get_date(datestr)).group_by(
                    Comments.comment_time)
            total_comments = int(
                session.query(func.count('*').label('total')).
                filter(Comments.news_id == self.news_id).filter(
                    Comments.comment_time == Helper.get_date(datestr))[0][0])
            pages = int(total_comments /
                        page_size) if total_comments % page_size == 0 else int(
                            total_comments / page_size) + 1
            return {
                'news': news.to_dict(),
                'comments': [comment.to_dict() for comment in comments],
                'date': sentiment_nums[0][0].strftime("%Y-%m-%d"),
                'positive': int(sentiment_nums[0][1]),
                'negative': int(sentiment_nums[0][2]),
                'total': int(sentiment_nums[0][1]) + int(sentiment_nums[0][2]),
                'pages': pages
            }
        except Exception as ex:
            self.logger.error("Exception occurred when getting data by date: %s",
                              ex)
            return {
                'news': {},
                'comments': [],
                'date': '',
                'positive': 0,
                'negative': 0,
                'total': 0
            }
        finally:
            session.close()
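# Illustrative usage sketch (not part of the original module): the service binds
# to the first news_id in the News table and exposes paged views over its comments.
# The keyword below is a placeholder; search_comments also accepts optional
# startdate/enddate strings that Helper.get_date can parse.
def _comments_overview(keyword='placeholder'):
    service = NewsCommentsService()
    overview = service.get_data(page=0)
    matches = service.search_comments(keyword, page=0)
    return overview['pages'], matches['pages']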