コード例 #1
0
import http.client
import time

# --- Elasticsearch endpoint ---
es_port = 9200
es_ip = "211.39.140.49"

# --- tousflux sentiment labels ---
# Maps each sentiment code to the Korean label used for it
# (positive / negative / neutral).
sentiment_score = dict(
    POSITIVE="긍정",
    NEGATIVE="부정",
    ETC="중립",
)

# --- logging ---
# Logger named 'posneg': rotating-file handler on, console handler off.
logger = myLogger.getMyLogger(
    'posneg',
    hasConsoleHandler=False,
    hasRotatingFileHandler=True,
    logLevel=logging.DEBUG,
)

# Record which Elasticsearch endpoint this run is configured for.
logger.info("ES Connection %s %d" % (es_ip, es_port))

# --- Slack notification settings ---
# NOTE(review): the incoming-webhook URL is committed in source; consider
# loading it from configuration or the environment instead.
slack = slackweb.Slack(
    "https://hooks.slack.com/services/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"
)
slackChannel = "#dmap_error_alert"
slackUserName = "******"
slackIconEmoji = ":ghost:"

# --- search options ---
MAX_TOUSFLUX_NUM = 10
PAGE_SIZE = 10
# NOTE(review): named INDEX_EMOTIONS but the pattern targets "documents-*";
# confirm this is intentional.
INDEX_EMOTIONS = "documents-*"
TYPE_DOC = "doc"
コード例 #2
0
'''
Created on 2017. 6. 13.

@author: Holly
'''
import http.client as hc
import logging
import json
from com.wisenut.enums.query import Query
import traceback
from com.wisenut import myLogger

# --- logging ---
# Module logger named "esclient", built by myLogger.getMyLogger with
# logging.DEBUG.
# NOTE(review): the two positional booleans presumably map to
# hasConsoleHandler / hasRotatingFileHandler — confirm against myLogger.
logger = myLogger.getMyLogger(
    "esclient",
    False,
    True,
    logging.DEBUG,
)

# --- Elasticsearch connection settings ---
es_ip, es_port = "211.39.140.96", 9201
# Module-level connection object with a 60 second socket timeout.
es_conn = hc.HTTPConnection(host=es_ip, port=es_port, timeout=60)

# Attribute key -> Korean display label (brand / place name / person name).
attr_dict = {
    "brand": "브랜드",
    "region": "지명",
    "person": "인명",
}


def clear_scroll(scroll_id):
    try:
        es_conn.request("DELETE", "/_search/scroll",
                        json.dumps({"scroll_id": scroll_id}),
                        {"Content-Type": "application/json"})
    except OSError as oserror:
        ex = traceback.format_exc()
        logger.error("[clear_scroll] OS error : %s. Traceback >> %s " %
コード例 #3
0
'''
Created on 2017. 5. 30.

@author: Holly
'''
import pymysql
import slackweb
from com.wisenut.config import Config
from com.wisenut import myLogger
import logging

# --- configuration ---
conf = Config()

# --- logging ---
# Module logger named 'mariadbclient', built with logging.DEBUG.
logger = myLogger.getMyLogger(
    'mariadbclient',
    True,
    False,
    logging.DEBUG,
)

# --- Slack notification settings ---
# NOTE(review): the incoming-webhook URL is committed in source; consider
# moving it into Config alongside the database settings.
slack = slackweb.Slack(
    "https://hooks.slack.com/services/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")

# --- database connection settings, all read from Config ---
mariadb_ip = conf.get_mariadb_ip()
mariadb_port = conf.get_mariadb_port()
mariadb_user = conf.get_mariadb_user()
mariadb_password = conf.get_mariadb_password()
mariadb_db = conf.get_mariadb_db()
mariadb_charset = conf.get_mariadb_charset()

コード例 #4
0
'''
Created on 2017. 5. 30.

@author: Holly
'''
# -*- coding : utf-8 -*-
import logging
import sys, os
import zipfile
from com.wisenut.utils import file_util
from com.wisenut.config import Config
from com.wisenut import myLogger

# --- logging ---
# Module logger named 'excel_downloader', built with logging.DEBUG.
logger = myLogger.getMyLogger(
    'excel_downloader',
    True,
    False,
    logging.DEBUG,
)

# Base directory taken from Config.get_report_home().
conf = Config()
BASE_EXCEL_DIRECTORY = conf.get_report_home()

if __name__ == '__main__':
    logger.info("excel downloader starts.")

    # Three positional arguments are required after the script name.
    if len(sys.argv) < 4:
        print("[ Usage ]")
        print(
            "\texcel_downloader <target_file_path> <save_file_path> <save_file_name>"
        )
        print("")

        # The original bare `exit` only named the builtin without calling it,
        # so execution continued with missing arguments. Terminate with a
        # non-zero status instead.
        sys.exit(1)
コード例 #5
0
# -*- coding: utf-8 -*-
'''
Created on 2017. 6. 13.

@author: Holly
'''
import http.client as hc
import logging
import json, re
import com.wisenut.dao.mariadbclient as mariadb
from com.wisenut.enums.query import Query
import traceback
from com.wisenut import myLogger

# --- logger setup ---
# Module logger named "esclient", built by myLogger.getMyLogger with
# logging.DEBUG.
logger = myLogger.getMyLogger(
    "esclient",
    False,
    True,
    logging.DEBUG,
)

# --- Elasticsearch connection settings ---
# Alternative host, currently disabled:
# es_ip = "ec2-13-124-161-198.ap-northeast-2.compute.amazonaws.com"
es_ip, es_port = "211.39.140.96", 9200
# Module-level connection object with a 300 second socket timeout.
es_conn = hc.HTTPConnection(host=es_ip, port=es_port, timeout=300)

# Attribute key -> Korean display label (brand / place name / person name).
attr_dict = {
    "brand": "브랜드",
    "region": "지명",
    "person": "인명",
}


class EsRejectedExecutionException(Exception):
    """Plain ``Exception`` subclass with no extra behaviour.

    NOTE(review): the name suggests an Elasticsearch "rejected execution"
    response; the raise sites are outside this excerpt — confirm there.
    """


def clear_scroll(scroll_id):
コード例 #6
0
    #for e in arr:
    m.update(repr(arr).encode('utf-8'))
    return m.hexdigest()


if __name__ == '__main__':
    mode = sys.argv[1]
    start_date = sys.argv[2]
    end_date = sys.argv[3]

    project_seqs = mariadbclient.get_all_projectseqs_of(
        'kdic')  # DB에서 kdic에 해당하는 project를 전체 가져와야함.

    ############# setting logging
    logger = myLogger.getMyLogger('kdic-topics-' + mode,
                                  hasConsoleHandler=False,
                                  hasRotatingFileHandler=True,
                                  logLevel=logging.DEBUG)

    logger.info(
        "=================================================================")
    logger.info("- ES Connection %s %d" % (es_ip, es_port))
    logger.info("- mode\t\t:\t%s" % mode)
    logger.info("- project_seqs\t:\t%s" %
                ','.join(str(seq[0]) for seq in project_seqs))
    logger.info("- start_date\t:\t%s" % start_date)
    logger.info("- end_date\t:\t%s" % end_date)
    logger.info(
        "=================================================================")

    #for project_seq in project_seqs.split(","):
    #logger.info(">>>>> project_seq  %s" % project_seq)
コード例 #7
0
ファイル: esclient.py プロジェクト: smilesyk2/yklab
# -*- coding: utf-8 -*- 
'''
Created on 2017. 6. 13.

@author: Holly
'''
import http.client as hc
import logging
import json, re
import com.wisenut.dao.mariadbclient as mariadb
from com.wisenut.enums.query import Query
import traceback
from com.wisenut import myLogger

# --- logger setup ---
# Module logger named "esclient", built by myLogger.getMyLogger with
# logging.DEBUG.
logger = myLogger.getMyLogger(
    "esclient",
    False,
    True,
    logging.DEBUG,
)

# --- Elasticsearch connection settings ---
# Alternative host, currently disabled:
# es_ip = "ec2-13-124-161-198.ap-northeast-2.compute.amazonaws.com"
es_ip, es_port = "211.39.140.96", 9200
# Module-level connection object with a 300 second socket timeout.
es_conn = hc.HTTPConnection(host=es_ip, port=es_port, timeout=300)

# Attribute key -> Korean display label (brand / place name / person name).
attr_dict = dict(brand="브랜드", region="지명", person="인명")


コード例 #8
0
ファイル: report.py プロジェクト: smilesyk2/yklab
class Report:
    """Common state and helpers for one Excel report request.

    ``__init__`` derives dates, display names and the ``.xlsx`` file name
    from a request dict; ``cover_page`` writes the cover worksheet.

    NOTE(review): ``cover_page`` reads ``self.workbook``, ``self.header`` and
    ``self.default``, which are not assigned in the visible part of this
    class — presumably set elsewhere; confirm.
    """
    # Class-level defaults; __init__ overwrites most of these per instance.
    seq = -1
    reg_dt = ""
    report_type = ""
    project_name = ""
    channel = ""
    start_date = ""
    end_date = ""
    dataset_names = ""
    compare = False
    queryObj = None
    
    file_name = ""
    file_path = ""
    
    # Earlier hard-coded location, kept for reference:
    #BASE_EXCEL_DIRECTORY='/data/dmap-data/dmap-excel'
    # Evaluated once, when the class body runs.
    conf = Config()
    BASE_EXCEL_DIRECTORY=conf.get_report_home()
    
    # Field names and the parallel list of Korean labels in the same order.
    DOCUMENTS_FIELDS        = [ 'doc_datetime','doc_writer','doc_url','doc_title','doc_content','depth1_nm','depth2_nm','depth3_nm']
    DOCUMENTS_FIELDS_KOREAN = [ '게시일','작성자','URL','제목','내용','채널1','채널2','채널3']
    # Wider emotion field list, currently disabled:
    #EMOTIONS_FIELDS         = [ 'conceptlevel1', 'conceptlevel2', 'conceptlevel3', 'emotion_type', 'matched_text.string', 'depth1_nm', 'depth2_nm', 'depth3_nm', 'doc_datetime', 'doc_writer', 'doc_url', 'doc_title']
    EMOTIONS_FIELDS         = [ 'conceptlevel1', 'conceptlevel2', 'conceptlevel3', 'emotion_type', 'matched_text.string']
    # Korean labels matching the disabled wider list:
    #EMOTIONS_FIELDS_KOREAN  = [ '대분류', '중분류', '소분류', '감성', '분석문장', '채널1', '채널2', '채널3', '게시일', '작성자', 'URL', '제목']
    EMOTIONS_FIELDS_KOREAN  = [ '대분류', '중분류', '소분류', '감성', '분석문장']
    
    # Cell format property dicts.
    # NOTE(review): presumably passed to the workbook's add_format to build
    # self.header / self.default — confirm where those are created.
    HEADER_FORMAT = {
        'bold' : True,
        'font_size' : 9,
        'bg_color' : '#F2F2F2',
        'align' : 'center',
        'border' : 1
    }
    DEFAULT_FORMAT = {
        'font_size' : 9,
        'border' : 1
    }
    
    # One logger shared by all instances.
    logger = myLogger.getMyLogger("report", False, True, logging.DEBUG)
    
    def __init__(self, params):
        """Populate the report attributes and file name from one request.

        Args:
            params: mapping read with the keys 'compare_yn', 'start_date',
                'end_date', 'seq', 'reg_dt', 'type_cd', 'project_seq',
                'channels' and 'datasets'. 'datasets' is a "^"-separated
                string of dataset ids (e.g. "6^7^15") or empty.
        """
        self.compare = params['compare_yn'] == 'Y'
        # Drop '-', ':' and whitespace; the dates keep only the first 8 chars.
        self.start_date = re.sub(r"[-:\s]", "", params['start_date'])[:8]
        self.end_date = re.sub(r"[-:\s]", "", params['end_date'])[:8]
        self.seq = params['seq']
        self.reg_dt = re.sub(r"[-:\s]", "", params['reg_dt'])
        # Type code -> Excel report type name, looked up in the database.
        self.report_type = db.get_exceltype_name(params['type_cd'])
        self.project_name = db.get_project_name(params['project_seq'])
        self.channel = '전체' if not params['channels'] or params['channels']=='all' else "채널일부"

        # Resolve each dataset id to its name with a single lookup per id;
        # ids without a name become 'unknown'.
        if params['datasets']:
            names = []
            for dataset_seq in params['datasets'].split("^"):
                dataset_name = db.get_dataset_name(dataset_seq)
                names.append('unknown' if dataset_name is None else dataset_name)
            self.dataset_names = ",".join(names)
        else:
            self.dataset_names = ''

        # On Windows, replace characters that are not allowed in file names.
        # The original guarded this with re.match, which only inspects the
        # start of the string, and its non-raw pattern lost the backslash.
        if os.name == 'nt':
            self.dataset_names = re.sub(r'[\\/"*?<>|]', "_", self.dataset_names)

        self.queryObj = Query()

        compare_yn = "동일기간비교" if self.compare else "해당기간"

        if not params['datasets']:
            # No datasets: the file name carries no channel part.
            name_parts = [str(self.seq), self.report_type, self.start_date, self.end_date, compare_yn]
        else:
            report_label = self.report_type
            if len(params['datasets'].split("^")) <= 1:
                # A single dataset is spelled out next to the report type.
                report_label = self.report_type + "(" + self.dataset_names + ")"
            name_parts = [str(self.seq), report_label, self.channel, self.start_date, self.end_date, compare_yn]
        self.file_name = "_".join(name_parts) + ".xlsx"

        # Log every request parameter between two separator lines.
        self.logger.info("=======================================================================================")
        for k, v in params.items():
            self.logger.info("%s :\t\t%s", k, v)
        self.logger.info("=======================================================================================")
        
    def get_file_name(self):
        """Return the current value of ``self.file_name``."""
        current_name = self.file_name
        return current_name
    
    def create_file_path(self):
        """Point ``self.file_path`` at BASE_EXCEL_DIRECTORY/<reg_dt>.

        Returns:
            The result of ``file_util.search_create_directory`` for that
            path (its contract is not visible in this excerpt).
        """
        target_dir = os.path.join(self.BASE_EXCEL_DIRECTORY, self.reg_dt)
        self.file_path = target_dir
        return file_util.search_create_directory(target_dir)
    
    # 표지   
    def cover_page(self, params):
        """Write the cover worksheet ('표지') of the report.

        Column 0 holds five labels and column 1 the matching values:
        project name, report type, dataset names, period and channel.

        Args:
            params: request mapping; 'compare_yn', 'start_date', 'end_date'
                and 'channels' are read here.

        Side effects:
            Adds a worksheet to ``self.workbook`` and may append the
            selected channel names to ``self.channel``.
        """
        sheet = self.workbook.add_worksheet('표지')

        # Label column, then the three values that need no computation.
        for row, label in enumerate(('프로젝트명', '분석메뉴', '데이터셋', '기간(당기)', '채널')):
            sheet.write(row, 0, label, self.header)
        for row, value in enumerate((self.project_name, self.report_type, self.dataset_names)):
            sheet.write(row, 1, value, self.default)

        if params['compare_yn'] != 'Y':
            period_text = "~".join([self.start_date, self.end_date])
        else:
            def _to_date(text):
                # Slices year, month and day out of positions 0-4, 5-7, 8-10.
                return date(int(text[0:4]), int(text[5:7]), int(text[8:10]))

            base_start = _to_date(params['start_date'])
            base_end = _to_date(params['end_date'])
            span = base_end - base_start
            # Consecutive periods are one day further apart than their span.
            step = span + timedelta(days=1)
            day_format = '%Y.%m.%d(%a)'

            periods = []
            # The requested period plus the three periods before it.
            for shift in range(4):
                period_end = base_end - step * shift
                period_start = period_end - span
                periods.append("%s ~ %s" % (period_start.strftime(day_format),
                                            period_end.strftime(day_format)))
            period_text = ", ".join(periods)
        sheet.write(3, 1, period_text, self.default)

        if self.channel == "채널일부":
            channel_specs = params['channels'].split(";")
            if len(channel_specs) > 1:
                # Each spec is "^"-separated; its first part is looked up as
                # a depth-1 channel and unresolved lookups are left out.
                picked = []
                for spec in channel_specs:
                    info = db.get_channel_name(Channel.DEPTH1.value, spec.split("^")[0])
                    if info:
                        picked.append(info[0])
                self.channel = self.channel + "(" + ",".join(picked) + ")"

        sheet.write(4, 1, self.channel, self.default)
コード例 #9
0
ファイル: excel_maker.py プロジェクト: smilesyk2/yklab
# -*- coding : utf-8 -*-
'''
Created on 2017. 5. 30.

@author: Holly
'''
import com.wisenut.dao.mariadbclient as db
from com.wisenut.reports.report_emotions import ReportEmotions
from com.wisenut.reports.report_stats import ReportStatistics
from com.wisenut.reports.report_count import ReportCount
from com.wisenut.reports.report_trend import ReportTrend
import traceback
import logging
from com.wisenut import myLogger

# Module logger named "excel_maker", built with logging.DEBUG.
logger = myLogger.getMyLogger(
    "excel_maker",
    False,
    True,
    logging.DEBUG,
)

if __name__ == '__main__':
    #1. 엑셀 다운로드 요청 목록을 테이블에서 가져옴.
    for req in db.get_excel_request():
        logger.debug(req)
        
        #2. 리포트 타입별로 엑셀 꾸미기
        if req['type_cd']=='RSS': # 수집문서통계
            report = ReportStatistics(req)
        elif req['type_cd']=='RSE': # 감성분석
            report = ReportEmotions(req)
        elif req['type_cd']=='RTC': # 검색트렌드 - 조회수
            report = ReportCount(req)
        elif req['type_cd']=='RTT': # 검색트렌드 - 트렌드
            report = ReportTrend(req)
コード例 #10
0
ファイル: report_kdic_doc.py プロジェクト: yksung/pyworks
import math
from datetime import datetime as dt
from com.wisenut.enums.query import Query
from datetime import timedelta
from com.wisenut.config import Config
import sys
import xlsxwriter
import copy
import pymysql
import time
import socket
from com.wisenut import myLogger
import logging

# --- logging ---
# Module logger named 'kdic-report-maker', built with logging.DEBUG.
logger = myLogger.getMyLogger(
    'kdic-report-maker',
    False,
    True,
    logging.DEBUG,
)

# Report mode names.
MODE_DOCUMENTS = 'documents'
MODE_TOPICS = 'topics'
MODE_EMOTIONS = 'emotions'
MODE_TREND = 'trend'

# Index name patterns.
INDEX_DOCUMENTS = "documents-*"
INDEX_TOPICS = "topics-*"
INDEX_EMOTIONS = "emotions-*"

# Retry settings.
RETRY_TIMES = 5
SLEEP_FOR_WHEN_RETRY = 30  # seconds

class ReportKDICDocuments:
    mode = ""
コード例 #11
0
import aiohttp
import urllib3
import traceback
import math

PAGE_SIZE = 1000

# --- Elasticsearch endpoint ---
es_ip, es_port = "211.39.140.96", 9201

# Index and document type names.
INDEX_TOPICS = "topics"
INDEX_DOCUMENTS = "documents"
TYPE_DOC = "doc"

# --- logger ---
# Logger named 'related_word_sticker': rotating-file handler on, console
# handler off.
logger = myLogger.getMyLogger(
    'related_word_sticker',
    hasConsoleHandler=False,
    hasRotatingFileHandler=True,
    logLevel=logging.DEBUG,
)


class EsError(Exception):
    """Plain ``Exception`` subclass with no extra behaviour.

    NOTE(review): presumably signals an Elasticsearch failure; the raise
    sites are outside this excerpt — confirm there.
    """



class NoMecabAvailable(Exception):
    """Plain ``Exception`` subclass with no extra behaviour.

    NOTE(review): presumably raised when the Mecab analyser cannot be used;
    the raise sites are outside this excerpt — confirm there.
    """



async def isNoun(text, debug=False):
    from subprocess import run, PIPE
    
コード例 #12
0
# -*- coding : utf-8 -*-
'''
Created on 2017. 5. 30.

@author: Holly
'''
import com.wisenut.dao.mariadbclient as db
from com.wisenut.reports.report_emotions import ReportEmotions
from com.wisenut.reports.report_stats import ReportStatistics
from com.wisenut.reports.report_count import ReportCount
from com.wisenut.reports.report_trend import ReportTrend
import traceback
import logging
from com.wisenut import myLogger

# Module logger named "excel_maker", built with logging.DEBUG.
logger = myLogger.getMyLogger(
    "excel_maker",
    False,
    True,
    logging.DEBUG,
)

if __name__ == '__main__':
    #1. 엑셀 다운로드 요청 목록을 테이블에서 가져옴.
    for req in db.get_excel_request():
        logger.debug(req)

        #2. 리포트 타입별로 엑셀 꾸미기
        if req['type_cd'] == 'RSS':  # 수집문서통계
            report = ReportStatistics(req)
        elif req['type_cd'] == 'RSE':  # 감성분석
            report = ReportEmotions(req)
        elif req['type_cd'] == 'RTC':  # 검색트렌드 - 조회수
            report = ReportCount(req)
        elif req['type_cd'] == 'RTT':  # 검색트렌드 - 트렌드
            report = ReportTrend(req)
コード例 #13
0
ファイル: mariadbclient.py プロジェクト: smilesyk2/yklab
# -*- coding: utf-8 -*-
'''
Created on 2017. 5. 30.

@author: Holly
'''
import pymysql
import re
import logging
from com.wisenut import myLogger

# --- logger setup ---
# Module logger named "mariadbclient", built with logging.DEBUG.
logger = myLogger.getMyLogger(
    "mariadbclient",
    False,
    True,
    logging.DEBUG,
)

# --- database connection settings ---
# NOTE(review): host and credentials are hard-coded in this module (user and
# password appear masked in this copy); consider loading them from
# configuration.
mariadb_ip, mariadb_port = "211.39.140.249", 3306
mariadb_user = "******"
mariadb_password = "******"
mariadb_db = "dmap_base"
mariadb_charset = "utf8"
'''
    조회수 쿼리
'''


def get_data_for_report_count(type_cd,
                              trend_grp_seq,
                              start_date,
                              end_date,
                              trend_dataset_seq=0,
コード例 #14
0
ファイル: mariadbclient.py プロジェクト: smilesyk2/yklab
# -*- coding: utf-8 -*- 
'''
Created on 2017. 5. 30.

@author: Holly
'''
import pymysql
import re
import logging
from com.wisenut import myLogger

# --- logger setup ---
# Module logger named "mariadbclient", built with logging.DEBUG.
logger = myLogger.getMyLogger(
    "mariadbclient",
    False,
    True,
    logging.DEBUG,
)

# --- database connection settings ---
# NOTE(review): host and credentials are hard-coded in this module (user and
# password appear masked in this copy); consider loading them from
# configuration.
mariadb_ip = "211.39.140.249"
mariadb_port = 3306
mariadb_user = "******"
mariadb_password = "******"
mariadb_db = "dmap_base"
mariadb_charset = "utf8"

'''
    조회수 쿼리
'''
def get_data_for_report_count(type_cd, trend_grp_seq, start_date, end_date, trend_dataset_seq=0, trend_keyword_seq=0):
    conn = pymysql.connect(host=mariadb_ip, port=mariadb_port, user=mariadb_user, password=mariadb_password, db=mariadb_db, charset=mariadb_charset, connect_timeout=60)
    curs = conn.cursor()
    
    sql = '''
        SELECT TG.name AS trend_grp
コード例 #15
0
ファイル: emotion_exporter.py プロジェクト: yksung/pyworks
    m = hashlib.md5()
    #for e in arr:
    m.update(repr(arr).encode('utf-8'))
    return m.hexdigest()


if __name__ == '__main__':
    process_name = sys.argv[1]
    project_seqs = sys.argv[2]  # only one project_seq per emotional analysis
    start_date = sys.argv[3]
    end_date = sys.argv[4]

    ############# setting logging

    logger = myLogger.getMyLogger(process_name,
                                  hasConsoleHandler=False,
                                  hasRotatingFileHandler=True,
                                  logLevel=logging.DEBUG)

    logger.info(
        "=================================================================")
    logger.info("- ES Connection %s %d" % (es_ip, es_port))
    logger.info("- process_name\t:\t%s" % process_name)
    logger.info("- project_seqs\t:\t%s" % project_seqs)
    logger.info("- start_date\t:\t%s" % start_date)
    logger.info("- end_date\t:\t%s" % end_date)
    logger.info(
        "=================================================================")

    for project_seq in project_seqs.split(","):
        logger.info(
            "================================================================="