Ejemplo n.º 1
0
import sys

# NOTE(review): `slackweb`, `Config` and `logging.config` are used below but
# not imported in this visible fragment — presumably imported earlier in the file.
# WARNING: hard-coded Slack webhook URL (a credential) committed to source.
slack = slackweb.Slack(
    "https://hooks.slack.com/services/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"
)

# Elasticsearch paging size and index/type name patterns.
PAGE_SIZE = 1000
TOPICS_TO_SEARCH = "topics*"
INDEX_DOCUMENTS = "documents*"
TYPE_DOC = "doc"

# Elasticsearch endpoint (hard-coded; consider moving into Config).
es_ip = "211.39.140.96"
es_port = 9200

############# logger setup
conf = Config()
logging.config.fileConfig(conf.get_logconfig_path())
logger = logging.getLogger(__name__)

logger.info("ES Connection %s %d" % (es_ip, es_port))

class EsError(Exception):
    """Raised for Elasticsearch-related failures in this module."""


async def get_recent_index(index):
    es = Elasticsearch(['%s:%d' % (es_ip, es_port)])
    cat2es = CatClient(es)
    result = await cat2es.indices(index, h="index")
    '''
Ejemplo n.º 2
0
@author: Holly
'''
# -*- coding : utf-8 -*-
import logging
import sys, os
import zipfile
from com.wisenut.utils import file_util
from com.wisenut.config import Config
from com.wisenut import myLogger

############# setting logging
logger = myLogger.getMyLogger('excel_downloader', True, False, logging.DEBUG)

# zip file directory
# Base directory where generated report/zip files are stored.
conf = Config()
BASE_EXCEL_DIRECTORY = conf.get_report_home()

if __name__ == '__main__':
    logger.info("excel downloader starts.")

    # Require 3 positional arguments (argv[0] is the script name itself).
    if len(sys.argv) < 4:
        print("[ Usage ]")
        print(
            "\texcel_downloader <target_file_path> <save_file_path> <save_file_name>"
        )
        print("")

        # BUG FIX: the original bare `exit` was a no-op expression (the
        # builtin was referenced but never called), so execution fell
        # through to the code below on bad arguments. Exit with a
        # non-zero status instead.
        sys.exit(1)

    print(sys.argv)
Ejemplo n.º 3
0
class Report:
    """Builds an Excel (.xlsx) report for a project and date range.

    NOTE(review): `self.workbook`, `self.header` and `self.default`
    (presumably an xlsxwriter workbook and cell formats) are referenced by
    the methods below but created elsewhere — confirm against the full file.
    `db`, `Query`, `Channel`, `re`, `os`, `date`, `timedelta`, `file_util`,
    `myLogger` and `Config` are also module-level names from elsewhere.
    """
    seq = -1
    reg_dt = ""
    report_type = ""
    project_name = ""
    channel = ""
    start_date = ""
    end_date = ""
    dataset_names = ""
    compare = False
    queryObj = None

    file_name = ""
    file_path = ""

    #BASE_EXCEL_DIRECTORY='/data/dmap-data/dmap-excel'
    conf = Config()
    BASE_EXCEL_DIRECTORY=conf.get_report_home()

    # ES source fields and their Korean column headers for each sheet type.
    DOCUMENTS_FIELDS        = [ 'doc_datetime','doc_writer','doc_url','doc_title','doc_content','depth1_nm','depth2_nm','depth3_nm']
    DOCUMENTS_FIELDS_KOREAN = [ '게시일','작성자','URL','제목','내용','채널1','채널2','채널3']
    #EMOTIONS_FIELDS         = [ 'conceptlevel1', 'conceptlevel2', 'conceptlevel3', 'emotion_type', 'matched_text.string', 'depth1_nm', 'depth2_nm', 'depth3_nm', 'doc_datetime', 'doc_writer', 'doc_url', 'doc_title']
    EMOTIONS_FIELDS         = [ 'conceptlevel1', 'conceptlevel2', 'conceptlevel3', 'emotion_type', 'matched_text.string']
    #EMOTIONS_FIELDS_KOREAN  = [ '대분류', '중분류', '소분류', '감성', '분석문장', '채널1', '채널2', '채널3', '게시일', '작성자', 'URL', '제목']
    EMOTIONS_FIELDS_KOREAN  = [ '대분류', '중분류', '소분류', '감성', '분석문장']

    # xlsxwriter cell-format specs for header and body cells.
    HEADER_FORMAT = {
        'bold' : True,
        'font_size' : 9,
        'bg_color' : '#F2F2F2',
        'align' : 'center',
        'border' : 1
    }
    DEFAULT_FORMAT = {
        'font_size' : 9,
        'border' : 1
    }

    logger = myLogger.getMyLogger("report", False, True, logging.DEBUG)

    def __init__(self, params):
        """Derive the report file name and metadata from the `params` dict."""
        self.compare = True if params['compare_yn']=='Y' else False
        self.start_date = re.sub("[-:\s]", "", params['start_date'])[:8]
        self.end_date = re.sub("[-:\s]", "", params['end_date'])[:8]
        self.seq = params['seq']
        self.reg_dt = re.sub("[-:\s]", "", params['reg_dt'])
        self.report_type = db.get_exceltype_name(params['type_cd']) # e.g. type code "RSP" -> report-type display name
        self.project_name = db.get_project_name(params['project_seq'])
        self.channel = '전체' if not params['channels'] or params['channels']=='all' else "채널일부"

        self.dataset_names = ",".join([db.get_dataset_name(x) if db.get_dataset_name(x)!=None else 'unknown' for x in params['datasets'].split("^")]) if params['datasets'] else '' # e.g. "6^7^15" -> comma-joined dataset names
        # On Windows, replace characters that are illegal in file names.
        if os.name == 'nt' and bool(re.match("[\/\\\"*?<>\|]", self.dataset_names)):
            self.dataset_names = re.sub("[\/\\\"*?<>\|]", "_", self.dataset_names)

        self.queryObj = Query()

        compare_yn = "동일기간비교" if params['compare_yn']=='Y' else "해당기간"

        if not params['datasets']: # search-trend report (no datasets)
            self.file_name = "_".join([str(self.seq), self.report_type, self.start_date, self.end_date, compare_yn]) + ".xlsx"
        else: # social-monitoring report
            if len(params['datasets'].split("^"))>1:
                self.file_name = "_".join([str(self.seq), self.report_type, self.channel, self.start_date, self.end_date, compare_yn]) + ".xlsx"
            else:
                self.file_name = "_".join([str(self.seq), self.report_type+"("+self.dataset_names+")", self.channel, self.start_date, self.end_date, compare_yn]) + ".xlsx"

        self.logger.info("=======================================================================================")
        for k, v in params.items():
            self.logger.info(k + " :\t\t" + str(v))
        self.logger.info("=======================================================================================")

    def get_file_name(self):
        """Return the report file name computed in __init__."""
        return self.file_name

    def create_file_path(self):
        """Create (if needed) and return the output directory for this report."""
        self.file_path = os.path.join(self.BASE_EXCEL_DIRECTORY, self.reg_dt)
        return file_util.search_create_directory( self.file_path )

    # Cover page sheet
    def cover_page(self, params):
        """Write the cover sheet: project, menu, datasets, period, channels."""
        worksheet = self.workbook.add_worksheet('표지')
        worksheet.write(0, 0, '프로젝트명', self.header)
        worksheet.write(1, 0, '분석메뉴', self.header)
        worksheet.write(2, 0, '데이터셋', self.header)
        worksheet.write(3, 0, '기간(당기)', self.header)
        worksheet.write(4, 0, '채널', self.header)
        worksheet.write(0, 1, self.project_name, self.default)
        worksheet.write(1, 1, self.report_type, self.default)
        worksheet.write(2, 1, self.dataset_names, self.default)
        if params['compare_yn']=='Y':
            arr_date = []
            # base period dates
            start_date = date(int(params['start_date'][0:4]), int(params['start_date'][5:7]), int(params['start_date'][8:10]))
            end_date = date(int(params['end_date'][0:4]), int(params['end_date'][5:7]), int(params['end_date'][8:10]))

            for i in range(4):
                time_interval = end_date-start_date
                # comparison ranges (i intervals before the base period)
                this_end_date = end_date - (time_interval+timedelta(days=1))*i # shift the end date back by i intervals

                arr_date.append("%s ~ %s"%((this_end_date-time_interval).strftime('%Y.%m.%d(%a)'), this_end_date.strftime('%Y.%m.%d(%a)')))

            worksheet.write(3, 1, ", ".join(arr_date), self.default)
        else:
            worksheet.write(3, 1, "~".join([self.start_date, self.end_date]), self.default)

        if self.channel=="채널일부" and len(params['channels'].split(";"))>1:
            self.channel += "("
            for c in params['channels'].split(";"):
                channel_info = db.get_channel_name(Channel.DEPTH1.value, c.split("^")[0])
                if channel_info:
                    self.channel += channel_info[0] + ","

            self.channel = re.sub(",$", "", self.channel)
            self.channel += ")"

        worksheet.write(4, 1, self.channel, self.default)
Ejemplo n.º 4
0
# -*- coding : utf-8 -*-
'''
Created on 2017. 5. 30.

@author: Holly
'''
import pymysql
import slackweb
from com.wisenut.config import Config
from com.wisenut import myLogger
import logging

############# setting config
conf = Config()

############# setting logging
logger = myLogger.getMyLogger('mariadbclient', True, False, logging.DEBUG)

############# setting slack
# WARNING: hard-coded Slack webhook URL (a credential) committed to source.
slack = slackweb.Slack(
    "https://hooks.slack.com/services/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"
)

############# MariaDB connection settings (read from config)
mariadb_ip = conf.get_mariadb_ip()
mariadb_port = conf.get_mariadb_port()
mariadb_user = conf.get_mariadb_user()
mariadb_password = conf.get_mariadb_password()
mariadb_db = conf.get_mariadb_db()
mariadb_charset = conf.get_mariadb_charset()
Ejemplo n.º 5
0
import json
import re
import math
import copy
import sys
import hashlib
import urllib
import urllib3
import slackweb
import traceback
import http.client
from com.wisenut.enums.query import Query
from com.wisenut import myLogger

############# setting config
# NOTE(review): `Config` is not imported in this visible fragment —
# presumably `from com.wisenut.config import Config` appears elsewhere.
conf = Config()

############# setting elasticsearch
es_ip = conf.get_es_ip()
es_port = conf.get_es_port()

############# setting slack
# WARNING: hard-coded Slack webhook URL (a credential) committed to source.
slack = slackweb.Slack(
    "https://hooks.slack.com/services/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"
)
slackChannel = "#kdic-error-alert"
slackUserName = "******"
slackIconEmoji = ":ghost:"

############# setting search options
PAGE_SIZE = 1000
Ejemplo n.º 6
0
# -*- coding: utf-8 -*-
'''
Created on 2017. 6. 13.

@author: Holly
'''
from socket import *
import codecs
from com.wisenut.config import Config
import logging.config
from _elementtree import ParseError
import time

############# logger setup
conf = Config()
logging.config.fileConfig(conf.get_logconfig_path())
logger = logging.getLogger(__name__)

# TEA (term/topic extraction) server endpoint and protocol delimiters.
tea_host = "211.39.140.71"
tea_port = 11000
ITEM_DELIMITER = "^"
WEIGHT_DELIMITER = ":"
def request(params, timeout=2):
    
    #1. send data
    content = "<![CDATA[<DOCID>\n<TITLE>"+params['_source']['doc_title'].strip()+"\n<CONTENT>"+params['_source']['doc_content'].strip()+"\n<TERMS>\n<TOPIC>]]>"

    body_head = "<?xml version=\"1.0\" encoding=\"utf-8\" ?><request><command>extractor</command><request_type>realtime</request_type><request_id>900000</request_id><params><param name=\"collection_id\">dmap_data</param><param name=\"content\">"
    body_tail = "</param><param name=\"item_delimiter\">"+ITEM_DELIMITER+"</param><param name=\"weight_delimiter\">"+WEIGHT_DELIMITER+"</param></params></request>"
    
Ejemplo n.º 7
0
class ES2SCD:
    """Exports Elasticsearch documents into an SCD file.

    SCD is a search-engine collection-data format: one `<FIELD>value` pair
    per line, CRLF-terminated, starting each record with `<DOCID>`.

    NOTE(review): `es`, `db`, `Query`, `FIELDS_DOCUMENTS`,
    `get_current_datetime`, `file_util`, `re`, `os`, `math`, `codecs` and
    `Config` are module-level names defined elsewhere in the file.
    """
    seq = -1
    reg_dt = ""
    report_type = ""
    project_name = ""
    channel = ""
    start_date = ""
    end_date = ""
    dataset_names = ""
    query = None
    compare = ''
    save_path = ""

    file_name = ""
    file_path = ""

    #BASE_EXCEL_DIRECTORY='/data/dmap-data/dmap-excel'
    conf = Config()
    BASE_EXCEL_DIRECTORY=conf.get_report_home()

    def __init__(self, params):
        """Derive the SCD file name and date fields from the `params` dict."""
        self.seq = params['seq']
        self.compare = True if params['compare_yn']=='Y' else False

        self.start_date = re.sub("[-:T\s]", "", params['start_date'])[:12]
        self.end_date = re.sub("[-:T\s]", "", params['end_date'])[:12]
        self.reg_dt = re.sub("[-:T\s]", "", params['reg_dt'])

        self.dataset_names = ",".join([db.get_dataset_name(x) if db.get_dataset_name(x)!=None else 'unknown' for x in str(params['datasets']).split("^")]) if params['datasets'] else '' # e.g. "6^7^15" -> comma-joined dataset names
        self.query = Query(params)

        self.file_name = "B-%d-%s-I-C.SCD" % (self.seq, get_current_datetime())


    def get_file_name(self):
        """Return the SCD file name computed in __init__."""
        return self.file_name

    def create_file_path(self, path):
        """Remember `path` as the output directory and create it if missing."""
        self.file_path = path
        return file_util.search_create_directory( self.file_path )
        # (dead code below the return — kept for reference)
        # if mode == 'documents':
        #     '''
        #     - documents land under the Social directory below the report folder.
        #     '''
        #     self.file_path = os.path.join(self.BASE_EXCEL_DIRECTORY, self.reg_dt, 'raw')
        #     return file_util.search_create_directory( self.file_path )
        # else:
        #     '''
        #     - topics land under the Social_topics directory below the report folder.
        #     '''
        #     self.file_path = os.path.join(self.BASE_EXCEL_DIRECTORY, self.reg_dt, 'topic')
        #     return file_util.search_create_directory( self.file_path )




    # Raw documents
    def create_documents_list(self, params, index):
        """Page through ES search results and write them into one SCD file."""
        size = 10000 # paging size

        # run the search (first page)
        result = es.get_documents(params, size, index, "")

        #worksheet = self.workbook.add_worksheet("원문(%s)"%"~".join([params['start_date'][0:10],params['end_date'][0:10]]))

        # excel header (disabled)
        '''
        for colidx, field in enumerate(output_fields_korean):
            worksheet.write(0, colidx, field, self.header)
        '''

        if "hits" in result and result["hits"]["total"] > 0:
            scdfile = codecs.open(os.path.join(self.file_path, self.file_name), 'w', 'utf-8')

            for this_result in result["hits"]["hits"]:
                for field in FIELDS_DOCUMENTS:
                    if field == 'doc_id':
                        # doc_id comes from the hit metadata, not _source.
                        val = this_result["_id"]
                        #worksheet.write(row+1, col, val, self.default)
                        scdfile.write("<DOCID>%s"%val)
                        scdfile.write("\r\n")

                        continue

                    val = this_result["_source"][field] if field in this_result["_source"] else "null"
                    #worksheet.write(row+1, col, val, self.default)
                    scdfile.write("<%s>%s" % (field, val))
                    scdfile.write("\r\n")


            # If there are more hits than one page, keep fetching with the
            # ES scroll API until every page has been written.
            if "hits" in result and result["hits"]["total"] > size:
                for page in range(1, math.ceil(result["hits"]["total"]/size)): # 0, 1, 2, ....
                    scrolled_result = es.get_documents(params, size, index, scroll_id=result["_scroll_id"])
                    for this_result in scrolled_result["hits"]["hits"]:
                        for field in FIELDS_DOCUMENTS:
                            if field == 'doc_id':
                                val = this_result["_id"]
                                #worksheet.write(row+1, col, val, self.default)
                                scdfile.write("<DOCID>%s"%val)
                                scdfile.write("\r\n")

                                continue

                            val = this_result["_source"][field] if field in this_result["_source"] else "null"
                            #worksheet.write(row+1, col, val, self.default)
                            scdfile.write("<%s>%s" % (field, val))
                            scdfile.write("\r\n")

                    if page == math.ceil(result["hits"]["total"]/size)-1: # clear the scroll after the last page
                        if result["_scroll_id"]:
                            es.clear_scroll(result["_scroll_id"])

            scdfile.close()
Ejemplo n.º 8
0
class Report:
    """Builds an Excel (.xlsx) report for a project and date range.

    NOTE(review): `self.workbook`, `self.header` and `self.default`
    (presumably an xlsxwriter workbook and cell formats) are referenced by
    the methods below but created elsewhere — confirm against the full file.
    `db`, `es`, `Query`, `Channel`, `re`, `os`, `math`, `date`, `timedelta`,
    `file_util` and `Config` are also module-level names from elsewhere.
    """
    seq = -1
    reg_dt = ""
    report_type = ""
    project_name = ""
    channel = ""
    start_date = ""
    end_date = ""
    dataset_names = ""
    compare = False
    query = None

    file_name = ""
    file_path = ""

    #BASE_EXCEL_DIRECTORY='/data/dmap-data/dmap-excel'
    conf = Config()
    BASE_EXCEL_DIRECTORY = conf.get_report_home()

    # ES source fields and their Korean column headers for each sheet type.
    DOCUMENTS_FIELDS = [
        'doc_datetime', 'doc_writer', 'doc_url', 'doc_title', 'doc_content',
        'depth1_nm', 'depth2_nm', 'depth3_nm'
    ]
    DOCUMENTS_FIELDS_KOREAN = [
        '게시일', '작성자', 'URL', '제목', '내용', '채널1', '채널2', '채널3'
    ]
    EMOTIONS_FIELDS = [
        'conceptlevel1', 'conceptlevel2', 'conceptlevel3', 'emotion_type',
        'matched_text.string', 'depth1_nm', 'depth2_nm', 'depth3_nm',
        'doc_datetime', 'doc_writer', 'doc_url', 'doc_title'
    ]
    EMOTIONS_FIELDS_KOREAN = [
        '대분류', '중분류', '소분류', '감성', '분석문장', '채널1', '채널2', '채널3', '게시일', '작성자',
        'URL', '제목'
    ]

    # xlsxwriter cell-format specs for header and body cells.
    HEADER_FORMAT = {
        'bold': True,
        'font_size': 9,
        'bg_color': '#F2F2F2',
        'align': 'center',
        'border': 1
    }
    DEFAULT_FORMAT = {'font_size': 9, 'border': 1}

    def __init__(self, params):
        """Derive the report file name and metadata from the `params` dict."""
        self.compare = True if params['compare_yn'] == 'Y' else False
        self.start_date = re.sub("[-:\s]", "", params['start_date'])[:8]
        self.end_date = re.sub("[-:\s]", "", params['end_date'])[:8]
        self.seq = params['seq']
        self.reg_dt = re.sub("[-:\s]", "", params['reg_dt'])
        self.report_type = db.get_exceltype_name(
            params['type_cd'])  # e.g. type code "RSP" -> report-type display name
        self.project_name = db.get_project_name(params['project_seq'])
        self.channel = '전체' if not params['channels'] or params[
            'channels'] == 'all' else "채널일부"

        self.dataset_names = ",".join([
            db.get_dataset_name(x)
            if db.get_dataset_name(x) != None else 'unknown'
            for x in params['datasets'].split("^")
        ]) if params['datasets'] else ''  # e.g. "6^7^15" -> comma-joined dataset names
        # On Windows, replace characters that are illegal in file names.
        if os.name == 'nt' and bool(
                re.match("[\/\\\"*?<>\|]", self.dataset_names)):
            self.dataset_names = re.sub("[\/\\\"*?<>\|]", "_",
                                        self.dataset_names)

        self.query = Query(params)

        compare_yn = "동일기간비교" if params['compare_yn'] == 'Y' else "해당기간"

        if not params['datasets']:  # search-trend report (no datasets)
            self.file_name = "_".join([
                str(self.seq), self.report_type, self.start_date,
                self.end_date, compare_yn
            ]) + ".xlsx"
        else:  # social-monitoring report
            if len(params['datasets'].split("^")) > 1:
                self.file_name = "_".join([
                    str(self.seq), self.report_type, self.channel,
                    self.start_date, self.end_date, compare_yn
                ]) + ".xlsx"
            else:
                self.file_name = "_".join([
                    str(self.seq),
                    self.report_type + "(" + self.dataset_names + ")",
                    self.channel, self.start_date, self.end_date, compare_yn
                ]) + ".xlsx"

    def get_file_name(self):
        """Return the report file name computed in __init__."""
        return self.file_name

    def create_file_path(self):
        """Create (if needed) and return the output directory for this report."""
        self.file_path = os.path.join(self.BASE_EXCEL_DIRECTORY, self.reg_dt)
        return file_util.search_create_directory(self.file_path)

    # Cover page sheet
    def cover_page(self, params):
        """Write the cover sheet: project, menu, datasets, period, channels."""
        worksheet = self.workbook.add_worksheet('표지')
        worksheet.write(0, 0, '프로젝트명', self.header)
        worksheet.write(1, 0, '분석메뉴', self.header)
        worksheet.write(2, 0, '데이터셋', self.header)
        worksheet.write(3, 0, '기간(당기)', self.header)
        worksheet.write(4, 0, '채널', self.header)
        worksheet.write(0, 1, self.project_name, self.default)
        worksheet.write(1, 1, self.report_type, self.default)
        worksheet.write(2, 1, self.dataset_names, self.default)
        if params['compare_yn'] == 'Y':
            arr_date = []
            # base period dates
            start_date = date(int(params['start_date'][0:4]),
                              int(params['start_date'][5:7]),
                              int(params['start_date'][8:10]))
            end_date = date(int(params['end_date'][0:4]),
                            int(params['end_date'][5:7]),
                            int(params['end_date'][8:10]))

            for i in range(4):
                time_interval = end_date - start_date
                # comparison ranges (i intervals before the base period)
                this_end_date = end_date - (time_interval + timedelta(days=1)
                                            ) * i  # shift the end date back by i intervals

                arr_date.append(
                    "%s ~ %s" %
                    ((this_end_date - time_interval).strftime('%Y.%m.%d(%a)'),
                     this_end_date.strftime('%Y.%m.%d(%a)')))

            worksheet.write(3, 1, ", ".join(arr_date), self.default)
        else:
            worksheet.write(3, 1, "~".join([self.start_date, self.end_date]),
                            self.default)

        if self.channel == "채널일부" and len(params['channels'].split(";")) > 1:
            self.channel += "("
            for c in params['channels'].split(";"):
                channel_info = db.get_channel_name(Channel.DEPTH1.value,
                                                   c.split("^")[0])
                if channel_info:
                    self.channel += channel_info[0] + ","

            self.channel = re.sub(",$", "", self.channel)
            self.channel += ")"

        worksheet.write(4, 1, self.channel, self.default)

    # Raw documents
    def create_documents_list(self, params, index):
        """Write raw document hits into worksheets, one sheet per scroll page."""
        size = 10000  # paging size
        # Pick Korean headers / source fields per target index.
        output_fields_korean = self.DOCUMENTS_FIELDS_KOREAN if index.startswith(
            'documents') else self.EMOTIONS_FIELDS_KOREAN
        output_fields = self.DOCUMENTS_FIELDS if index.startswith(
            'documents') else self.EMOTIONS_FIELDS

        # run the search (count only, hits come from the scroll loop)
        #result = es.get_documents(params, size, index, "")
        totalCount = es.get_documents_count(params, index)

        #if "hits" in result and result["hits"]["total"] > 0:
        if totalCount > 0:
            scroll_id = None

            # Fetch page by page with the ES scroll API; each page gets its
            # own worksheet since the volume can be large.
            #if "hits" in result and result["hits"]["total"] > size:
            for page in range(math.ceil(totalCount / size)):  # 0, 1, 2, ....
                worksheet = self.workbook.add_worksheet(
                    "원문(%s)(%d)" % ("~".join([
                        params['start_date'][0:10], params['end_date'][0:10]
                    ]), page + 1))  #>%s(%d)"%(this_dataset_name,page))
                scrolled_result = es.get_documents(params, size, index,
                                                   scroll_id)
                scroll_id = scrolled_result['_scroll_id']

                # excel header
                for colidx, field in enumerate(output_fields_korean):
                    worksheet.write(0, colidx, field, self.header)

                for row, this_result in enumerate(
                        scrolled_result["hits"]["hits"]):
                    for col, field in enumerate(output_fields):
                        if "." in field:
                            # dotted field, e.g. 'matched_text.string' -> nested lookup
                            field, subfield = field.split(".")

                            val = this_result["_source"][field][
                                subfield] if field in this_result[
                                    "_source"] and subfield in this_result[
                                        "_source"][field] else "null"
                            worksheet.write(row + 1, col, val, self.default)
                        else:
                            val = this_result["_source"][
                                field] if field in this_result[
                                    "_source"] else "null"
                            worksheet.write(row + 1, col, val, self.default)

                if page == math.ceil(
                        totalCount /
                        size) - 1:  # clear the scroll after the last page
                    if '_scroll_id' in scrolled_result and scrolled_result[
                            "_scroll_id"]:
                        es.clear_scroll(scroll_id)
Ejemplo n.º 9
0
class ReportKDICDocuments:
    """Exports KDIC report sheets (documents/topics/emotions/trend) to Excel.

    NOTE(review): `db`, `es`, `Query`, `MODE_*`, `INDEX_EMOTIONS`,
    `file_util`, `re`, `copy`, the module-level `logger`, and the
    `self.workbook`/`self.header`/`self.default` xlsxwriter objects are
    all defined elsewhere in the file.
    """
    mode = ""
    seq = -1
    reg_dt = ""
    report_day = ""
    report_time = ""
    report_type = ""
    project_name = ""
    channel = ""
    start_date = ""
    end_date = ""
    dataset_names = ""
    query = None
    compare = ''
    save_path = ""

    file_name = ""
    file_path = ""

    #BASE_EXCEL_DIRECTORY='/data/dmap-data/dmap-excel'
    conf = Config()
    BASE_EXCEL_DIRECTORY=conf.get_report_home()

    # xlsxwriter cell-format specs for header and body cells.
    HEADER_FORMAT = {
        'bold' : True,
        'font_size' : 9,
        'bg_color' : '#F2F2F2',
        'align' : 'center',
        'border' : 1
    }
    DEFAULT_FORMAT = {
        'font_size' : 9,
        'border' : 1
    }

    def __init__(self, params):
        """Derive the output file name and date fields from `params`.

        params keys read: mode, compare_yn, start_date, end_date, reg_dt,
        datasets, project_seq.
        """
        self.mode = params['mode']
        self.compare = True if params['compare_yn']=='Y' else False

        self.start_date = re.sub("[-:T\s]", "", params['start_date'])[:12]
        self.end_date = re.sub("[-:T\s]", "", params['end_date'])[:12]
        self.reg_dt = re.sub("[-:T\s]", "", params['reg_dt'])

        self.dataset_names = ",".join([db.get_dataset_name(x) if db.get_dataset_name(x)!=None else 'unknown' for x in str(params['datasets']).split("^")]) if params['datasets'] else '' # e.g. "6^7^15" -> comma-joined dataset names
        self.query = Query(params)

        # BUG FIX: these comparisons used the bare name `mode`, which is not
        # bound in this scope (class attributes require `self.`), so every
        # call raised NameError. Compare the instance attribute instead.
        if self.mode == MODE_DOCUMENTS:
            self.file_name = "_".join(["SNS", self.dataset_names, self.start_date, self.end_date]) + ".xlsx"
        elif self.mode == MODE_TOPICS:
            self.file_name = "_".join(["화제어", self.dataset_names, self.start_date, self.end_date]) + ".xlsx"
        elif self.mode == MODE_EMOTIONS:
            self.file_name = "_".join(["감성분석", self.dataset_names, self.start_date, self.end_date]) + ".xlsx"
        elif self.mode == MODE_TREND:
            self.file_name = "_".join(["연관검색어", str(params['project_seq']), self.start_date, self.end_date]) + ".xlsx"


    def get_file_name(self):
        """Return the output file name computed in __init__."""
        return self.file_name

    def create_file_path(self, path):
        """Remember `path` as the output directory and create it if missing."""
        self.file_path = path
        return file_util.search_create_directory( self.file_path )
        # (dead code below the return — kept for reference)
        # if mode == 'documents':
        #     '''
        #     - documents land under the Social directory below the report folder.
        #     '''
        #     self.file_path = os.path.join(self.BASE_EXCEL_DIRECTORY, self.reg_dt, 'raw')
        #     return file_util.search_create_directory( self.file_path )
        # else:
        #     '''
        #     - topics land under the Social_topics directory below the report folder.
        #     '''
        #     self.file_path = os.path.join(self.BASE_EXCEL_DIRECTORY, self.reg_dt, 'topic')
        #     return file_util.search_create_directory( self.file_path )


    def topics_list(self, params):
        """Write the topic / related-term aggregation into a worksheet.

        Columns: date (YYYYMMDD), rank, topic, doc count, related term,
        related-term doc count.
        """
        worksheet = self.workbook.add_worksheet("화제어(%s)"%"~".join([params['start_date'][0:10],params['end_date'][0:10]]))
        # Header row; dates must be formatted as YYYYMMDD.
        worksheet.write(0, 0, '날짜', self.header)
        worksheet.write(0, 1, '순위', self.header)
        worksheet.write(0, 2, '화제어', self.header)
        worksheet.write(0, 3, '문서수', self.header)
        worksheet.write(0, 4, '연관어', self.header)
        worksheet.write(0, 5, '문서수', self.header)

        # Data rows from the nested topics aggregation.
        result_topic = es.get_aggregations(self.query.ALL_TOPICS_LIST(params['dataset_name']), params, Query.INDEX_TOPICS)
        row=0
        seq=0 # topic rank
        #topics_date = params['start_date'][0:10].replace('-','')

        for bucket0 in result_topic['aggregations']['my_aggs0']['buckets']:
            for bucket1 in bucket0['my_aggs1']['buckets']:
                topic = re.sub("[\+=\-/]", "", str(bucket1['key']))
                seq += 1

                topics_date = bucket0['key_as_string']

                if len(bucket1['my_aggs2']['buckets'])>0:
                    for bucket2 in bucket1['my_aggs2']['buckets']:
                        # BUG FIX: removed a stray `str(startdate.strftime(...))`
                        # expression that stood here — `startdate` is undefined
                        # in this scope, so the line raised NameError (and its
                        # result was discarded anyway).
                        worksheet.write(1+row, 0, re.sub("-","", topics_date[:topics_date.find("T")]), self.default)
                        worksheet.write(1+row, 1, seq, self.default)
                        worksheet.write(1+row, 2, re.sub("[\[\]]", "", topic), self.default)
                        worksheet.write(1+row, 3, bucket1['doc_count'], self.default)
                        worksheet.write(1+row, 4, bucket2['key'], self.default)
                        worksheet.write(1+row, 5, bucket2['doc_count'], self.default)
                        row += 1

                else:
                    # Topic with no related terms: leave related columns blank.
                    worksheet.write(1+row, 0, re.sub("-","", topics_date[:topics_date.find("T")]), self.default)
                    worksheet.write(1+row, 1, seq, self.default)
                    worksheet.write(1+row, 2, re.sub("[\[\]]", "", topic), self.default)
                    worksheet.write(1+row, 3, bucket1['doc_count'], self.default)
                    worksheet.write(1+row, 4, '', self.default)
                    worksheet.write(1+row, 5, '', self.default)
                    row += 1

        logger.info("<%s> Total Topics : %d" % (self.dataset_names, row) )



    def emotions_per_causes(self, params):
        """Write sentiment counts per cause/category into a worksheet.

        Columns: date (YYYYMMDD), channel depth 1-3, category levels,
        sentiment, doc count; followed by a totals row for single datasets.
        """
        worksheet = self.workbook.add_worksheet("강성분석(%s)"%"~".join([params['start_date'][0:10],params['end_date'][0:10]]))

        # Header row; dates must be formatted as YYYYMMDD.
        worksheet.write(0, 0, '날짜', self.header)
        worksheet.write(0, 1, '채널1', self.header)
        worksheet.write(0, 2, '채널2', self.header)
        worksheet.write(0, 3, '채널3', self.header)
        worksheet.write(0, 4, '대분류', self.header)
        worksheet.write(0, 5, '중분류', self.header)
        worksheet.write(0, 6, '소분류', self.header)
        worksheet.write(0, 7, '긍부정', self.header)
        worksheet.write(0, 8, '문서수', self.header)

        # Data rows from the nested emotions aggregation.
        qdsl = self.query.EMOTIONS_PER_CAUSES()
        result = es.get_aggregations(copy.copy(qdsl), params, INDEX_EMOTIONS)
        total = 0
        row = 0

        for bucket0 in result['aggregations']['my_aggs0']['buckets']:
            for bucket1 in bucket0['my_aggs1']['buckets']:
                for bucket2 in bucket1['my_aggs2']['buckets']:
                    for bucket5 in bucket2['my_aggs3']['my_aggs4']['my_aggs5']['buckets']:
                        # 2018.01.11 substring match (`find`) so names that
                        # contain "(주)" still match without that prefix.
                        if params['dataset_name'].find(bucket2['key']) >= 0 :
                            depth_level = bucket1['key'].split(">")

                            emotions_date = bucket0['key_as_string']
                            worksheet.write(1+row, 0, re.sub("-", "", emotions_date[:emotions_date.find("T")]), self.default)
                            # BUG FIX: the original guards were off by one
                            # (len>=0/1/2 for indexes 0/1/2), so a key with
                            # fewer than three ">"-separated levels raised
                            # IndexError. Guard index i with len >= i+1, and
                            # test the already-split list instead of
                            # re-splitting the key each time.
                            worksheet.write(1+row, 1, re.sub("[\[\]]", "", depth_level[0]) if len(depth_level)>=1 else '', self.default)
                            worksheet.write(1+row, 2, re.sub("[\[\]]", "", depth_level[1]) if len(depth_level)>=2 else '', self.default)
                            worksheet.write(1+row, 3, re.sub("[\[\]]", "", depth_level[2]) if len(depth_level)>=3 else '', self.default)
                            worksheet.write(1+row, 4, bucket2['key'], self.default)
                            worksheet.write(1+row, 5, '', self.default)
                            worksheet.write(1+row, 6, '', self.default)
                            worksheet.write(1+row, 7, bucket5['key'], self.default)
                            worksheet.write(1+row, 8, bucket5['doc_count'], self.default)

                            total += int(bucket5['doc_count'])
                            row += 1

        # Totals row (only when a single dataset was requested).
        if len(params['datasets'].split("^"))==1:
            worksheet.write(row+1, 0, '합계', self.header)
            worksheet.write(row+1, 1, '', self.header)
            worksheet.write(row+1, 2, '', self.header)
            worksheet.write(row+1, 3, '', self.header)
            worksheet.write(row+1, 4, '', self.header)
            worksheet.write(row+1, 5, '', self.header)
            worksheet.write(row+1, 6, '', self.header)
            worksheet.write(row+1, 7, '', self.header)
            worksheet.write(row+1, 8, total, self.header)

        logger.info("<%s> Total Emotions : %d" % (self.dataset_names, row) )
        
        
        

    # 원문
    def create_documents_list(self, params, index):
        """Export all matching documents to Excel worksheets.

        Runs an Elasticsearch search (paged via the scroll API when the hit
        count exceeds the page size) and writes one worksheet per page, since
        a single sheet would get too large. Rows whose title or content
        matches the project's exclusion patterns are skipped.

        :param params: dict with at least 'project_seq', 'start_date',
                       'end_date' (ISO date strings) plus whatever search
                       criteria es.get_documents() consumes.
        :param index:  Elasticsearch index (pattern) to search.
        """
        # Fetch patterns whose presence in title/content excludes a document.
        project_filter_keywords = db.get_project_filter_keywords(params['project_seq'])

        exclude_patterns = None
        if project_filter_keywords and 'regex_filter_keywords' in project_filter_keywords:
            # Comma-separated keyword list -> one case-insensitive alternation.
            exclude_patterns = re.compile(
                "(?i)(" + re.sub(",", "|", project_filter_keywords['regex_filter_keywords'].strip()) + ")")

        size = 10000  # paging size

        date_range = "~".join([params['start_date'][0:10], params['end_date'][0:10]])

        # Initial search; also opens the scroll context (empty scroll_id).
        result = es.get_documents(params, size, index, "")

        # First page gets sheet index 0.
        worksheet = self._new_documents_sheet(date_range, 0)

        logger.info("<%s> Total Documents : %d" % (self.dataset_names, result["hits"]["total"]))

        if "hits" in result and result["hits"]["total"] > 0:
            self._write_documents_rows(worksheet, result["hits"]["hits"], exclude_patterns)

            # More hits than one page: keep scrolling, one fresh worksheet per
            # page (expected to be large, so each page is a separate sheet).
            if result["hits"]["total"] > size:
                last_page = math.ceil(result["hits"]["total"] / size) - 1
                for page in range(1, last_page + 1):
                    worksheet = self._new_documents_sheet(date_range, page)
                    scrolled_result = es.get_documents(params, size, index, scroll_id=result["_scroll_id"])
                    # BUGFIX: row counter now restarts per sheet; previously it
                    # carried over across pages, leaving blank leading rows on
                    # every scrolled worksheet after the first.
                    self._write_documents_rows(worksheet, scrolled_result["hits"]["hits"], exclude_patterns)

                # Release the scroll context once the last page is processed.
                if result["_scroll_id"]:
                    es.clear_scroll(result["_scroll_id"])

    def _new_documents_sheet(self, date_range, page):
        """Create a '원문(<range>)(<page>)' worksheet and write its header row."""
        worksheet = self.workbook.add_worksheet("원문(%s)(%d)" % (date_range, page))
        headers = ['ID', '게시일', '작성자', 'URL', '제목', '내용',
                   '채널1', '채널2', '채널3', '정확도']  # last column: score
        for col, title in enumerate(headers):
            worksheet.write(0, col, title, self.header)
        return worksheet

    def _write_documents_rows(self, worksheet, hits, exclude_patterns):
        """Write one row per ES hit starting at row 1, skipping excluded docs."""
        row = 0
        for hit in hits:
            src = hit["_source"]
            # Strip characters Excel may misread as formula prefixes.
            doc_writer = re.sub(r"[\+=\-/]", "", str(src["doc_writer"]))
            doc_title = re.sub(r"[\+=\-/]", "", str(src["doc_title"]))
            doc_content = re.sub(r"[\+=\-/]", "", str(src["doc_content"]))

            # 2018.04.05 skip documents whose title/content matches a filter pattern.
            if exclude_patterns is not None and (
                    exclude_patterns.search(doc_title) is not None
                    or exclude_patterns.search(doc_content) is not None):
                continue

            row += 1
            worksheet.write(row, 0, hit["_id"], self.default)
            worksheet.write(row, 1, src["doc_datetime"], self.default)
            worksheet.write(row, 2, doc_writer, self.default)
            worksheet.write(row, 3, src["doc_url"], self.default)
            worksheet.write(row, 4, doc_title, self.default)
            worksheet.write(row, 5, doc_content, self.default)
            worksheet.write(row, 6, src["depth1_nm"], self.default)
            worksheet.write(row, 7, src["depth2_nm"], self.default)
            worksheet.write(row, 8, src["depth3_nm"], self.default)
            worksheet.write(row, 9, hit["_score"], self.default)



    def make_trend_report(self, params):
        """Write the related-keywords ('연관어') trend worksheet.

        Pulls pre-aggregated trend rows from the DB for the report date
        (derived from params['start_date']) and writes them verbatim.
        """
        logger.info("============================= \"make_trend_report\" starts.")

        # Report date must be in YYYYMMDD form: drop the dashes.
        today = params['start_date'][0:10].replace("-", "")

        sheet_name = "연관어(%s)" % "~".join([params['start_date'][0:10], params['end_date'][0:10]])
        worksheet = self.workbook.add_worksheet(sheet_name)

        # Header row (the date column is expected as YYYYMMDD).
        for col, title in enumerate(['날짜', '시간', '검색그룹', '검색아이템', '검색키워드', '키워드']):
            worksheet.write(0, col, title, self.header)

        # Data rows straight from the DB, starting at worksheet row 1.
        records = db.get_data_for_report_trend(params['project_seq'], today)
        for row_idx, record in enumerate(records, 1):
            for col in range(6):
                worksheet.write(row_idx, col, record[col], self.default)




    def create_report(self, params):
        """Create the Excel workbook, build the sheet for this report's mode,
        and close (persist) the workbook."""
        target_path = os.path.join(self.file_path.replace("/", os.path.sep), self.file_name)
        # strings_to_urls off: keep raw URLs as text; strings_to_numbers on:
        # numeric-looking strings are written as numbers.
        self.workbook = xlsxwriter.Workbook(
            target_path,
            options={'strings_to_urls': False, 'strings_to_numbers': True})
        self.header = self.workbook.add_format(self.HEADER_FORMAT)
        self.default = self.workbook.add_format(self.DEFAULT_FORMAT)

        # Dispatch on report mode; unknown modes produce an empty workbook.
        builders = {
            MODE_TOPICS: lambda: self.topics_list(params),
            MODE_DOCUMENTS: lambda: self.create_documents_list(params, INDEX_DOCUMENTS),
            MODE_EMOTIONS: lambda: self.emotions_per_causes(params),
            MODE_TREND: lambda: self.make_trend_report(params),
        }
        build = builders.get(self.mode)
        if build is not None:
            build()

        self.close_workbook()

    
    
    
    def close_workbook(self):
        """Close the workbook, flushing all written sheets to the output file."""
        self.workbook.close()