Beispiel #1
0
def get_request_query(params, scroll_id=None):
    """Build the request body for a fresh search or a scroll continuation.

    Args:
        params: Mapping of request parameters. For a fresh search this reads
            ``project_seqs`` (comma-separated string) and ``mode`` (only when
            both ``start_date`` and ``end_date`` are present), and optionally
            ``channels``.
        scroll_id: When truthy, a scroll-continuation body is returned and no
            query clauses are built.

    Returns:
        dict: Either ``{"scroll": "1d", "scroll_id": scroll_id}`` or a
        ``{"query": {"bool": {"filter": [...], "must": ...}}}`` body.
    """
    builder = Query(params)

    if scroll_id:
        # Continuing an existing scroll: only the scroll handle is sent.
        request = {"scroll": "1d", "scroll_id": scroll_id}
    else:
        clauses = [
            # Restrict to the requested project sequence(s).
            builder.get_project_seq_query(),
            # Even when several project seqs are passed, they all share the
            # same filter keywords, so only the first project_seq is used.
            builder.get_project_filter_query(
                params['project_seqs'].split(",")[0]),
        ]

        # Target channels (skipped when empty or 'all').
        if "channels" in params:
            channels = params["channels"]
            if channels and channels != 'all':
                clauses.append(builder.get_channel_query())

        # Target period.
        if "start_date" in params and "end_date" in params:
            clauses.append(builder.get_period_query(params['mode']))

        request = {
            "query": {
                "bool": {
                    "filter": clauses,
                    "must": builder.get_total_dataset_query(
                        params['project_seqs']),
                }
            }
        }

    logger.debug("[get_request_query] Query >>> %s " % json.dumps(request))

    return request
Beispiel #2
0
def get_documents(params, size, index, scroll_id=None):
    """POST a search (or scroll continuation) request and return the parsed reply.

    Args:
        params: Request parameters handed to ``Query``.
        size: Page size, stored under ``request['size']`` for a fresh search.
        index: Index name used to build the ``/<index>/doc/_search`` URI.
        scroll_id: When truthy, the request goes to ``/_search/scroll`` with
            this id instead of starting a new search.

    Returns:
        The JSON-decoded response body.
    """
    queryObj = Query(params)

    if not scroll_id:
        es_uri = "/" + index + "/doc/_search?scroll=1d"
        request = queryObj.get_documents_query()
        request['size'] = size
    else:
        es_uri = "/_search/scroll"
        request = {"scroll": "1d", "scroll_id": scroll_id}

    logger.debug("get_documents() ==> request : ")
    for k, v in request.items():
        logger.debug("\t{} : {}".format(k, v))

    es_conn = hc.HTTPConnection(es_ip, es_port, timeout=60)
    try:
        es_conn.request("POST", es_uri, json.dumps(request),
                        {"Content-type": "application/json"})
        raw = es_conn.getresponse().read()
    finally:
        # Always release the socket, even if the request or read fails.
        es_conn.close()

    # Decode once and reuse the result for both logging and the return value.
    result = json.loads(raw)

    if 'hits' in result:
        logger.debug("[get_documents] result['hits']['total'] >>> %d",
                     int(result['hits']['total']))
    else:
        logger.debug("[get_documents] result ::: " + str(raw))

    return result
Beispiel #3
0
 def __init__(self, params):
     """Derive report metadata and the output file name from ``params``.

     Reads the keys ``compare_yn``, ``start_date``, ``end_date``, ``seq``,
     ``reg_dt``, ``type_cd``, ``project_seq``, ``channels`` and ``datasets``
     (a ``^``-separated string of dataset ids, possibly empty).
     """
     self.compare = params['compare_yn'] == 'Y'
     # Strip '-', ':' and whitespace, then keep the first 8 characters
     # (presumably YYYYMMDD -- confirm against the callers' date format).
     self.start_date = re.sub(r"[-:\s]", "", params['start_date'])[:8]
     self.end_date = re.sub(r"[-:\s]", "", params['end_date'])[:8]
     self.seq = params['seq']
     self.reg_dt = re.sub(r"[-:\s]", "", params['reg_dt'])
     self.report_type = db.get_exceltype_name(params['type_cd'])  # e.g. RSP -> 리포트_소셜모니터링_추이분석
     self.project_name = db.get_project_name(params['project_seq'])
     self.channel = '전체' if not params['channels'] or params['channels'] == 'all' else "채널일부"

     # e.g. "6^7^15" -> "신라면,안성탕면,짜파게티"; ids without a name become 'unknown'.
     if params['datasets']:
         names = []
         for dataset_seq in params['datasets'].split("^"):
             # Look each id up once instead of twice.
             name = db.get_dataset_name(dataset_seq)
             names.append(name if name is not None else 'unknown')
         self.dataset_names = ",".join(names)
     else:
         self.dataset_names = ''

     if os.name == 'nt':
         # Replace every character Windows forbids in file names, wherever it
         # occurs. The earlier re.match guard only inspected the first
         # character, and the class missed backslash and colon.
         self.dataset_names = re.sub(r'[\\/:"*?<>|]', "_", self.dataset_names)

     self.queryObj = Query()

     compare_yn = "동일기간비교" if self.compare else "해당기간"

     if not params['datasets']:  # search trend
         name_parts = [str(self.seq), self.report_type, self.start_date, self.end_date, compare_yn]
     elif len(params['datasets'].split("^")) > 1:  # social monitoring, several datasets
         name_parts = [str(self.seq), self.report_type, self.channel, self.start_date, self.end_date, compare_yn]
     else:  # social monitoring, single dataset: include its name
         name_parts = [str(self.seq), self.report_type + "(" + self.dataset_names + ")", self.channel, self.start_date, self.end_date, compare_yn]
     self.file_name = "_".join(name_parts) + ".xlsx"

     # Dump the incoming parameters between two separator lines.
     self.logger.info("=======================================================================================")
     for k, v in params.items():
         self.logger.info(k + " :\t\t" + str(v))
     self.logger.info("=======================================================================================")
Beispiel #4
0
    def __init__(self, params):
        """Derive export metadata and the ``.SCD`` file name from ``params``.

        Reads the keys ``seq``, ``compare_yn``, ``start_date``, ``end_date``,
        ``reg_dt`` and ``datasets`` (``^``-separated dataset ids, may be empty).
        """
        self.seq = params['seq']
        self.compare = params['compare_yn'] == 'Y'

        # Drop '-', ':', 'T' and whitespace; the date range keeps 12 characters
        # (presumably YYYYMMDDHHMM -- confirm against the callers' format).
        self.start_date = re.sub(r"[-:T\s]", "", params['start_date'])[:12]
        self.end_date = re.sub(r"[-:T\s]", "", params['end_date'])[:12]
        self.reg_dt = re.sub(r"[-:T\s]", "", params['reg_dt'])

        # e.g. "6^7^15" -> "신라면,안성탕면,짜파게티"; ids without a name become 'unknown'.
        if params['datasets']:
            names = []
            for dataset_seq in str(params['datasets']).split("^"):
                # Look each id up once instead of twice.
                name = db.get_dataset_name(dataset_seq)
                names.append(name if name is not None else 'unknown')
            self.dataset_names = ",".join(names)
        else:
            self.dataset_names = ''
        self.query = Query(params)

        self.file_name = "B-%d-%s-I-C.SCD" % (self.seq, get_current_datetime())
Beispiel #5
0
def get_request_query(params, scroll_id=None):
    """Build the request body for a fresh search or a scroll continuation.

    Args:
        params: Mapping of request parameters. For a fresh search this reads
            ``project_seqs`` (comma-separated string) and ``mode`` (only when
            both ``start_date`` and ``end_date`` are present), and optionally
            ``channels``.
        scroll_id: When truthy, a scroll-continuation body is returned and no
            query clauses are built.

    Returns:
        dict: Either ``{"scroll": "1d", "scroll_id": scroll_id}`` or a
        ``{"query": {"bool": {"filter": [...], "must": ...}}}`` body.
    """
    queryObj = Query(params)

    if not scroll_id:
        request = {"query": {"bool": {}}}
    else:
        request = {"scroll": "1d", "scroll_id": scroll_id}

    if "query" in request:
        # Named ``filters`` so the ``filter`` builtin is not shadowed.
        filters = [
            # Restrict to the requested project sequence(s).
            queryObj.get_project_seq_query(),
            # Even when several project seqs are passed, they all share the
            # same filter keywords, so only the first project_seq is used.
            queryObj.get_project_filter_query(
                params['project_seqs'].split(",")[0]),
        ]

        # Target channels (skipped when empty or 'all').
        if "channels" in params and params[
                "channels"] and params["channels"] != 'all':
            filters.append(queryObj.get_channel_query())

        # Target period.
        if "start_date" in params and "end_date" in params:
            filters.append(queryObj.get_period_query(params['mode']))

        request["query"]["bool"]["filter"] = filters
        # NOTE: a hand-written "must" clause (bool/should with a query_string
        # over "doc_title^100" and "doc_content", default_operator AND) was
        # previously kept here as a disabled debugging override.
        request["query"]["bool"]["must"] = queryObj.get_total_dataset_query(
            params['project_seqs'])

    logger.debug("[get_request_query] Query >>> %s ", json.dumps(request))

    return request
Beispiel #6
0
def get_documents_count(params, index):
    """POST a ``_count`` request for ``index`` and return the document count.

    Args:
        params: Request parameters handed to ``Query``.
        index: Index name used to build the ``/<index>/doc/_count`` URI.

    Returns:
        The ``count`` value of the decoded response, or ``-1`` (after logging
        the raw response as an error) when the response has no ``count`` key.
    """
    queryObj = Query(params)

    es_uri = "/" + index + "/doc/_count"
    request = queryObj.get_documents_query()

    es_conn = hc.HTTPConnection(es_ip, es_port, timeout=60)
    try:
        es_conn.request("POST", es_uri, json.dumps(request),
                        {"Content-type": "application/json"})
        raw = es_conn.getresponse().read()
    finally:
        # Always release the socket, even if the request or read fails.
        es_conn.close()

    # Decode once instead of once per access.
    result = json.loads(raw)

    if 'count' in result:
        return result['count']

    logger.error("[get_documents_count] %s", str(raw))
    return -1
Beispiel #7
0
def get_documents(params, size, index, scroll_id=None):
    """POST a search (or scroll continuation) request and return the parsed reply.

    Args:
        params: Request parameters. A fresh search reads ``project_seq`` and,
            when present, ``channels``, ``start_date``/``end_date`` and
            ``datasets``.
        size: Page size placed in the search body.
        index: Index name used to build the ``/<index>/doc/_search`` URI.
        scroll_id: When truthy, the request goes to ``/_search/scroll`` with
            this id and no query clauses are built.

    Returns:
        The JSON-decoded response body.
    """
    queryObj = Query(params)

    if not scroll_id:
        es_uri = "/" + index + "/doc/_search?scroll=1d"
        request = {"size": size, "query": {"bool": {}}}
    else:
        es_uri = "/_search/scroll"
        request = {"scroll": "1d", "scroll_id": scroll_id}

    if "query" in request:
        # Named ``filters`` so the ``filter`` builtin is not shadowed.
        filters = [
            # Restrict to the requested project sequence.
            queryObj.get_project_seq_query(),
            queryObj.get_project_filter_query(params['project_seq']),
        ]

        # Target channels (skipped when empty or 'all').
        if "channels" in params and params[
                "channels"] and params["channels"] != 'all':
            filters.append(queryObj.get_channel_query())

        # Target period.
        if "start_date" in params and "end_date" in params:
            filters.append(queryObj.get_period_query())

        request["query"]["bool"]["filter"] = filters

        # Include-keywords of the selected dataset(s).
        if "datasets" in params and params["datasets"]:
            request["query"]["bool"]["must"] = queryObj.get_dataset_query(
                params['project_seq'], params["datasets"])

    logger.debug("[get_documents] Query >>> %s ", json.dumps(request))

    es_conn = hc.HTTPConnection(es_ip, es_port, timeout=60)
    try:
        es_conn.request("POST", es_uri, json.dumps(request),
                        {"Content-type": "application/json"})
        raw = es_conn.getresponse().read()
    finally:
        # Always release the socket, even if the request or read fails.
        es_conn.close()

    # Decode once and reuse the result for both logging and the return value.
    result = json.loads(raw)

    if 'hits' in result:
        logger.debug("[get_documents] result['hits']['total'] >>> %d",
                     int(result['hits']['total']))
    else:
        logger.debug("[get_documents] result ::: " + str(raw))

    return result
Beispiel #8
0
    def __init__(self, params):
        """Derive export metadata and the mode-specific ``.xlsx`` file name.

        Reads the keys ``mode``, ``compare_yn``, ``start_date``, ``end_date``,
        ``reg_dt`` and ``datasets`` (``^``-separated dataset ids, may be
        empty); ``project_seq`` is read only for ``MODE_TREND``.
        """
        self.mode = params['mode']
        self.compare = params['compare_yn'] == 'Y'

        # Drop '-', ':', 'T' and whitespace; the date range keeps 12 characters
        # (presumably YYYYMMDDHHMM -- confirm against the callers' format).
        self.start_date = re.sub(r"[-:T\s]", "", params['start_date'])[:12]
        self.end_date = re.sub(r"[-:T\s]", "", params['end_date'])[:12]
        self.reg_dt = re.sub(r"[-:T\s]", "", params['reg_dt'])

        # e.g. "6^7^15" -> "신라면,안성탕면,짜파게티"; ids without a name become 'unknown'.
        if params['datasets']:
            names = []
            for dataset_seq in str(params['datasets']).split("^"):
                # Look each id up once instead of twice.
                name = db.get_dataset_name(dataset_seq)
                names.append(name if name is not None else 'unknown')
            self.dataset_names = ",".join(names)
        else:
            self.dataset_names = ''
        self.query = Query(params)

        # Compare against self.mode: the bare name ``mode`` was never bound in
        # this method and raised NameError.
        # NOTE(review): an unrecognised mode still leaves ``file_name`` unset.
        if self.mode == MODE_DOCUMENTS:
            self.file_name = "_".join(["SNS", self.dataset_names, self.start_date, self.end_date]) + ".xlsx"
        elif self.mode == MODE_TOPICS:
            self.file_name = "_".join(["화제어", self.dataset_names, self.start_date, self.end_date]) + ".xlsx"
        elif self.mode == MODE_EMOTIONS:
            self.file_name = "_".join(["감성분석", self.dataset_names, self.start_date, self.end_date]) + ".xlsx"
        elif self.mode == MODE_TREND:
            self.file_name = "_".join(["연관검색어", str(params['project_seq']), self.start_date, self.end_date]) + ".xlsx"
Beispiel #9
0
                params['project_seq'], params["datasets"])

    logger.debug("[get_documents] Query >>> %s " % json.dumps(request))

    es_conn = hc.HTTPConnection(es_ip, es_port, timeout=60)
    es_conn.request("POST", es_uri, json.dumps(request),
                    {"Content-type": "application/json"})
    result = es_conn.getresponse().read()

    if 'hits' in json.loads(result):
        logger.debug("[get_documents] result['hits']['total'] >>> %d" %
                     int(json.loads(result)['hits']['total']))
    else:
        logger.debug("[get_documents] result ::: " + str(result))

    return json.loads(result)


if __name__ == "__main__":
    # Manual run: fetch the first 10 hits from the "documents" index for a
    # fixed sample request and print the decoded response.
    params = dict(
        start_date="2018-01-01T00:00:00",
        end_date="2018-12-31T23:59:59",
        project_seq=176,
        compare_yn="N",
        channels="all",
        datasets="2852",
    )
    # Only needed by the optional topic-list check below.
    queryObj = Query(params)

    #print(queryObj.ALL_TOPICS_LIST("신한금융지주"))
    print(
        get_documents(params, 10, "documents")
    )
Beispiel #10
0
def get_documents(params, size, index, scroll_id=None):
    """POST a search (or scroll continuation) request and return the parsed reply.

    Args:
        params: Request parameters. Optional keys ``channels``,
            ``start_date``/``end_date`` and ``datasets`` (``^``-separated
            dataset ids) add further ``must`` clauses; ``project_seq`` is read
            when ``datasets`` is set.
        size: Page size placed in the search body.
        index: Index name used to build the ``/<index>/doc/_search`` URI.
        scroll_id: When truthy, the request goes to ``/_search/scroll`` with
            this id and the built clauses are not attached.

    Returns:
        The JSON-decoded response body.
    """
    queryObj = Query(params)

    if not scroll_id:
        es_uri = "/" + index + "/doc/_search?scroll=1d"
        request = {"size": size, "query": {"bool": {"must": []}}}
    else:
        es_uri = "/_search/scroll"
        request = {"scroll": "1d", "scroll_id": scroll_id}

    # Restrict to the requested project sequence.
    must = [get_project_seq_query(params)]

    # Target channels (skipped when empty or 'all').
    if "channels" in params and params[
            "channels"] and params["channels"] != 'all':
        must.append(get_channel_query(params))

    # Target period.
    if "start_date" in params and "end_date" in params:
        must.append(get_period_query(params))

    # Include-keywords of the selected dataset(s).
    if "datasets" in params and params["datasets"]:
        dataset_ids = params["datasets"].split("^")  # split once, reuse below
        if len(dataset_ids) > 1:
            # Several datasets: a document may match any one of them.
            should = [
                queryObj.get_dataset_query(params['project_seq'], dataset)
                for dataset in dataset_ids
            ]
            must.append({"bool": {"should": should}})
        else:
            must.append(
                queryObj.get_dataset_query(params['project_seq'],
                                           params["datasets"]))

    # TODO: per "type_cd" (e.g. "CCT002" social monitoring - document
    # statistics, "CCT003" social monitoring - sentiment analysis, ...),
    # reshape the request as that report type requires.

    if "query" in request:
        request["query"]["bool"]["must"] = must

    logger.debug("get_documents() ==> request : ")
    for k, v in request.items():
        logger.debug("\t{} : {}".format(k, v))

    es_conn = hc.HTTPConnection(es_ip, es_port, timeout=60)
    try:
        es_conn.request("POST", es_uri, json.dumps(request),
                        {"Content-type": "application/json"})
        raw = es_conn.getresponse().read()
    finally:
        # Always release the socket, even if the request or read fails.
        es_conn.close()

    # Decode once and reuse the result for both logging and the return value.
    result = json.loads(raw)

    if 'hits' in result:
        logger.debug("[get_documents] result['hits']['total'] >>> %d",
                     int(result['hits']['total']))
    else:
        logger.debug("[get_documents] result ::: " + str(raw))

    return result