def occupation_per_emotions(self, params):
    """Write the sentiment-share sheet ('감성분석 점유율').

    Non-compare mode: one row per sentiment with count and share (%).
    Compare mode: one row per (date range, sentiment) with counts only.
    """
    worksheet = self.workbook.add_worksheet('감성분석 점유율')
    if not self.compare:
        # Header row.
        for col, title in enumerate(('긍부정', '분석량', '점유율(%)')):
            worksheet.write(0, col, title, self.header)
        # Run the aggregation query.
        qdsl = self.queryObj.EMOTIONS_OCCUPATIONS(params, self.compare)
        self.logger.debug("[ReportEmotions][occupation_per_emotions] %s" % qdsl)
        result = es.get_aggregations(copy.copy(qdsl), params, self.INDEX_NAME)
        total = result['aggregations']['my_aggs1']['doc_count']
        total_percentage = 0.0
        row = 0
        for bucket in result['aggregations']['my_aggs1']['my_aggs2']['buckets']:
            share = bucket['doc_count'] / total * 100
            worksheet.write(1 + row, 0, bucket['key'], self.default)
            worksheet.write(1 + row, 1, bucket['doc_count'], self.default)
            total_percentage += share
            worksheet.write(1 + row, 2, share, self.default)
            row += 1
        # Totals row.
        worksheet.write(row + 1, 0, '합계', self.header)
        worksheet.write(row + 1, 1, total, self.header)
        worksheet.write(row + 1, 2, total_percentage, self.header)
    else:
        # Header row.
        for col, title in enumerate(('날짜', '긍부정', '분석량')):
            worksheet.write(0, col, title, self.header)
        # Run the aggregation query.
        qdsl = self.queryObj.EMOTIONS_OCCUPATIONS(params, self.compare)
        self.logger.debug("[ReportEmotions][occupation_per_emotions] %s" % qdsl)
        result = es.get_aggregations(copy.copy(qdsl), params, self.INDEX_NAME)
        total = 0
        row = 0
        for range_bucket in result['aggregations']['my_aggs1']['buckets']:
            for emo_bucket in range_bucket['my_aggs2']['my_aggs3']['buckets']:
                worksheet.write(1 + row, 0, range_bucket['key'], self.default)
                worksheet.write(1 + row, 1, emo_bucket['key'], self.default)
                worksheet.write(1 + row, 2, emo_bucket['doc_count'], self.default)
                total += emo_bucket['doc_count']
                row += 1
        # Totals row (single-dataset reports only).
        if len(params['datasets'].split("^")) == 1:
            worksheet.write(row + 1, 0, '합계', self.header)
            worksheet.write(row + 1, 1, '', self.header)
            worksheet.write(row + 1, 2, total, self.header)
def occupation_per_emotions(self, params):
    """Write the sentiment-share sheet ('감성분석 점유율').

    Non-compare mode: one row per sentiment with count and share (%).
    Compare mode: one row per (date range, sentiment) with counts only.
    """
    worksheet = self.workbook.add_worksheet('감성분석 점유율')
    if not self.compare:
        # Header row.
        for col, title in enumerate(('긍부정', '문서수', '점유율(%)')):
            worksheet.write(0, col, title, self.header)
        qdsl = self.query.EMOTIONS_OCCUPATIONS(self.compare)
        result = es.get_aggregations(copy.copy(qdsl), params, self.INDEX_NAME)
        total = result['hits']['total']
        total_percentage = 0.0
        row = 0
        for bucket in result['aggregations']['my_aggs1']['buckets']:
            share = bucket['doc_count'] / total * 100
            worksheet.write(1 + row, 0, bucket['key'], self.default)
            worksheet.write(1 + row, 1, bucket['doc_count'], self.default)
            total_percentage += share
            worksheet.write(1 + row, 2, share, self.default)
            row += 1
        # Totals row.
        worksheet.write(row + 1, 0, '합계', self.header)
        worksheet.write(row + 1, 1, total, self.header)
        worksheet.write(row + 1, 2, total_percentage, self.header)
    else:
        # Header row.
        for col, title in enumerate(('날짜', '긍부정', '문서수')):
            worksheet.write(0, col, title, self.header)
        qdsl = self.query.EMOTIONS_OCCUPATIONS(self.compare)
        result = es.get_aggregations(copy.copy(qdsl), params, self.INDEX_NAME)
        total = result['hits']['total']
        row = 0
        for range_bucket in result['aggregations']['my_aggs1']['buckets']:
            for emo_bucket in range_bucket['my_aggs2']['buckets']:
                worksheet.write(1 + row, 0, range_bucket['key'], self.default)
                worksheet.write(1 + row, 1, emo_bucket['key'], self.default)
                worksheet.write(1 + row, 2, emo_bucket['doc_count'], self.default)
                row += 1
        # Totals row (single-dataset reports only).
        if len(params['datasets'].split("^")) == 1:
            worksheet.write(row + 1, 0, '합계', self.header)
            worksheet.write(row + 1, 1, '', self.header)
            worksheet.write(row + 1, 2, total, self.header)
def emotions_per_day(self, params):
    """Write the sentiment-trend sheet ('감성분석 추이'): one row per (sentiment, day)."""
    worksheet = self.workbook.add_worksheet('감성분석 추이')
    # Header row.
    for col, title in enumerate(('긍부정', '일자', '문서수')):
        worksheet.write(0, col, title, self.header)
    qdsl = self.query.EMOTIONS_PROGRESS(self.compare)
    result = es.get_aggregations(copy.copy(qdsl), params, self.INDEX_NAME)
    total = result['hits']['total']
    row = 0
    for emo_bucket in result['aggregations']['my_aggs1']['buckets']:
        for day_bucket in emo_bucket['my_aggs2']['buckets']:
            worksheet.write(1 + row, 0, emo_bucket['key'], self.default)
            worksheet.write(1 + row, 1, day_bucket['key_as_string'], self.default)
            worksheet.write(1 + row, 2, day_bucket['doc_count'], self.default)
            row += 1
    # Totals row (single-dataset reports only).
    if len(params['datasets'].split("^")) == 1:
        worksheet.write(row + 1, 0, '합계', self.header)
        worksheet.write(row + 1, 1, '', self.header)
        worksheet.write(row + 1, 2, total, self.header)
def emotions_per_day(self, params):
    """Write the sentiment-trend sheet ('감성분석 추이'): one row per (day, sentiment)."""
    worksheet = self.workbook.add_worksheet('감성분석 추이')
    # Header row.
    for col, title in enumerate(('일자', '긍부정', '분석량')):
        worksheet.write(0, col, title, self.header)
    qdsl = self.queryObj.EMOTIONS_PROGRESS(params)
    self.logger.debug("[ReportEmotions][emotions_per_day] %s" % qdsl)
    result = es.get_aggregations(copy.copy(qdsl), params, self.INDEX_NAME)
    total = 0
    row = 0
    for day_bucket in result['aggregations']['my_aggs1']['buckets']:
        for emo_bucket in day_bucket['my_aggs2']['my_aggs3']['buckets']:
            worksheet.write(1 + row, 0, day_bucket['key_as_string'], self.default)
            worksheet.write(1 + row, 1, emo_bucket['key'], self.default)
            worksheet.write(1 + row, 2, emo_bucket['doc_count'], self.default)
            total += emo_bucket['doc_count']
            row += 1
    # Totals row (single-dataset reports only).
    if len(params['datasets'].split("^")) == 1:
        worksheet.write(row + 1, 0, '합계', self.header)
        worksheet.write(row + 1, 1, '', self.header)
        worksheet.write(row + 1, 2, total, self.header)
def depth2_channel_occupations_in_documents(self, params, depth1_seq):
    """Write a per-depth2-channel document-count sheet for one depth1 channel.

    The sheet name depends on which depth1 channel is requested.
    """
    sheet_names = {
        Channel.COMMUNITY: '커뮤니티 문서량',
        Channel.MEDIA: '미디어 문서량',
        Channel.SNS: 'SNS 문서량',
        Channel.PORTAL: '포털 문서량',
    }
    worksheet = self.workbook.add_worksheet(sheet_names.get(depth1_seq, ''))
    # Run the aggregation query.
    qdsl = self.queryObj.DEPTH2_CHANNEL_OCCUPATIONS(depth1_seq.value, params, self.compare)
    self.logger.debug("[ReportStatistics][depth2_channel_occupations_in_documents] %s" % qdsl)
    result = es.get_aggregations(qdsl, params, self.INDEX_NAME)
    total = result['hits']['total']
    if not self.compare:
        # Header row.
        for col, title in enumerate(('채널별', '문서수', '비율(%)')):
            worksheet.write(0, col, title, self.header)
        total_percentage = 0.0
        row = 1
        for bucket in result['aggregations']['my_aggs1']['buckets']:
            share = bucket['doc_count'] / total * 100
            worksheet.write(row, 0, bucket['key'], self.default)
            worksheet.write(row, 1, bucket['doc_count'], self.default)
            worksheet.write(row, 2, share, self.default)
            total_percentage += share
            row += 1
        # Totals row (single-dataset reports only).
        if len(params['datasets'].split("^")) == 1:
            worksheet.write(row, 0, "합계", self.header)
            worksheet.write(row, 1, total, self.header)
            worksheet.write(row, 2, total_percentage, self.header)
    else:
        # Header row.
        for col, title in enumerate(('채널별', '날짜', '문서수')):
            worksheet.write(0, col, title, self.header)
        row = 1
        for chan_bucket in result['aggregations']['my_aggs1']['buckets']:
            for date_bucket in chan_bucket['my_aggs2']['buckets']:
                worksheet.write(row, 0, chan_bucket['key'], self.default)
                worksheet.write(row, 1, date_bucket['key'], self.default)
                worksheet.write(row, 2, date_bucket['doc_count'], self.default)
                row += 1
        # Totals row (single-dataset reports only).
        if len(params['datasets'].split("^")) == 1:
            worksheet.write(row, 0, "합계", self.header)
            worksheet.write(row, 1, '', self.header)
            worksheet.write(row, 2, total, self.header)
def emotions_per_causes(self, params):
    """Write the per-cause sentiment sheet.

    One row per (date, channel path, category, sentiment) combination whose
    category key occurs in params['dataset_name'].  Dates are written as
    YYYYMMDD.
    """
    worksheet = self.workbook.add_worksheet(
        "강성분석(%s)" % "~".join([params['start_date'][0:10], params['end_date'][0:10]]))
    # Header row (dates must be formatted YYYYMMDD).
    headers = ('날짜', '채널1', '채널2', '채널3', '대분류', '중분류', '소분류', '긍부정', '문서수')
    for col, title in enumerate(headers):
        worksheet.write(0, col, title, self.header)
    qdsl = self.query.EMOTIONS_PER_CAUSES()
    result = es.get_aggregations(copy.copy(qdsl), params, INDEX_EMOTIONS)
    total = 0
    row = 0
    for bucket0 in result['aggregations']['my_aggs0']['buckets']:
        for bucket1 in bucket0['my_aggs1']['buckets']:
            for bucket2 in bucket1['my_aggs2']['buckets']:
                for bucket5 in bucket2['my_aggs3']['my_aggs4']['my_aggs5']['buckets']:
                    # 2018.01.11: names containing "(주)" must be matched by
                    # substring (name minus the "(주)" prefix), hence find().
                    if params['dataset_name'].find(bucket2['key']) >= 0:
                        depth_level = bucket1['key'].split(">")
                        emotions_date = bucket0['key_as_string']
                        worksheet.write(1 + row, 0, re.sub("-", "", emotions_date[:emotions_date.find("T")]), self.default)
                        # BUG FIX: guards were off by one (len >= 0/1/2), so
                        # depth_level[1] / depth_level[2] raised IndexError for
                        # channel keys with fewer than 2/3 '>'-separated levels.
                        worksheet.write(1 + row, 1, re.sub("[\[\]]", "", depth_level[0]) if len(depth_level) >= 1 else '', self.default)
                        worksheet.write(1 + row, 2, re.sub("[\[\]]", "", depth_level[1]) if len(depth_level) >= 2 else '', self.default)
                        worksheet.write(1 + row, 3, re.sub("[\[\]]", "", depth_level[2]) if len(depth_level) >= 3 else '', self.default)
                        worksheet.write(1 + row, 4, bucket2['key'], self.default)
                        worksheet.write(1 + row, 5, '', self.default)
                        worksheet.write(1 + row, 6, '', self.default)
                        worksheet.write(1 + row, 7, bucket5['key'], self.default)
                        worksheet.write(1 + row, 8, bucket5['doc_count'], self.default)
                        total += int(bucket5['doc_count'])
                        row += 1
    # Totals row (single-dataset reports only).
    if len(params['datasets'].split("^")) == 1:
        worksheet.write(row + 1, 0, '합계', self.header)
        for col in range(1, 8):
            worksheet.write(row + 1, col, '', self.header)
        worksheet.write(row + 1, 8, total, self.header)
    logger.info("<%s> Total Emotions : %d" % (self.dataset_names, row) )
def dataset_occupations_per_depth1_in_documents(self, params):
    """Write the per-channel document-share sheet ('채널별 문서점유율').

    Non-compare mode: (dataset, channel, count) rows.
    Compare mode: (dataset, channel, date range, count) rows.
    """
    worksheet = self.workbook.add_worksheet('채널별 문서점유율')
    # Header row.
    worksheet.write(0, 0, '데이터셋', self.header)
    worksheet.write(0, 1, '채널', self.header)
    if not self.compare:
        worksheet.write(0, 2, '문서수', self.header)
    else:
        worksheet.write(0, 2, '날짜범위', self.header)
        worksheet.write(0, 3, '문서수', self.header)
    qdsl = self.query.DATASET_OCCUPATIONS_PER_DEPTH1(self.compare)
    result = es.get_aggregations(copy.copy(qdsl), params, self.INDEX_NAME)
    total = result['hits']['total']
    row = 0
    if total > 0:
        if not self.compare:
            for dataset_seq in params['datasets'].split("^"):
                # Resolve the dataset name once per dataset (was queried
                # twice per row inside the inner loop).
                name = mariadb.get_dataset_name(dataset_seq)
                dataset_name = name if name is not None else 'unknown'
                for bucket in result['aggregations']['my_aggs1']['buckets'][dataset_seq]['my_aggs2']['buckets']:
                    worksheet.write(1 + row, 0, dataset_name, self.default)
                    worksheet.write(1 + row, 1, bucket['key'], self.default)
                    worksheet.write(1 + row, 2, bucket['doc_count'], self.default)
                    row += 1
        else:
            for dataset_seq in params['datasets'].split("^"):
                name = mariadb.get_dataset_name(dataset_seq)
                dataset_name = name if name is not None else 'unknown'
                for bucket1 in result['aggregations']['my_aggs1']['buckets'][dataset_seq]['my_aggs2']['buckets']:
                    for bucket2 in bucket1['my_aggs3']['buckets']:
                        worksheet.write(1 + row, 0, dataset_name, self.default)
                        worksheet.write(1 + row, 1, bucket1['key'], self.default)
                        worksheet.write(1 + row, 2, bucket2['key'], self.default)
                        worksheet.write(1 + row, 3, bucket2['doc_count'], self.default)
                        row += 1
    # Totals row (single-dataset reports only).
    if len(params['datasets'].split("^")) == 1:
        worksheet.write(row + 1, 0, '합계', self.header)
        worksheet.write(row + 1, 1, '', self.header)
        if not self.compare:
            worksheet.write(row + 1, 2, total, self.header)
        else:
            worksheet.write(row + 1, 2, '', self.header)
            worksheet.write(row + 1, 3, total, self.header)
def emotions_per_channel(self, params):
    """Write the per-channel sentiment sheet ('채널별 감성분석')."""
    worksheet = self.workbook.add_worksheet('채널별 감성분석')
    # Run the aggregation query.
    qdsl = self.query.EMOTIONS_PER_DEPTH1(self.compare)
    result = es.get_aggregations(copy.copy(qdsl), params, self.INDEX_NAME)
    total = result['hits']['total']
    row = 0
    if not self.compare:
        # Header row.
        for col, title in enumerate(('채널', '긍부정', '문서수')):
            worksheet.write(0, col, title, self.header)
        for chan_bucket in result['aggregations']['my_aggs1']['buckets']:
            for emo_bucket in chan_bucket['my_aggs2']['buckets']:
                worksheet.write(1 + row, 0, mariadb.get_channel_name(1, chan_bucket['key'])[0], self.default)
                worksheet.write(1 + row, 1, emo_bucket['key'], self.default)
                worksheet.write(1 + row, 2, emo_bucket['doc_count'], self.default)
                row += 1
        # Totals row (single-dataset reports only).
        if len(params['datasets'].split("^")) == 1:
            worksheet.write(row + 1, 0, '합계', self.header)
            worksheet.write(row + 1, 1, '', self.header)
            worksheet.write(row + 1, 2, total, self.header)
    else:
        # Header row.
        for col, title in enumerate(('일자', '채널', '긍부정', '문서수')):
            worksheet.write(0, col, title, self.header)
        for day_bucket in result['aggregations']['my_aggs1']['buckets']:
            for chan_bucket in day_bucket['my_aggs2']['buckets']:
                for emo_bucket in chan_bucket['my_aggs3']['buckets']:
                    worksheet.write(1 + row, 0, day_bucket['key'], self.default)
                    worksheet.write(1 + row, 1, mariadb.get_channel_name(1, chan_bucket['key'])[0], self.default)
                    worksheet.write(1 + row, 2, emo_bucket['key'], self.default)
                    worksheet.write(1 + row, 3, emo_bucket['doc_count'], self.default)
                    row += 1
        # Totals row (single-dataset reports only).
        if len(params['datasets'].split("^")) == 1:
            worksheet.write(row + 1, 0, '합계', self.header)
            worksheet.write(row + 1, 1, '', self.header)
            worksheet.write(row + 1, 2, '', self.header)
            worksheet.write(row + 1, 3, total, self.header)
def topics_list(self, params):
    """Write the noun topic-word sheet ('화제어_명사').

    Non-compare mode repeats each topic row once per related word; a topic
    with no related words gets a single row with blank related columns.
    """
    worksheet = self.workbook.add_worksheet(
        "화제어_명사(%s)" % "~".join([params['start_date'][0:10], params['end_date'][0:10]]))
    # Header row.
    worksheet.write(0, 0, '순위', self.header)
    worksheet.write(0, 1, '화제어', self.header)
    worksheet.write(0, 2, '문서수', self.header)
    if not self.compare:
        worksheet.write(0, 3, '연관어', self.header)
        worksheet.write(0, 4, '문서수', self.header)
    qdsl = self.queryObj.TOPICS_LIST(params)
    self.logger.debug("[ReportStatistics][topics_list] %s" % qdsl)
    result_topic = es.get_aggregations(qdsl, params, self.INDEX_NAME)
    row = 0
    if not self.compare:
        for seq, topic_bucket in enumerate(result_topic['aggregations']['my_aggs1']['buckets']):
            topic = topic_bucket['key']
            related = topic_bucket['my_aggs2']['buckets']
            if related:
                for rel_bucket in related:
                    worksheet.write(1 + row, 0, 1 + seq, self.default)
                    worksheet.write(1 + row, 1, topic, self.default)
                    worksheet.write(1 + row, 2, topic_bucket['doc_count'], self.default)
                    worksheet.write(1 + row, 3, rel_bucket['key'], self.default)
                    worksheet.write(1 + row, 4, rel_bucket['doc_count'], self.default)
                    row += 1
            else:
                worksheet.write(1 + row, 0, 1 + seq, self.default)
                worksheet.write(1 + row, 1, re.sub("[\[\]]", "", topic), self.default)
                worksheet.write(1 + row, 2, topic_bucket['doc_count'], self.default)
                worksheet.write(1 + row, 3, '', self.default)
                worksheet.write(1 + row, 4, '', self.default)
                row += 1
    else:
        for seq, topic_bucket in enumerate(result_topic['aggregations']['my_aggs1']['buckets']):
            worksheet.write(1 + row, 0, 1 + seq, self.default)
            worksheet.write(1 + row, 1, topic_bucket['key'], self.default)
            worksheet.write(1 + row, 2, topic_bucket['doc_count'], self.default)
            row += 1
def dataset_count_per_day_in_documents(self, params):
    """Write the daily collection-count sheet ('일자별 수집량'): one row per
    day, one column per dataset plus a trailing per-day sum column."""
    worksheet = self.workbook.add_worksheet('일자별 수집량')
    sum_per_dataset = {}
    # Header: date, one column per dataset, then the sum column.
    worksheet.write(0, 0, '일자', self.header)
    col = 1
    for dataset_name in self.dataset_names.split(","):
        worksheet.write(0, col, dataset_name, self.header)
        col += 1
    worksheet.write(0, col, '합계', self.header)
    qdsl = self.query.DATASET_COUNT_PER_DAY(self.compare)
    result = es.get_aggregations(copy.copy(qdsl), params, self.INDEX_NAME)
    total = result['hits']['total'] if 'hits' in result else 0
    row = 0
    if total > 0:
        for bucket in result['aggregations']['my_aggs1']['buckets']:
            label = bucket['key'] if self.compare else bucket['key_as_string']
            worksheet.write(1 + row, 0, label, self.header)
            sum_per_day = 0
            col = 1
            for dataset_seq in params['datasets'].split("^"):
                count = bucket['my_aggs2']['buckets'][dataset_seq]['doc_count']
                sum_per_day += count
                sum_per_dataset[dataset_seq] = sum_per_dataset.get(dataset_seq, 0) + count
                worksheet.write(1 + row, col, count, self.default)
                col += 1
            worksheet.write(1 + row, col, sum_per_day, self.default)
            row += 1
        # Totals row (single-dataset reports only).
        if len(params['datasets'].split("^")) == 1:
            worksheet.write(1 + row, 0, '합계', self.header)
            col = 1
            for dataset_seq in params['datasets'].split("^"):
                worksheet.write(1 + row, col, '', self.header)
                col += 1
            worksheet.write(1 + row, col, sum_per_dataset[dataset_seq], self.header)
def dataset_occupations_per_depth1_in_emotions(self, params):
    """Write the per-channel analysis-count sheet ('채널분석량').

    Non-compare mode: (dataset, channel, count) rows.
    Compare mode: (date range, dataset, channel, count) rows.
    """
    worksheet = self.workbook.add_worksheet('채널분석량')
    # Header row.
    if not self.compare:
        worksheet.write(0, 0, '데이터셋', self.header)
        worksheet.write(0, 1, '채널', self.header)
        worksheet.write(0, 2, '분석량', self.header)
    else:
        worksheet.write(0, 0, '날짜범위', self.header)
        worksheet.write(0, 1, '데이터셋', self.header)
        worksheet.write(0, 2, '채널', self.header)
        worksheet.write(0, 3, '분석량', self.header)
    qdsl = self.queryObj.DATASET_OCCUPATIONS_PER_DEPTH1_IN_EMOTIONS(params, self.compare)
    self.logger.debug("[ReportEmotions][dataset_occupations_per_depth1_in_emotions] %s" % qdsl)
    result = es.get_aggregations(copy.copy(qdsl), params, self.INDEX_NAME)
    total = 0
    row = 0
    if 'hits' in result and result['hits']['total'] > 0:
        if not self.compare:
            for dataset_seq in params['datasets'].split("^"):
                # Resolve the dataset name once per dataset (was queried
                # twice per row inside the inner loop).
                name = mariadb.get_dataset_name(dataset_seq)
                dataset_name = name if name is not None else 'unknown'
                for bucket2 in result['aggregations']['my_aggs1']['buckets'][dataset_seq]['my_aggs2']['buckets']:
                    worksheet.write(1 + row, 0, dataset_name, self.default)
                    worksheet.write(1 + row, 1, bucket2['key'], self.default)
                    worksheet.write(1 + row, 2, bucket2['my_aggs3']['doc_count'], self.default)
                    total += bucket2['my_aggs3']['doc_count']
                    row += 1
        else:
            for bucket1 in result['aggregations']['my_aggs1']['buckets']:
                for dataset_seq in params['datasets'].split("^"):
                    name = mariadb.get_dataset_name(dataset_seq)
                    dataset_name = name if name is not None else 'unknown'
                    for bucket3 in bucket1['my_aggs2']['buckets'][dataset_seq]['my_aggs3']['buckets']:
                        worksheet.write(1 + row, 0, bucket1['key'], self.default)  # date range
                        worksheet.write(1 + row, 1, dataset_name, self.default)
                        worksheet.write(1 + row, 2, bucket3['key'], self.default)  # channel name
                        worksheet.write(1 + row, 3, bucket3['my_aggs4']['doc_count'], self.default)
                        total += bucket3['my_aggs4']['doc_count']
                        row += 1
    # Totals row (single-dataset reports only).
    if len(params['datasets'].split("^")) == 1:
        worksheet.write(row + 1, 0, '합계', self.header)
        worksheet.write(row + 1, 1, '', self.header)
        if not self.compare:
            worksheet.write(row + 1, 2, total, self.header)
        else:
            worksheet.write(row + 1, 2, '', self.header)
            worksheet.write(row + 1, 3, total, self.header)
def emotions_per_channel(self, params):
    """Write the per-channel sentiment sheet ('채널별 감성분석')."""
    worksheet = self.workbook.add_worksheet('채널별 감성분석')
    # Run the aggregation query.
    qdsl = self.queryObj.EMOTIONS_PER_DEPTH1(params, self.compare)
    self.logger.debug("[ReportEmotions][emotions_per_channel] %s" % qdsl)
    result = es.get_aggregations(copy.copy(qdsl), params, self.INDEX_NAME)
    total = 0
    row = 0
    if not self.compare:
        # Header row.
        for col, title in enumerate(('채널', '긍부정', '분석량')):
            worksheet.write(0, col, title, self.header)
        for chan_bucket in result['aggregations']['my_aggs1']['buckets']:
            for emo_bucket in chan_bucket['my_aggs2']['my_aggs3']['buckets']:
                worksheet.write(1 + row, 0, mariadb.get_channel_name(1, chan_bucket['key'])[0], self.default)
                worksheet.write(1 + row, 1, emo_bucket['key'], self.default)
                worksheet.write(1 + row, 2, emo_bucket['doc_count'], self.default)
                total += emo_bucket['doc_count']
                row += 1
        # Totals row (single-dataset reports only).
        if len(params['datasets'].split("^")) == 1:
            worksheet.write(row + 1, 0, '합계', self.header)
            worksheet.write(row + 1, 1, '', self.header)
            worksheet.write(row + 1, 2, total, self.header)
    else:
        # Header row.
        for col, title in enumerate(('일자', '채널', '긍부정', '분석량')):
            worksheet.write(0, col, title, self.header)
        for day_bucket in result['aggregations']['my_aggs1']['buckets']:
            for chan_bucket in day_bucket['my_aggs2']['buckets']:
                for emo_bucket in chan_bucket['my_aggs3']['my_aggs4']['buckets']:
                    worksheet.write(1 + row, 0, day_bucket['key'], self.default)
                    worksheet.write(1 + row, 1, mariadb.get_channel_name(1, chan_bucket['key'])[0], self.default)
                    worksheet.write(1 + row, 2, emo_bucket['key'], self.default)
                    worksheet.write(1 + row, 3, emo_bucket['doc_count'], self.default)
                    total += emo_bucket['doc_count']
                    row += 1
        # Totals row (single-dataset reports only).
        if len(params['datasets'].split("^")) == 1:
            worksheet.write(row + 1, 0, '합계', self.header)
            worksheet.write(row + 1, 1, '', self.header)
            worksheet.write(row + 1, 2, '', self.header)
            worksheet.write(row + 1, 3, total, self.header)
def depth1_channel_occupations_in_documents(self, params):
    """Write the channel document-share sheet ('채널별 문서점유율')."""
    worksheet = self.workbook.add_worksheet('채널별 문서점유율')
    # Run the aggregation query.
    qdsl = self.queryObj.DEPTH1_CHANNEL_OCCUPATIONS(params, self.compare)
    self.logger.debug(
        "[ReportStatistics][depth1_channel_occupations_in_documents] %s " % qdsl)
    result = es.get_aggregations(qdsl, params, self.INDEX_NAME)
    total = result['hits']['total']
    if not self.compare:
        # Header row.
        for col, title in enumerate(('채널별', '문서수', '비율(%)')):
            worksheet.write(0, col, title, self.header)
        total_percentage = 0.0
        row = 1
        for bucket in result['aggregations']['my_aggs1']['buckets']:
            share = bucket['doc_count'] / total * 100
            worksheet.write(row, 0, bucket['key'], self.default)
            worksheet.write(row, 1, bucket['doc_count'], self.default)
            worksheet.write(row, 2, share, self.default)
            total_percentage += share
            row += 1
        # Totals row (single-dataset reports only).
        if len(params['datasets'].split("^")) == 1:
            worksheet.write(row, 0, "합계", self.header)
            worksheet.write(row, 1, total, self.header)
            worksheet.write(row, 2, total_percentage, self.header)
    else:
        # Header row.
        for col, title in enumerate(('채널별', '날짜', '문서수')):
            worksheet.write(0, col, title, self.header)
        row = 1
        for chan_bucket in result['aggregations']['my_aggs1']['buckets']:
            for date_bucket in chan_bucket['my_aggs2']['buckets']:
                worksheet.write(row, 0, chan_bucket['key'], self.default)
                worksheet.write(row, 1, date_bucket['key'], self.default)
                worksheet.write(row, 2, date_bucket['doc_count'], self.default)
                row += 1
        # Totals row (single-dataset reports only).
        if len(params['datasets'].split("^")) == 1:
            worksheet.write(row, 0, "합계", self.header)
            worksheet.write(row, 1, '', self.header)
            worksheet.write(row, 2, total, self.header)
def dataset_count_per_day_in_documents(self, params):
    """Write the daily collection-count sheet ('일자별 수집량'): one row per
    day, one column per dataset plus a trailing per-day sum column."""
    worksheet = self.workbook.add_worksheet('일자별 수집량')
    sum_per_dataset = {}
    # Header: date, one column per dataset, then the sum column.
    worksheet.write(0, 0, '일자', self.header)
    header_col = 1
    for dataset_name in self.dataset_names.split(","):
        worksheet.write(0, header_col, dataset_name, self.header)
        header_col += 1
    worksheet.write(0, header_col, '합계', self.header)
    # Run the aggregation query.
    qdsl = self.queryObj.DATASET_COUNT_PER_DAY_IN_DOCUMENTS(params, self.compare)
    self.logger.debug("[ReportStatistics][dataset_count_per_day_in_documents] %s" % qdsl)
    result = es.get_aggregations(copy.copy(qdsl), params, self.INDEX_NAME)
    total = result['hits']['total'] if 'hits' in result else 0
    row = 0
    if total > 0:
        for bucket in result['aggregations']['my_aggs1']['buckets']:
            # Compare mode buckets are keyed by range label, daily ones by date.
            label = bucket['key'] if self.compare else bucket['key_as_string']
            worksheet.write(1 + row, 0, label, self.header)
            sum_per_day = 0
            body_col = 1
            for dataset_seq in params['datasets'].split("^"):
                count = bucket['my_aggs2']['buckets'][dataset_seq]['doc_count']
                sum_per_day += count
                sum_per_dataset[dataset_seq] = sum_per_dataset.get(dataset_seq, 0) + count
                worksheet.write(1 + row, body_col, count, self.default)
                body_col += 1
            worksheet.write(1 + row, body_col, sum_per_day, self.default)
            row += 1
        # Totals row (single-dataset reports only).
        if len(params['datasets'].split("^")) == 1:
            worksheet.write(1 + row, 0, '합계', self.header)
            footer_col = 1
            for dataset_seq in params['datasets'].split("^"):
                worksheet.write(1 + row, footer_col, '', self.header)
                footer_col += 1
            worksheet.write(1 + row, footer_col, sum_per_dataset[dataset_seq], self.header)
def depth1_channel_occupations_in_documents(self, params):
    """Write the channel document-share sheet ('채널별 문서점유율')."""
    worksheet = self.workbook.add_worksheet('채널별 문서점유율')
    # Run the aggregation query.
    qdsl = self.queryObj.DEPTH1_CHANNEL_OCCUPATIONS(params, self.compare)
    self.logger.debug("[ReportStatistics][depth1_channel_occupations_in_documents] %s " % qdsl)
    result = es.get_aggregations(qdsl, params, self.INDEX_NAME)
    total = result['hits']['total']
    single_dataset = len(params['datasets'].split("^")) == 1
    if not self.compare:
        # Header row.
        worksheet.write(0, 0, '채널별', self.header)
        worksheet.write(0, 1, '문서수', self.header)
        worksheet.write(0, 2, '비율(%)', self.header)
        total_percentage = 0.0
        row = 1
        for bucket in result['aggregations']['my_aggs1']['buckets']:
            ratio = bucket['doc_count'] / total * 100
            worksheet.write(row, 0, bucket['key'], self.default)  # depth1 channel
            worksheet.write(row, 1, bucket['doc_count'], self.default)
            worksheet.write(row, 2, ratio, self.default)
            total_percentage += ratio
            row += 1
        # Totals row (single-dataset reports only).
        if single_dataset:
            worksheet.write(row, 0, "합계", self.header)
            worksheet.write(row, 1, total, self.header)
            worksheet.write(row, 2, total_percentage, self.header)
    else:
        # Header row.
        worksheet.write(0, 0, '채널별', self.header)
        worksheet.write(0, 1, '날짜', self.header)
        worksheet.write(0, 2, '문서수', self.header)
        row = 1
        for outer in result['aggregations']['my_aggs1']['buckets']:
            for inner in outer['my_aggs2']['buckets']:
                worksheet.write(row, 0, outer['key'], self.default)  # depth1 channel
                worksheet.write(row, 1, inner['key'], self.default)
                worksheet.write(row, 2, inner['doc_count'], self.default)
                row += 1
        # Totals row (single-dataset reports only).
        if single_dataset:
            worksheet.write(row, 0, "합계", self.header)
            worksheet.write(row, 1, '', self.header)
            worksheet.write(row, 2, total, self.header)
def topics_list(self, params):
    """Write the topic-word sheet ('화제어_명사').

    Non-compare mode repeats each topic row once per related word; a topic
    with no related words gets a single row with blank related columns.
    """
    worksheet = self.workbook.add_worksheet(
        "화제어_명사(%s)" % "~".join([params['start_date'][0:10], params['end_date'][0:10]]))
    # Header row.
    worksheet.write(0, 0, '순위', self.header)
    worksheet.write(0, 1, '화제어', self.header)
    worksheet.write(0, 2, '문서수', self.header)
    if not self.compare:
        worksheet.write(0, 3, '연관어', self.header)
        worksheet.write(0, 4, '문서수', self.header)
    qdsl = self.queryObj.TOPICS_LIST(params)
    self.logger.debug("[ReportStatistics][topics_list] %s" % qdsl)
    result_topic = es.get_aggregations(qdsl, params, self.INDEX_NAME)
    row = 0
    buckets = result_topic['aggregations']['my_aggs1']['buckets']
    if not self.compare:
        for rank, main_bucket in enumerate(buckets):
            word = main_bucket['key']
            related_buckets = main_bucket['my_aggs2']['buckets']
            if len(related_buckets) > 0:
                for related in related_buckets:
                    worksheet.write(1 + row, 0, 1 + rank, self.default)
                    worksheet.write(1 + row, 1, word, self.default)
                    worksheet.write(1 + row, 2, main_bucket['doc_count'], self.default)
                    worksheet.write(1 + row, 3, related['key'], self.default)
                    worksheet.write(1 + row, 4, related['doc_count'], self.default)
                    row += 1
            else:
                worksheet.write(1 + row, 0, 1 + rank, self.default)
                worksheet.write(1 + row, 1, re.sub("[\[\]]", "", word), self.default)
                worksheet.write(1 + row, 2, main_bucket['doc_count'], self.default)
                worksheet.write(1 + row, 3, '', self.default)
                worksheet.write(1 + row, 4, '', self.default)
                row += 1
    else:
        for rank, main_bucket in enumerate(buckets):
            worksheet.write(1 + row, 0, 1 + rank, self.default)
            worksheet.write(1 + row, 1, main_bucket['key'], self.default)
            worksheet.write(1 + row, 2, main_bucket['doc_count'], self.default)
            row += 1
def topics_list(self, params):
    """Write the topic-word sheet ('화제어'): one row per (date, topic,
    related word); topics with no related words get one row with blanks.

    Dates are written as YYYYMMDD.
    """
    worksheet = self.workbook.add_worksheet(
        "화제어(%s)" % "~".join([params['start_date'][0:10], params['end_date'][0:10]]))
    # Header row (dates must be formatted YYYYMMDD).
    worksheet.write(0, 0, '날짜', self.header)
    worksheet.write(0, 1, '순위', self.header)
    worksheet.write(0, 2, '화제어', self.header)
    worksheet.write(0, 3, '문서수', self.header)
    worksheet.write(0, 4, '연관어', self.header)
    worksheet.write(0, 5, '문서수', self.header)
    result_topic = es.get_aggregations(
        self.query.ALL_TOPICS_LIST(params['dataset_name']), params, Query.INDEX_TOPICS)
    row = 0
    seq = 0  # running topic rank across all date buckets
    for bucket0 in result_topic['aggregations']['my_aggs0']['buckets']:
        for bucket1 in bucket0['my_aggs1']['buckets']:
            # Strip characters Excel could read as a formula prefix.
            topic = re.sub("[\+=\-/]", "", str(bucket1['key']))
            seq += 1
            topics_date = bucket0['key_as_string']
            # "2018-01-11T00:00:00" -> "20180111".
            date_str = re.sub("-", "", topics_date[:topics_date.find("T")])
            if len(bucket1['my_aggs2']['buckets']) > 0:
                for bucket2 in bucket1['my_aggs2']['buckets']:
                    # BUG FIX: removed stray `str(startdate.strftime(...))`
                    # statement that referenced an undefined name (NameError)
                    # and discarded its result.
                    worksheet.write(1 + row, 0, date_str, self.default)
                    worksheet.write(1 + row, 1, seq, self.default)
                    worksheet.write(1 + row, 2, re.sub("[\[\]]", "", topic), self.default)
                    worksheet.write(1 + row, 3, bucket1['doc_count'], self.default)
                    worksheet.write(1 + row, 4, bucket2['key'], self.default)
                    worksheet.write(1 + row, 5, bucket2['doc_count'], self.default)
                    row += 1
            else:
                worksheet.write(1 + row, 0, date_str, self.default)
                worksheet.write(1 + row, 1, seq, self.default)
                worksheet.write(1 + row, 2, re.sub("[\[\]]", "", topic), self.default)
                worksheet.write(1 + row, 3, bucket1['doc_count'], self.default)
                worksheet.write(1 + row, 4, '', self.default)
                worksheet.write(1 + row, 5, '', self.default)
                row += 1
    logger.info("<%s> Total Topics : %d" % (self.dataset_names, row) )
def dataset_occupations_per_depth3_in_emotions(self, params):
    """Write the detailed per-channel-path count sheet ('채널분석량 상세').

    Each row is (dataset, depth1, depth2, depth3, count) where the channel
    key is a '>'-separated path; missing levels are left blank.
    """
    worksheet = self.workbook.add_worksheet('채널분석량 상세')
    # Header row.
    worksheet.write(0, 0, '데이터셋', self.header)
    worksheet.write(0, 1, '1Depth', self.header)
    worksheet.write(0, 2, '2Depth', self.header)
    worksheet.write(0, 3, '3Depth', self.header)
    worksheet.write(0, 4, '문서수', self.header)
    qdsl = self.query.DATASET_OCCUPATIONS_PER_DEPTH3(self.compare)
    result = es.get_aggregations(copy.copy(qdsl), params, self.INDEX_NAME)
    total = result['hits']['total']
    row = 0
    if 'hits' in result and result['hits']['total'] > 0:
        for dataset_seq in params['datasets'].split("^"):
            # Resolve the dataset name once per dataset (was queried twice per row).
            name = mariadb.get_dataset_name(dataset_seq)
            dataset_name = name if name is not None else 'unknown'
            for bucket in result['aggregations']['my_aggs1']['buckets'][dataset_seq]['my_aggs2']['buckets']:
                # BUG FIX: keys with fewer than two '>'-separated levels used
                # to raise ValueError on tuple unpacking; pad to three levels
                # (and ignore any extras) instead.
                parts = bucket['key'].split(">")
                depth1, depth2, depth3 = (parts + ['', '', ''])[:3]
                worksheet.write(1 + row, 0, dataset_name, self.default)
                worksheet.write(1 + row, 1, re.sub("[\[\]]", "", depth1), self.default)
                worksheet.write(1 + row, 2, re.sub("[\[\]]", "", depth2), self.default)
                worksheet.write(1 + row, 3, re.sub("[\[\]]", "", depth3), self.default)
                worksheet.write(1 + row, 4, bucket['doc_count'], self.default)
                row += 1
    # Totals row (single-dataset reports only).
    if len(params['datasets'].split("^")) == 1:
        worksheet.write(row + 1, 0, '합계', self.header)
        worksheet.write(row + 1, 1, '', self.header)
        worksheet.write(row + 1, 2, '', self.header)
        worksheet.write(row + 1, 3, '', self.header)
        worksheet.write(row + 1, 4, total, self.header)
def dataset_count_per_day_in_emotions(self, params):
    """Write per-day analysis counts per dataset to the '분석량 추이' sheet.

    Layout: one date column, one column per dataset name, then a per-day sum
    column. A total row is appended only when a single dataset is reported.
    """
    worksheet = self.workbook.add_worksheet('분석량 추이')
    sum_per_dataset = {}  # dataset_seq -> running total across all days
    # Header: date, one column per dataset, trailing sum column.
    worksheet.write(0, 0, '일자', self.header)
    col_header = 0
    for dataset_name in self.dataset_names.split(","):
        worksheet.write(0, 1 + col_header, dataset_name, self.header)
        col_header += 1
    worksheet.write(0, 1 + col_header, '합계', self.header)
    # Data
    qdsl = self.queryObj.DATASET_COUNT_PER_DAY_IN_EMOTIONS(params, self.compare)
    self.logger.debug("[ReportEmotions][dataset_count_per_day_in_emotions] %s" % qdsl)
    result = es.get_aggregations(copy.copy(qdsl), params, self.INDEX_NAME)
    if 'hits' in result and result['hits']['total'] > 0:
        row = 0
        for bucket in result['aggregations']['my_aggs1']['buckets']:
            # Compare-mode buckets carry a range label in 'key'; otherwise the
            # date histogram's formatted 'key_as_string' is used.
            if self.compare:
                worksheet.write(1 + row, 0, bucket['key'], self.header)
            else:
                worksheet.write(1 + row, 0, bucket['key_as_string'], self.header)
            sum_per_day = 0
            col_body = 0
            for dataset_seq in params['datasets'].split("^"):
                # NOTE(review): assumes dict-style (keyed) filters buckets,
                # indexed by dataset_seq -- confirm against the query DSL.
                count_of_this_dataset = bucket['my_aggs2']['my_aggs3']['buckets'][dataset_seq]['doc_count']
                sum_per_day += count_of_this_dataset
                sum_per_dataset[dataset_seq] = count_of_this_dataset if dataset_seq not in sum_per_dataset else sum_per_dataset[dataset_seq] + count_of_this_dataset
                worksheet.write(1 + row, 1 + col_body, count_of_this_dataset, self.default)
                col_body += 1
            worksheet.write(1 + row, 1 + col_body, sum_per_day, self.default)
            row += 1
        # Total row: only written for single-dataset reports, so the trailing
        # cell holds the sole dataset's accumulated sum.
        if len(params['datasets'].split("^")) == 1:
            worksheet.write(1 + row, 0, '합계', self.header)
            col_footer = 0
            for dataset_seq in params['datasets'].split("^"):
                worksheet.write(1 + row, 1 + col_footer, '', self.header)
                col_footer += 1
            worksheet.write(1 + row, 1 + col_footer, sum_per_dataset[dataset_seq], self.header)
def dataset_occupations_per_depth3_in_emotions(self, params):
    """Write per-dataset sentiment-analysis volume by channel depth 1-3
    to the '채널분석량 상세' worksheet.
    """
    worksheet = self.workbook.add_worksheet('채널분석량 상세')
    # Header
    worksheet.write(0, 0, '데이터셋', self.header)
    worksheet.write(0, 1, '1Depth', self.header)
    worksheet.write(0, 2, '2Depth', self.header)
    worksheet.write(0, 3, '3Depth', self.header)
    worksheet.write(0, 4, '분석량', self.header)
    # Data
    qdsl = self.queryObj.DATASET_OCCUPATIONS_PER_DEPTH3_IN_EMOTIONS(params, self.compare)
    self.logger.debug("[ReportEmotions][dataset_occupations_per_depth3_in_emotions] %s" % qdsl)
    result = es.get_aggregations(copy.copy(qdsl), params, self.INDEX_NAME)
    total = 0  # grand total accumulated from the written rows
    row = 0
    if 'hits' in result and result['hits']['total'] > 0:
        for dataset_seq in params['datasets'].split("^"):
            # Resolve the dataset name once per dataset; the original called
            # mariadb.get_dataset_name() twice per row for an invariant value.
            dataset_name = mariadb.get_dataset_name(dataset_seq)
            if dataset_name is None:
                dataset_name = 'unknown'
            for bucket2 in result['aggregations']['my_aggs1']['buckets'][dataset_seq]['my_aggs2']['buckets']:
                parts = bucket2['key'].split(">")
                if len(parts) > 2:
                    depth1, depth2, depth3 = parts
                else:
                    depth1, depth2 = parts
                    depth3 = ''
                worksheet.write(1 + row, 0, dataset_name, self.default)
                worksheet.write(1 + row, 1, re.sub(r"[\[\]]", "", depth1), self.default)
                worksheet.write(1 + row, 2, re.sub(r"[\[\]]", "", depth2), self.default)
                worksheet.write(1 + row, 3, re.sub(r"[\[\]]", "", depth3), self.default)
                worksheet.write(1 + row, 4, bucket2['my_aggs3']['doc_count'], self.default)
                total += bucket2['my_aggs3']['doc_count']
                row += 1
    # Grand-total footer, only for single-dataset reports.
    if len(params['datasets'].split("^")) == 1:
        worksheet.write(row + 1, 0, '합계', self.header)
        worksheet.write(row + 1, 1, '', self.header)
        worksheet.write(row + 1, 2, '', self.header)
        worksheet.write(row + 1, 3, '', self.header)
        worksheet.write(row + 1, 4, total, self.header)
def topics_verb_list(self, params):
    """Write the topic-verb ranking (rank, topic, doc count) for the
    reporting period to a dedicated worksheet.
    """
    period = "~".join([params['start_date'][0:10], params['end_date'][0:10]])
    worksheet = self.workbook.add_worksheet("화제어_동사(%s)" % period)
    # Header row
    for col, title in enumerate(('순위', '화제어', '문서수')):
        worksheet.write(0, col, title, self.header)
    # Data rows: one per aggregation bucket, ranked in bucket order.
    qdsl = self.queryObj.TOPICS_VERBS_LIST(params)
    self.logger.debug("[ReportStatistics][topics_verb_list] %s" % qdsl)
    result_topic = es.get_aggregations(qdsl, params, self.INDEX_NAME)
    for rank, bucket in enumerate(result_topic['aggregations']['my_aggs1']['buckets'], start=1):
        worksheet.write(rank, 0, rank, self.default)
        worksheet.write(rank, 1, re.sub("[\[\]]", "", bucket['key']), self.default)
        worksheet.write(rank, 2, bucket['doc_count'], self.default)
def topics_verb_list(self, params):
    """Write the topic-verb ranking (rank, topic, doc count) for the
    reporting period to a dedicated worksheet.
    """
    period = "~".join([params['start_date'][0:10], params['end_date'][0:10]])
    worksheet = self.workbook.add_worksheet("화제어_동사(%s)" % period)
    # Header row
    for col, title in enumerate(('순위', '화제어', '문서수')):
        worksheet.write(0, col, title, self.header)
    # Data rows: one per aggregation bucket, ranked in bucket order.
    result_topic = es.get_aggregations(self.query.TOPICS_VERBS_LIST(), params, self.INDEX_TOPICS)
    for rank, bucket in enumerate(result_topic['aggregations']['my_aggs1']['buckets'], start=1):
        worksheet.write(rank, 0, rank, self.default)
        worksheet.write(rank, 1, re.sub("[\[\]]", "", bucket['key']), self.default)
        worksheet.write(rank, 2, bucket['doc_count'], self.default)
def dataset_occupations_per_depth3_in_documents(self, params):
    """Write per-dataset collected-document counts by channel depth 1-3
    to the '채널별 수집량' worksheet.

    Non-compare mode: dataset / depth1 / depth2 / depth3 / doc count.
    Compare mode adds a date column from the date sub-aggregation.
    """
    worksheet = self.workbook.add_worksheet('채널별 수집량')
    qdsl = self.queryObj.DATASET_OCCUPATIONS_PER_DEPTH3_IN_DOCUMENTS(params, self.compare)
    self.logger.debug("[ReportStatistics][dataset_occupations_per_depth3_in_documents] %s" % qdsl)

    def split_depths(key):
        # "d1>d2>d3" -> three display strings; missing levels become "".
        # BUGFIX: the original guards were off by one (len >= 1 guarded
        # index 1, len >= 2 guarded index 2), raising IndexError whenever a
        # channel key had fewer than three levels.
        parts = [re.sub(r"[\[\]]", "", p) for p in key.split(">")[:3]]
        return (parts + ["", "", ""])[:3]

    if not self.compare:
        # Header
        worksheet.write(0, 0, '데이터셋', self.header)
        worksheet.write(0, 1, '1Depth', self.header)
        worksheet.write(0, 2, '2Depth', self.header)
        worksheet.write(0, 3, '3Depth', self.header)
        worksheet.write(0, 4, '문서수', self.header)
        result = es.get_aggregations(copy.copy(qdsl), params, self.INDEX_NAME)
        total = result['hits']['total']
        row = 0
        if total > 0:
            for dataset_seq in params['datasets'].split("^"):
                # Resolve the dataset name once per dataset; the original
                # queried mariadb twice per row for an invariant value.
                dataset_name = mariadb.get_dataset_name(dataset_seq)
                if dataset_name is None:
                    dataset_name = 'unknown'
                for d1 in result['aggregations']['my_aggs1']['buckets'][dataset_seq]['my_aggs2']['buckets']:
                    depth1, depth2, depth3 = split_depths(d1['key'])
                    worksheet.write(1 + row, 0, dataset_name, self.default)
                    worksheet.write(1 + row, 1, depth1, self.default)
                    worksheet.write(1 + row, 2, depth2, self.default)
                    worksheet.write(1 + row, 3, depth3, self.default)
                    worksheet.write(1 + row, 4, d1['doc_count'], self.default)
                    row += 1
        # Grand-total footer, single-dataset reports only.
        if len(params['datasets'].split("^")) == 1:
            worksheet.write(row + 1, 0, '합계', self.header)
            worksheet.write(row + 1, 1, '', self.header)
            worksheet.write(row + 1, 2, '', self.header)
            worksheet.write(row + 1, 3, '', self.header)
            worksheet.write(row + 1, 4, total, self.header)
    else:
        # Header (extra date column)
        worksheet.write(0, 0, '데이터셋', self.header)
        worksheet.write(0, 1, '1Depth', self.header)
        worksheet.write(0, 2, '2Depth', self.header)
        worksheet.write(0, 3, '3Depth', self.header)
        worksheet.write(0, 4, '날짜', self.header)
        worksheet.write(0, 5, '문서수', self.header)
        result = es.get_aggregations(copy.copy(qdsl), params, self.INDEX_NAME)
        total = result['hits']['total']
        row = 0
        if total > 0:
            for dataset_seq in params['datasets'].split("^"):
                dataset_name = mariadb.get_dataset_name(dataset_seq)
                if dataset_name is None:
                    dataset_name = 'unknown'
                for d1 in result['aggregations']['my_aggs1']['buckets'][dataset_seq]['my_aggs2']['buckets']:
                    depth1, depth2, depth3 = split_depths(d1['key'])
                    for d2 in d1['my_aggs3']['buckets']:
                        worksheet.write(1 + row, 0, dataset_name, self.default)
                        worksheet.write(1 + row, 1, depth1, self.default)
                        worksheet.write(1 + row, 2, depth2, self.default)
                        worksheet.write(1 + row, 3, depth3, self.default)
                        worksheet.write(1 + row, 4, d2['key'], self.default)
                        worksheet.write(1 + row, 5, d2['doc_count'], self.default)
                        row += 1
        # Grand-total footer, single-dataset reports only.
        if len(params['datasets'].split("^")) == 1:
            worksheet.write(row + 1, 0, '합계', self.header)
            worksheet.write(row + 1, 1, '', self.header)
            worksheet.write(row + 1, 2, '', self.header)
            worksheet.write(row + 1, 3, '', self.header)
            worksheet.write(row + 1, 4, '', self.header)
            worksheet.write(row + 1, 5, total, self.header)
def topics_list(self, params):
    """Write the noun-topic ranking to the '화제어_명사(...)' worksheet.

    Non-compare mode emits one row per (topic, related word) pair with
    related-word columns; compare mode emits only rank/topic/doc-count.
    """
    worksheet = self.workbook.add_worksheet(
        "화제어_명사(%s)" % "~".join([params['start_date'][0:10], params['end_date'][0:10]]))
    # Header
    worksheet.write(0, 0, '순위', self.header)
    worksheet.write(0, 1, '화제어', self.header)
    worksheet.write(0, 2, '문서수', self.header)
    if not self.compare:
        worksheet.write(0, 3, '연관어', self.header)
        worksheet.write(0, 4, '문서수', self.header)
    # Data
    result_topic = es.get_aggregations(self.query.TOPICS_LIST(), params, self.INDEX_TOPICS)
    if not self.compare:
        row = 0
        for seq, bucket1 in enumerate(result_topic['aggregations']['my_aggs1']['buckets']):
            topic = bucket1['key']
            if len(bucket1['my_aggs2']['buckets']) > 0:
                # One row per (topic, related word); the rank repeats for each pair.
                for bucket2 in bucket1['my_aggs2']['buckets']:
                    worksheet.write(1 + row, 0, 1 + seq, self.default)
                    worksheet.write(1 + row, 1, re.sub("[\[\]]", "", topic), self.default)
                    worksheet.write(1 + row, 2, bucket1['doc_count'], self.default)
                    worksheet.write(1 + row, 3, bucket2['key'], self.default)
                    worksheet.write(1 + row, 4, bucket2['doc_count'], self.default)
                    row += 1
            else:
                # Topic without related words: blank related-word columns.
                worksheet.write(1 + row, 0, 1 + seq, self.default)
                worksheet.write(1 + row, 1, re.sub("[\[\]]", "", topic), self.default)
                worksheet.write(1 + row, 2, bucket1['doc_count'], self.default)
                worksheet.write(1 + row, 3, '', self.default)
                worksheet.write(1 + row, 4, '', self.default)
                row += 1
    else:
        # Compare mode: flat ranking only, no related words.
        row = 0
        for seq, bucket1 in enumerate(result_topic['aggregations']['my_aggs1']['buckets']):
            topic = bucket1['key']
            worksheet.write(1 + row, 0, 1 + seq, self.default)
            worksheet.write(1 + row, 1, re.sub("[\[\]]", "", topic), self.default)
            worksheet.write(1 + row, 2, bucket1['doc_count'], self.default)
            row += 1
def depth2_channel_occupations_in_documents(self, params, depth1_seq):
    """Write depth-2 channel document counts for one depth-1 channel family.

    ``depth1_seq`` (a ``Channel`` enum member) selects the family
    (community / media / SNS / portal) and the worksheet name.
    """
    sheet_name = ''
    if depth1_seq is Channel.COMMUNITY:
        sheet_name = '커뮤니티 문서량'
    elif depth1_seq is Channel.MEDIA:
        sheet_name = '미디어 문서량'
    elif depth1_seq is Channel.SNS:
        sheet_name = 'SNS 문서량'
    elif depth1_seq is Channel.PORTAL:
        sheet_name = '포털 문서량'
    print("sheet_name :: %s" % sheet_name)
    worksheet = self.workbook.add_worksheet(sheet_name)
    result = es.get_aggregations(
        self.query.DEPTH2_CHANNEL_OCCUPATIONS(depth1_seq.value, self.compare),
        params, self.INDEX_NAME)
    total = result['hits']['total']
    if not self.compare:
        # Header
        worksheet.write(0, 0, '채널별', self.header)
        worksheet.write(0, 1, '문서수', self.header)
        worksheet.write(0, 2, '비율(%)', self.header)
        # Data: one row per depth-2 channel with its share of the total.
        # NOTE(review): dividing by `total` assumes the bucket list is empty
        # whenever total == 0 (so the division never runs) -- confirm.
        total_percentage = 0.0
        row = 1
        for bucket in result['aggregations']['my_aggs1']['buckets']:
            worksheet.write(row, 0, bucket['key'], self.default)  # depth-2 channel name
            worksheet.write(row, 1, bucket['doc_count'], self.default)
            worksheet.write(row, 2, bucket['doc_count'] / total * 100, self.default)
            total_percentage += bucket['doc_count'] / total * 100
            row += 1
        # Total row, single-dataset reports only.
        if len(params['datasets'].split("^")) == 1:
            worksheet.write(row, 0, "합계", self.header)
            worksheet.write(row, 1, total, self.header)
            worksheet.write(row, 2, total_percentage, self.header)
    else:
        worksheet.write(0, 0, '채널별', self.header)
        worksheet.write(0, 1, '날짜', self.header)
        worksheet.write(0, 2, '문서수', self.header)
        # Data: one row per (depth-2 channel, date bucket) pair.
        row = 1
        for bucket1 in result['aggregations']['my_aggs1']['buckets']:
            for bucket2 in bucket1['my_aggs2']['buckets']:
                worksheet.write(row, 0, bucket1['key'], self.default)  # depth-2 channel name
                worksheet.write(row, 1, bucket2['key'], self.default)
                worksheet.write(row, 2, bucket2['doc_count'], self.default)
                row += 1
        # Total row, single-dataset reports only.
        if len(params['datasets'].split("^")) == 1:
            worksheet.write(row, 0, "합계", self.header)
            worksheet.write(row, 1, '', self.header)
            worksheet.write(row, 2, total, self.header)
def emotions_per_causes(self, params):
    """Write sentiment counts per mention-cause category to '언급원인별 분석'.

    Rows cross channel path (depth 1-3), cause category (대/중/소분류) and
    sentiment; compare mode prefixes a date-range column.
    """
    worksheet = self.workbook.add_worksheet('언급원인별 분석')

    def split_depths(key):
        # "d1>d2>d3" -> three display strings; missing levels become ''.
        # BUGFIX: the original guards were off by one (len >= 1 guarded
        # index 1, len >= 2 guarded index 2) and raised IndexError whenever
        # the channel key had fewer than three levels.
        parts = [re.sub(r"[\[\]]", "", p) for p in key.split(">")[:3]]
        return (parts + ['', '', ''])[:3]

    if not self.compare:
        # Header
        worksheet.write(0, 0, '1Depth', self.header)
        worksheet.write(0, 1, '2Depth', self.header)
        worksheet.write(0, 2, '3Depth', self.header)
        worksheet.write(0, 3, '대분류', self.header)
        worksheet.write(0, 4, '중분류', self.header)
        worksheet.write(0, 5, '소분류', self.header)
        worksheet.write(0, 6, '긍부정', self.header)
        worksheet.write(0, 7, '분석량', self.header)
        # Data
        qdsl = self.queryObj.EMOTIONS_PER_CAUSES(params, self.compare)
        self.logger.debug("[ReportEmotions][emotions_per_causes] %s" % qdsl)
        result = es.get_aggregations(copy.copy(qdsl), params, self.INDEX_NAME)
        total = 0
        row = 0
        for bucket1 in result['aggregations']['my_aggs1']['buckets']:
            # Depth split hoisted out of the inner loops (invariant per bucket1).
            depth1, depth2, depth3 = split_depths(bucket1['key'])
            for bucket3 in bucket1['my_aggs2']['my_aggs3']['buckets']:
                for bucket4 in bucket3['my_aggs4']['buckets']:
                    for bucket5 in bucket4['my_aggs5']['buckets']:
                        for bucket6 in bucket5['my_aggs6']['buckets']:
                            worksheet.write(1 + row, 0, depth1, self.default)
                            worksheet.write(1 + row, 1, depth2, self.default)
                            worksheet.write(1 + row, 2, depth3, self.default)
                            worksheet.write(1 + row, 3, bucket3['key'], self.default)  # major category
                            worksheet.write(1 + row, 4, bucket4['key'], self.default)  # middle category
                            worksheet.write(1 + row, 5, bucket5['key'], self.default)  # minor category
                            worksheet.write(1 + row, 6, bucket6['key'], self.default)  # sentiment
                            worksheet.write(1 + row, 7, bucket6['doc_count'], self.default)
                            total += bucket6['doc_count']
                            row += 1
        # Grand-total footer, single-dataset reports only.
        if len(params['datasets'].split("^")) == 1:
            worksheet.write(row + 1, 0, '합계', self.header)
            for col in range(1, 7):
                worksheet.write(row + 1, col, '', self.header)
            worksheet.write(row + 1, 7, total, self.header)
    else:
        # Header (date-range column first)
        worksheet.write(0, 0, '날짜', self.header)
        worksheet.write(0, 1, '1Depth', self.header)
        worksheet.write(0, 2, '2Depth', self.header)
        worksheet.write(0, 3, '3Depth', self.header)
        worksheet.write(0, 4, '대분류', self.header)
        worksheet.write(0, 5, '중분류', self.header)
        worksheet.write(0, 6, '소분류', self.header)
        worksheet.write(0, 7, '긍부정', self.header)
        worksheet.write(0, 8, '분석량', self.header)
        # Data
        qdsl = self.queryObj.EMOTIONS_PER_CAUSES(params, self.compare)
        self.logger.debug("[ReportEmotions][emotions_per_causes] %s" % qdsl)
        result = es.get_aggregations(copy.copy(qdsl), params, self.INDEX_NAME)
        total = 0
        row = 0
        for bucket1 in result['aggregations']['my_aggs1']['buckets']:
            for bucket3 in bucket1['my_aggs2']['buckets']:
                depth1, depth2, depth3 = split_depths(bucket3['key'])
                for bucket4 in bucket3['my_aggs3']['my_aggs4']['buckets']:
                    for bucket5 in bucket4['my_aggs5']['buckets']:
                        for bucket6 in bucket5['my_aggs6']['buckets']:
                            for bucket7 in bucket6['my_aggs7']['buckets']:
                                worksheet.write(1 + row, 0, bucket1['key'], self.default)  # date range
                                worksheet.write(1 + row, 1, depth1, self.default)
                                worksheet.write(1 + row, 2, depth2, self.default)
                                worksheet.write(1 + row, 3, depth3, self.default)
                                worksheet.write(1 + row, 4, bucket4['key'], self.default)  # major category
                                worksheet.write(1 + row, 5, bucket5['key'], self.default)  # middle category
                                worksheet.write(1 + row, 6, bucket6['key'], self.default)  # minor category
                                worksheet.write(1 + row, 7, bucket7['key'], self.default)  # sentiment
                                worksheet.write(1 + row, 8, bucket7['doc_count'], self.default)
                                total += bucket7['doc_count']
                                row += 1
        # Grand-total footer, single-dataset reports only.
        if len(params['datasets'].split("^")) == 1:
            worksheet.write(row + 1, 0, '합계', self.header)
            for col in range(1, 8):
                worksheet.write(row + 1, col, '', self.header)
            worksheet.write(row + 1, 8, total, self.header)
def dataset_occupations_per_depth3_in_documents(self, params):
    """Write per-dataset collected-document counts by channel depth 1-3
    to the '채널별 수집량' worksheet.

    Non-compare mode: dataset / depth1 / depth2 / depth3 / doc count.
    Compare mode adds a date column from the date sub-aggregation.
    """
    worksheet = self.workbook.add_worksheet('채널별 수집량')
    qdsl = self.query.DATASET_OCCUPATIONS_PER_DEPTH3(self.compare)

    def split_depths(key):
        # "d1>d2>d3" -> three display strings; missing levels become "".
        # BUGFIX: the original guards were off by one (len >= 1 guarded
        # index 1, len >= 2 guarded index 2), raising IndexError whenever a
        # channel key had fewer than three levels.
        parts = [re.sub(r"[\[\]]", "", p) for p in key.split(">")[:3]]
        return (parts + ["", "", ""])[:3]

    if not self.compare:
        # Header
        worksheet.write(0, 0, '데이터셋', self.header)
        worksheet.write(0, 1, '1Depth', self.header)
        worksheet.write(0, 2, '2Depth', self.header)
        worksheet.write(0, 3, '3Depth', self.header)
        worksheet.write(0, 4, '문서수', self.header)
        result = es.get_aggregations(copy.copy(qdsl), params, self.INDEX_NAME)
        total = result['hits']['total']
        row = 0
        if total > 0:
            for dataset_seq in params['datasets'].split("^"):
                # Resolve the dataset name once per dataset; the original
                # queried mariadb twice per row for an invariant value.
                dataset_name = mariadb.get_dataset_name(dataset_seq)
                if dataset_name is None:
                    dataset_name = 'unknown'
                for d1 in result['aggregations']['my_aggs1']['buckets'][dataset_seq]['my_aggs2']['buckets']:
                    depth1, depth2, depth3 = split_depths(d1['key'])
                    worksheet.write(1 + row, 0, dataset_name, self.default)
                    worksheet.write(1 + row, 1, depth1, self.default)
                    worksheet.write(1 + row, 2, depth2, self.default)
                    worksheet.write(1 + row, 3, depth3, self.default)
                    worksheet.write(1 + row, 4, d1['doc_count'], self.default)
                    row += 1
        # Grand-total footer, single-dataset reports only.
        if len(params['datasets'].split("^")) == 1:
            worksheet.write(row + 1, 0, '합계', self.header)
            worksheet.write(row + 1, 1, '', self.header)
            worksheet.write(row + 1, 2, '', self.header)
            worksheet.write(row + 1, 3, '', self.header)
            worksheet.write(row + 1, 4, total, self.header)
    else:
        # Header (extra date column)
        worksheet.write(0, 0, '데이터셋', self.header)
        worksheet.write(0, 1, '1Depth', self.header)
        worksheet.write(0, 2, '2Depth', self.header)
        worksheet.write(0, 3, '3Depth', self.header)
        worksheet.write(0, 4, '날짜', self.header)
        worksheet.write(0, 5, '문서수', self.header)
        result = es.get_aggregations(copy.copy(qdsl), params, self.INDEX_NAME)
        total = result['hits']['total']
        row = 0
        if total > 0:
            for dataset_seq in params['datasets'].split("^"):
                dataset_name = mariadb.get_dataset_name(dataset_seq)
                if dataset_name is None:
                    dataset_name = 'unknown'
                for d1 in result['aggregations']['my_aggs1']['buckets'][dataset_seq]['my_aggs2']['buckets']:
                    depth1, depth2, depth3 = split_depths(d1['key'])
                    for d2 in d1['my_aggs3']['buckets']:
                        worksheet.write(1 + row, 0, dataset_name, self.default)
                        worksheet.write(1 + row, 1, depth1, self.default)
                        worksheet.write(1 + row, 2, depth2, self.default)
                        worksheet.write(1 + row, 3, depth3, self.default)
                        worksheet.write(1 + row, 4, d2['key'], self.default)
                        worksheet.write(1 + row, 5, d2['doc_count'], self.default)
                        row += 1
        # Grand-total footer, single-dataset reports only.
        if len(params['datasets'].split("^")) == 1:
            worksheet.write(row + 1, 0, '합계', self.header)
            worksheet.write(row + 1, 1, '', self.header)
            worksheet.write(row + 1, 2, '', self.header)
            worksheet.write(row + 1, 3, '', self.header)
            worksheet.write(row + 1, 4, '', self.header)
            worksheet.write(row + 1, 5, total, self.header)
def emotions_per_causes(self, params):
    """Write sentiment document counts per mention-cause category to the
    '언급원인별 분석' worksheet.

    Rows cross channel path (depth 1-3), cause category (대/중/소분류) and
    sentiment; compare mode appends a date column before the count.
    """
    worksheet = self.workbook.add_worksheet('언급원인별 분석')

    def split_depths(key):
        # "d1>d2>d3" -> three display strings; missing levels become ''.
        # BUGFIX: the original guards were off by one (len >= 1 guarded
        # index 1, len >= 2 guarded index 2) and raised IndexError whenever
        # the channel key had fewer than three levels.
        parts = [re.sub(r"[\[\]]", "", p) for p in key.split(">")[:3]]
        return (parts + ['', '', ''])[:3]

    if not self.compare:
        # Header
        worksheet.write(0, 0, '1Depth', self.header)
        worksheet.write(0, 1, '2Depth', self.header)
        worksheet.write(0, 2, '3Depth', self.header)
        worksheet.write(0, 3, '대분류', self.header)
        worksheet.write(0, 4, '중분류', self.header)
        worksheet.write(0, 5, '소분류', self.header)
        worksheet.write(0, 6, '긍부정', self.header)
        worksheet.write(0, 7, '문서수', self.header)
        # Data
        qdsl = self.query.EMOTIONS_PER_CAUSES(self.compare)
        result = es.get_aggregations(copy.copy(qdsl), params, self.INDEX_NAME)
        total = result['hits']['total']
        row = 0
        for bucket1 in result['aggregations']['my_aggs1']['buckets']:
            # Depth split hoisted out of the inner loops (invariant per bucket1).
            depth1, depth2, depth3 = split_depths(bucket1['key'])
            for bucket2 in bucket1['my_aggs2']['buckets']:
                for bucket3 in bucket2['my_aggs3']['buckets']:
                    for bucket4 in bucket3['my_aggs4']['buckets']:
                        for bucket5 in bucket4['my_aggs5']['buckets']:
                            worksheet.write(1 + row, 0, depth1, self.default)
                            worksheet.write(1 + row, 1, depth2, self.default)
                            worksheet.write(1 + row, 2, depth3, self.default)
                            worksheet.write(1 + row, 3, bucket2['key'], self.default)  # major category
                            worksheet.write(1 + row, 4, bucket3['key'], self.default)  # middle category
                            worksheet.write(1 + row, 5, bucket4['key'], self.default)  # minor category
                            worksheet.write(1 + row, 6, bucket5['key'], self.default)  # sentiment
                            worksheet.write(1 + row, 7, bucket5['doc_count'], self.default)
                            row += 1
        # Grand-total footer, single-dataset reports only.
        if len(params['datasets'].split("^")) == 1:
            worksheet.write(row + 1, 0, '합계', self.header)
            for col in range(1, 7):
                worksheet.write(row + 1, col, '', self.header)
            worksheet.write(row + 1, 7, total, self.header)
    else:
        # Header (date column before the count)
        worksheet.write(0, 0, '1Depth', self.header)
        worksheet.write(0, 1, '2Depth', self.header)
        worksheet.write(0, 2, '3Depth', self.header)
        worksheet.write(0, 3, '대분류', self.header)
        worksheet.write(0, 4, '중분류', self.header)
        worksheet.write(0, 5, '소분류', self.header)
        worksheet.write(0, 6, '긍부정', self.header)
        worksheet.write(0, 7, '날짜', self.header)
        worksheet.write(0, 8, '문서수', self.header)
        # Data
        qdsl = self.query.EMOTIONS_PER_CAUSES(self.compare)
        result = es.get_aggregations(copy.copy(qdsl), params, self.INDEX_NAME)
        total = result['hits']['total']
        row = 0
        for bucket1 in result['aggregations']['my_aggs1']['buckets']:
            depth1, depth2, depth3 = split_depths(bucket1['key'])
            for bucket2 in bucket1['my_aggs2']['buckets']:
                for bucket3 in bucket2['my_aggs3']['buckets']:
                    for bucket4 in bucket3['my_aggs4']['buckets']:
                        for bucket5 in bucket4['my_aggs5']['buckets']:
                            for bucket6 in bucket5['my_aggs6']['buckets']:
                                worksheet.write(1 + row, 0, depth1, self.default)
                                worksheet.write(1 + row, 1, depth2, self.default)
                                worksheet.write(1 + row, 2, depth3, self.default)
                                worksheet.write(1 + row, 3, bucket2['key'], self.default)  # major category
                                worksheet.write(1 + row, 4, bucket3['key'], self.default)  # middle category
                                worksheet.write(1 + row, 5, bucket4['key'], self.default)  # minor category
                                worksheet.write(1 + row, 6, bucket5['key'], self.default)  # sentiment
                                worksheet.write(1 + row, 7, bucket6['key'], self.default)  # date bucket
                                worksheet.write(1 + row, 8, bucket6['doc_count'], self.default)
                                row += 1
        # Grand-total footer, single-dataset reports only.
        if len(params['datasets'].split("^")) == 1:
            worksheet.write(row + 1, 0, '합계', self.header)
            for col in range(1, 8):
                worksheet.write(row + 1, col, '', self.header)
            worksheet.write(row + 1, 8, total, self.header)