def fulltext(self):
    """Collect fulltext-pipeline error counts from Elasticsearch.

    Finds the timestamp of the latest pipeline start message, stores it
    in ``self.values['ft_start']``, and — once the pipeline has had at
    least 15 hours to run — tallies the unique bibcodes matching each
    error pattern in ``conf['FULLTEXT_ERRORS']``.  Matching bibcodes are
    written to per-error files under ``data/ft/<err>/`` and per-error
    plus total counts are recorded in ``self.values``.
    """
    es_url = conf['ELASTICSEARCH_URL']
    es = elasticsearch2.Elasticsearch(es_url)
    pipeline = 'backoffice-fulltext_pipeline'

    # The most recent "Loading records ..." message marks the start of
    # the current pipeline run.
    start = Search(using=es, index='_all') \
        .query('match', **{'@message': 'Loading records from: /proj/ads/abstracts/config/links/fulltext/all.links'}) \
        .filter('match', **{'_type': pipeline}) \
        .execute() \
        .hits[0] \
        .timestamp
    # Drop fractional seconds before parsing into a datetime object.
    start = datetime.strptime(start.split('.')[0], '%Y-%m-%dT%H:%M:%S')
    self.values['ft_start'] = start

    # fulltext pipeline runs for ~15 hours without forcing extraction
    if (datetime.now() - start) < timedelta(hours=15):
        print("fulltext pipeline is most likely not done processing.")
        return

    total_num_errors = 0
    for err in conf['FULLTEXT_ERRORS']:
        bibs = []
        s = Search(using=es, index='_all') \
            .filter('range', **{'@timestamp': {'gte': start, 'lt': 'now'}}) \
            .query('query_string', query=err) \
            .filter('match', **{'_type': pipeline})

        # Build a filesystem-safe slug from the quoted part of the
        # error pattern, e.g. '... "no such [file]" ...' -> 'no_such_file'.
        err_str = "_".join(err.split('"')[1].split()).replace(
            '-', '_').replace(']', '').replace('[', '')
        filename = str(start).split()[0] + "_" + err_str + ".txt"
        out_dir = "data/ft/" + err_str
        out_path = out_dir + '/' + filename
        # Ensure the per-error directory exists; open() alone would
        # raise FileNotFoundError for a fresh error type.
        os.makedirs(out_dir, exist_ok=True)

        with open(out_path, "w") as f:
            for hit in s.scan():
                if "Retrying" in hit.message:
                    continue
                # The first single-quoted token of the log message tells
                # us how the bibcode is embedded in it.  Hoisted so the
                # regex runs once per hit instead of up to three times.
                tokens = re.findall(r"'(.*?)'", hit.message)
                if tokens[0] in ('bibcode', 'UPDATE'):
                    # Bibcode lives in a repr'd dict within the message.
                    bib = re.search(r"u'bibcode': u'(.*?)'",
                                    hit.message).group(1)
                else:
                    # Otherwise the first quoted token is the bibcode.
                    bib = tokens[0]
                f.write(bib + '\n')
                bibs.append(bib)

        count = len(set(bibs))  # count unique bibcodes only
        self.values[err_str + "_total"] = count
        total_num_errors += count

    self.values['total_fulltext_errors'] = total_num_errors
def post(self, request):
    """Search walking trails in Elasticsearch.

    The request body carries a ``search`` string.  Without a ``:`` the
    whole string is matched against a fixed field list.  With ``:``
    separators it is parsed as alternating key/value segments (each
    segment's last word is the key for the next segment); every pair is
    searched independently and the intersection of all result sets is
    returned.

    Returns HTTP 400 when the search string is missing or a query pair
    cannot be processed.
    """
    es = Elasticsearch([{'host': ELASTICSEARCH_HOST, 'port': '9200'}])
    req = JSONParser().parse(request)
    data = req['search']
    if not data:
        return Response(status=status.HTTP_400_BAD_REQUEST,
                        data={'message': 'search word param is missing'})

    # Preprocess the search term: split on ':'.
    data_list = []
    search_words = data.split(":")

    # Full search: no key/value structure present in the query string.
    if len(search_words) <= 1:
        m = MultiMatch(query=search_words[0], fields=[
            "category",
            "region",
            "distance",
            "transportation",
            "_explain",
            "point_name",
        ])
        s = Search(using=es, index='walkingtrails-index').query(m)[:10000]
        res = s.execute()
        for hit in res:
            data_list.append(hit.to_dict())
        return Response(data_list)

    # Per-key search: walk the segments, carrying the key forward.
    key = None
    for segment in search_words:
        key = transkey(key)
        words = segment.split(' ')
        if key is None:
            # First segment only supplies the initial key.
            key = words[0]
            continue
        try:
            value = " ".join(words[:len(words) - 1]) if len(words) > 2 else words[0]
            if "'" in value:
                # Quoted value: exact term match on the point name.
                value = value.replace("'", "")
                s = Search(using=es, index='walkingtrails-index').query(
                    'term', point_name=value)[:10000]
            else:
                m = MultiMatch(query=value, fields=[key])
                s = Search(using=es, index='walkingtrails-index').query(m)[:10000]
            res = s.execute()
            d_list = [hit.to_dict() for hit in res]
            data_list.append(d_list)
            key = words[-1]
        except Exception:
            # Malformed pair (unknown key, ES failure, ...): reject.
            return Response(status=status.HTTP_400_BAD_REQUEST,
                            data={'message': 'wrong query'})

    # Intersect all per-key result sets.  Dicts are made hashable as
    # sorted item tuples; the first pass intersects the seed list with
    # itself, which also deduplicates it.
    result = data_list[0]
    for d_list in data_list:
        result = list(
            map(
                dict,
                set(tuple(sorted(d.items())) for d in result)
                & set(tuple(sorted(d.items())) for d in d_list)))
    return Response(result)