def btn_clicked(self):
    """Report the selection to the search client and switch to MyWindow3.

    Calls ``SearchClient.select_in`` on the shared ``serches`` client, closes
    this window, then creates a ``MyWindow3``, hands it the current ``id``
    and shows it.
    """
    SearchClient.select_in(serches)
    self.close()

    self.myWindow3 = next_window = MyWindow3()
    next_window.set_id(self.id)
    next_window.show()
def btn_clicked(self):
    """Handle the login button.

    Reads the id and password from the two line edits and asks
    ``SearchClient.send_DBinfo`` to check them.  If that call raises, an
    error dialog is shown and the method returns.  If it returns a value
    equal to ``True``, a "logging in" label is displayed, the TX set is
    prepared and sent, this window is closed and ``MyWindow2`` is opened
    with the entered id.  Any other result shows a "wrong id or password"
    dialog.
    """
    self.id = self.lineEdit.text()
    self.pw = self.lineEdit_2.text()

    try:
        recv_data = SearchClient.send_DBinfo(serches, self.id, self.pw)
    except Exception:
        # Narrowed from a bare ``except:`` so SystemExit / KeyboardInterrupt
        # are no longer swallowed by the login dialog.
        QMessageBox.about(self, "message","로그인중 에러가 발생하였습니다. 다시 시도해주세요")
        return

    # Loose comparison kept on purpose: the return type of send_DBinfo is
    # not visible here, so truthy non-True values must keep failing.
    if recv_data == True:  # noqa: E712
        self.label_3.setText("<span style=' font-size:18pt; font-weight:600; color:#005500;'>로그인 중......</span>")
        # NOTE(review): the text is set on label_3 but label is repainted;
        # confirm which widget was intended.
        self.label.repaint()

        # NOTE(review): these two calls are not guarded; an exception here
        # propagates out of the slot. Confirm whether that is acceptable.
        SearchClient.set_TXset(serches)
        SearchClient.send_TXset(serches)

        self.close()
        self.myWindow2 = MyWindow2()
        self.myWindow2.set_id(self.id)
        self.myWindow2.show()
    else:
        QMessageBox.about(self, "message","아이디 또는 비밀번호가 일치하지않습니다.")
def __init__(self):
    """Show the window, build its UI, wire the two buttons and connect.

    The window is shown before ``setupUi`` runs, the two push buttons are
    bound to ``btn_clicked`` and ``exit_clicked``, ``id`` / ``pw`` start as
    empty strings, and finally ``SearchClient.connectServer`` is called on
    the shared ``serches`` client.
    """
    super().__init__()
    self.show()
    self.setupUi(self)

    button_handlers = (
        (self.pushButton, self.btn_clicked),
        (self.pushButton_2, self.exit_clicked),
    )
    for button, handler in button_handlers:
        button.clicked.connect(handler)

    self.id = ""
    self.pw = ""

    SearchClient.connectServer(serches)
def btn_clicked2(self):
    """Tell the search client an insert was requested and open MyWindow4.

    Calls ``SearchClient.insert_in`` on the shared ``serches`` client, then
    creates and shows a ``MyWindow4``.  Prints ``"ss2"`` on success and
    ``"ss"`` if any step raises; this window stays open either way.
    """
    try:
        SearchClient.insert_in(serches)
        self.myWindow4 = MyWindow4()
        self.myWindow4.show()
        print("ss2")
    except Exception:
        # Narrowed from a bare ``except:`` so SystemExit / KeyboardInterrupt
        # are not silently turned into a debug print.
        print("ss")
def FileUpload(self):
    """Let the user pick a file and pass it to ``SearchClient.file_insert``.

    The name handed to ``file_insert`` is the part of the selected path
    after the last ``/``.  The window is closed after the insert call; if
    no file was chosen a warning dialog is shown instead.
    """
    selection = QFileDialog.getOpenFileName(self, 'Open file', "", "All Files(*);; txt File(*.txt)")
    print("뭐가안맞아")

    chosen_path = selection[0]
    # Text after the final '/', or the whole string when there is none.
    file_name = chosen_path.rpartition('/')[2]

    if not chosen_path:
        QMessageBox.about(self, "Warning", "파일을 선택하지 않았습니다.")
        return

    SearchClient.file_insert(serches, file_name, chosen_path)
    self.close()
def down_clicked(self):
    """Ask the search client to download and report the outcome.

    ``SearchClient.down_file`` is called on the shared ``serches`` client;
    a result equal to ``0`` shows a "nothing to download" warning.  Any
    exception raised along the way shows a download-error dialog.
    """
    try:
        check_num = SearchClient.down_file(serches)
        if check_num == 0:
            QMessageBox.about(self, "Warning", "다운 받을 파일이 없습니다")
    except Exception:
        # Narrowed from a bare ``except:`` so SystemExit / KeyboardInterrupt
        # are not reported as download errors.
        QMessageBox.about(self, "message","다운로드 에러입니다 재다운로드 하거나 재검색 해주세요")
def init_es():
    """Build a ``SearchClient`` around an Elasticsearch connection.

    On ``darwin`` the host/port pair comes from ``config.get_es_local()``;
    on every other platform it comes from ``config.get_es_config_2()``.
    """
    on_mac = sys.platform == 'darwin'
    endpoint = config.get_es_local() if on_mac else config.get_es_config_2()
    es_host, es_port = endpoint
    connection = Elasticsearch([{'host': es_host, 'port': es_port}])
    return SearchClient(connection)
class RuleBasedExtractor(object):
    """Applies rule-driven tag assignments to companies.

    Three independent passes are offered: ``replace`` (keyword replacement
    documents from Mongo), ``infer_hierarchically`` (tag hierarchy) and
    ``infer_rules`` (search-rule tags).  Each pass writes through
    ``dbutil.update_company_tag``.
    """

    def __init__(self):
        """Open the torndb and Mongo connections and create a search client."""
        self.db = dbcon.connect_torndb()
        self.mongo = dbcon.connect_mongo()
        self.client = SearchClient()

    def replace(self):
        """Tag companies that carry every tag of an active replacement.

        For each active replacement document with more than one replacement
        tag, and whose source tag has a ``type`` of at least 11010, the
        companies common to all replacement tags get the source tag
        (value 1.503, flag ``'P'``).
        """
        for replacement in self.mongo.keywords.replacement.find({'active': 'Y'}):
            source = replacement.get('source')
            # A document without a 'replacement' list is skipped instead of
            # raising TypeError on len(None).
            replaces = replacement.get('replacement') or []
            if len(replaces) > 1 and dbutil.get_tag_info(self.db, source, 'type') >= 11010:
                company_sets = [set(dbutil.get_company_from_tag(self.db, replace))
                                for replace in replaces]
                # Same result as reduce(lambda x, y: x & y, ...); the list is
                # never empty here because len(replaces) > 1.
                for c in set.intersection(*company_sets):
                    dbutil.update_company_tag(self.db, c, source, 1.503, 'P')

    def infer_hierarchically(self):
        """Assign level-2 sectored tags from their related and hypernym tags.

        For every tag returned by ``get_sectored_tags(db, 2)`` that has tags
        under relation 54041, the companies present both under those related
        tags and under the tag's level-1 hypernyms are collected.  Above
        2500 companies the tag is only cleared and an error is logged;
        otherwise each company is tagged (value 1.504, flag ``'P'``) and the
        tag is cleared using a checkpoint taken two hours before the pass.
        """
        for t2 in dbutil.get_sectored_tags(self.db, 2):
            t3s = dbutil.get_tags_by_relation(self.db, t2.id, 54041)
            if not t3s:
                continue
            check_point = datetime.now() - timedelta(hours=2)
            t1s = dbutil.get_hypernym_tags(self.db, t2.id, 1)
            hierachicals = (set(dbutil.get_company_from_tags(self.db, list(t3s))) &
                            set(dbutil.get_company_from_tags(self.db, t1s)))
            if len(hierachicals) > 2500:
                dbutil.clear_company_common_tag(self.db, t2.id, check_point)
                # error(), not exception(): there is no active exception
                # here, so exception() would append a bogus "None" traceback.
                logger_tag.error('Hierachical cross threshold, %s, %s' % (t2.name, len(hierachicals)))
            else:
                for c in hierachicals:
                    dbutil.update_company_tag(self.db, c, t2.id, 1.504, 'P')
                dbutil.clear_company_common_tag(self.db, t2.id, check_point)
                logger_tag.info('Hierachically processed %s' % t2.name)

    def infer_rules(self):
        """Tag companies matched by each ruled tag's search rule.

        Each tag's rule text is normalised, turned into a query with
        ``generate_rule_based_query`` and run as a ``'topic'`` search.
        Result sets above 2000 codes are logged and skipped; otherwise every
        company that does not already carry the tag receives it (value
        1.505).  A failure on one tag is logged and the loop continues.
        """
        for t in dbutil.get_ruled_tags(self.db):
            logger_tag.info('Processing rule for %s' % t.name)
            try:
                # NOTE(review): the original replaced ',', '(' and ')' with
                # themselves (no-ops). The full-width forms are presumably
                # what was intended - confirm against real rule data.
                rule = (t.rule
                        .replace(u'\uff0c', u',')
                        .replace(u'\uff08', u'(')
                        .replace(u'\uff09', u')')
                        .replace(u' ', u'')
                        .lower())
                rule = generate_rule_based_query(rule)
                if not rule:
                    continue
                codes = self.client.search('topic', query=rule).get('company', {}).get('data', [])
                if len(codes) > 2000:
                    # See infer_hierarchically: no exception is active here.
                    logger_tag.error('To many results, %s, %s' % (t.name, len(codes)))
                else:
                    logger_tag.info('%s processed' % t.name)
                    for code in codes:
                        cid = dbutil.get_id_from_code(self.db, code)
                        if not dbutil.exist_company_tag(self.db, cid, t.id):
                            dbutil.update_company_tag(self.db, cid, t.id, 1.505)
            except Exception as e:
                logger_tag.exception('Fail to process tag rules %s, due to %s' % (t.name, e))
def test():
    """Run the full industry fit for the single hard-coded industry 732.

    Fits companies, comps, tags and news for the industry, then records the
    industry's last message time.  The torndb connection is closed even if
    one of the steps raises.
    """
    db = dbcon.connect_torndb()
    try:
        client = SearchClient()
        logger_industry.info('Model inited, start to update company oriented topics')
        idid = 732
        industry = Industry(idid)
        industry.fit_company(client)
        industry.fit_comps()
        industry.fit_tag()
        industry.fit_news()
        dbutil.update_industry_last_message_time(db, idid)
    finally:
        # Previously skipped whenever a fit step raised, leaking the connection.
        db.close()
def before_request():
    """Attach the per-request connections and search clients to ``g``.

    One Elasticsearch connection is created (local settings on ``darwin``,
    ``config.get_es_config()`` otherwise) and shared by every search client
    stored on ``g``.  ``g.db`` and ``g.mongo`` are opened first, and
    ``g.logger`` is set to the ``log.search`` Mongo collection.
    """
    endpoint = config.get_es_local() if sys.platform == 'darwin' else config.get_es_config()
    host, port = endpoint
    es_client = Elasticsearch([{'host': host, 'port': port}])

    g.db = dbcon.connect_torndb()
    g.mongo = dbcon.connect_mongo()

    # (attribute on g, client class, extra positional args after es_client)
    client_specs = (
        ('amacsc', AMACClient, ()),
        ('usc', UniversalSearchClient, ()),
        ('dsc', DealSearchClient, (False,)),
        ('nsc', NewsSearchClient, ()),
        ('rsc', ReportSearchClient, ()),
        ('isc', InteriorSearchClient, ()),
        ('dtsc', DigitalTokenSearchClient, ()),
        ('sc', SearchClient, ()),
    )
    for attr_name, client_class, extra_args in client_specs:
        setattr(g, attr_name, client_class(es_client, *extra_args))

    # NOTE(review): this opens a second Mongo connection instead of reusing
    # g.mongo - confirm whether that is intentional.
    g.logger = dbcon.connect_mongo().log.search
def update_industries():
    """Refit every industry in an endless loop, pausing 1800 s between passes.

    Each pass opens a fresh torndb connection and search client, runs the
    company / comps / tag / news fits for every industry returned by
    ``dbutil.get_industries`` and records its last message time.  A failure
    on one industry is logged and the pass continues.  The connection is
    closed at the end of every pass, including when the pass itself raises.
    """
    logger_industry.info('Start to process industries')
    while True:
        db = dbcon.connect_torndb()
        try:
            client = SearchClient()
            logger_industry.info('Model inited, start to update industries')
            for idid, _ in dbutil.get_industries(db):
                try:
                    logger_industry.info('Processing %s' % idid)
                    industry = Industry(idid)
                    industry.fit_company(client)
                    industry.fit_comps()
                    industry.fit_tag()
                    industry.fit_news()
                    dbutil.update_industry_last_message_time(db, idid)
                except Exception as e:
                    logger_industry.exception('%s failed, %s' % (idid, e))
        finally:
            # Previously leaked if listing the industries raised.
            db.close()
        time.sleep(1800)
def btn_clicked(self):
    """Run a keyword search and list the returned files.

    Calls ``loop_check``, clears the list widget, then drives the shared
    ``serches`` client through ``input_keyword`` (with the text of the
    plain-text edit), ``startwork``, ``getIds``, ``send_fileid`` and
    ``get_file``.  Every returned entry is added to the list widget,
    ``check_num`` is incremented and ``select_send`` is called.  If any
    step raises, an error dialog is shown and ``loop_check`` is called
    again.
    """
    SearchClient.loop_check(serches)
    self.listWidget.clear()
    try:
        # The "kwon..." prints are the original progress traces, kept as-is.
        print("kwon1")
        SearchClient.input_keyword(serches, self.plainTextEdit.toPlainText())
        print("kwon2")
        SearchClient.startwork(serches)
        print("kwon3")
        SearchClient.getIds(serches)
        print("kwon4")
        SearchClient.send_fileid(serches)
        print("kwon5")
        files = SearchClient.get_file(serches)
        print("kwon16")
        for file_name in files:
            self.listWidget.addItem(file_name)
        print("kwon122")
        self.check_num = self.check_num + 1
        SearchClient.select_send(serches)
        print("kwon1226")
    except Exception:
        # Narrowed from a bare ``except:`` so SystemExit / KeyboardInterrupt
        # are not reported as search errors.
        QMessageBox.about(self, "message","검색중 에러가 발생하였습니다 다시 검색해주세요")
        # NOTE(review): assumed to belong to the error path; confirm it was
        # not meant to run after every search.
        SearchClient.loop_check(serches)
class UniversalSearchClient(object):
    """Search facade over the ``xiniudata2`` Elasticsearch index.

    ``search`` dispatches on a search-type name to one of the private
    ``__search_*`` handlers.  Handlers take their options as keyword
    arguments (``input``, ``filter``, ``start``, ``size``, ``sort``,
    ``order`` and a few handler-specific keys) and return a dict keyed by
    result kind.

    The logger, the general completion client and both database connections
    are class attributes, so they are created once when the class is defined
    and shared by all instances.
    """

    logger = logger_universal
    general_client = SearchClient()
    mongo = dbcon.connect_mongo()
    db = dbcon.connect_torndb()

    # sort code -> (ES field, direction used for order='default', nested spec)
    # The nested spec is (nested_path, term field compared with nested_value)
    # or None for a plain field sort.
    _SORT_SPECS = {
        76001: ('ranking_score', 'desc', None),
        76002: ('sort_sector', 'desc', None),
        76003: ('sort_location', 'desc', None),
        76004: ('last_funding_date', 'desc', None),
        76005: ('sort_round', 'asc', None),
        76006: ('last_funding_amount', 'desc', None),
        76007: ('established', 'desc', None),
        76008: ('investor_tag.confidence', 'desc', ('investor_tag', 'investor_tag.tag')),
        76009: ('portfolio_number_annual', 'desc', None),
        76010: ('portfolio_number', 'desc', None),
        76020: ('nested_tag.published', 'desc', ('nested_tag', 'nested_tag.id')),
    }

    def __init__(self, es=None):
        """Use ``es`` if given, otherwise connect with ``tsbconfig`` settings."""
        if not es:
            host, port = tsbconfig.get_es_config()
            self.es = Elasticsearch([{'host': host, 'port': port}])
        else:
            self.es = es
        self.max_result_size = 1000

    def search(self, type, **kwargs):
        """Run the search handler registered for ``type``.

        ``type`` is one of 'general', 'combined', 'investor', 'industry',
        'topic', 'ranklist', 'event' or 'completion'; any other value raises
        ``KeyError``.  ``kwargs`` are forwarded to the handler unchanged.
        """
        self.logger.info('Query: %s' % (str(kwargs)))
        handlers = {
            'general': self.__search_universal,
            'combined': self.__search_combined,
            'investor': self.__search_investor,
            'industry': self.__search_industry,
            'topic': self.__search_topic,
            'ranklist': self.__search_ranklist,
            'event': self.__search_event,
            'completion': self.__search_completion
        }
        return handlers[type](**kwargs)

    def __search_completion(self, **kwargs):
        """Return completion suggestions grouped by prompt kind.

        With ``field`` set, only that bucket is returned (capacity 10, 20
        hits requested); otherwise the name / keyword / location / investor
        / industry buckets are filled from up to 100 hits.  Returns
        ``{'status': 'failed'}`` or ``{'status': 'empty'}`` when there is
        nothing usable.
        """
        kv = dict(**kwargs)
        key, field = kv.get('key'), kv.get('field')
        # ranking benchmark
        rank_key = lambda x: x.get('ranking_score', 0.01)
        # query, diff with field
        if field:
            query = templates.get_field_completion(key, field)
            hits = self.es.search(index="xiniudata2", doc_type="completion",
                                  body={"query": query, "size": 20})
            results = {
                field: FrozenLenList(10)
            }
        else:
            query = templates.get_completion(key)
            hits = self.es.search(index="xiniudata2", doc_type="completion",
                                  body={"query": query, "size": 100})
            results = {
                'name': FrozenLenList(8),
                'keyword': SortedFixLenList(3, rank_key),
                'location': FrozenLenList(1),
                'investor': SortedFixLenList(2, rank_key),
                'industry': FrozenLenList(1)
            }
        # result success check
        # NOTE(review): Elasticsearch reports timeouts under 'timed_out', so
        # 'time_out' is presumably never present; left unchanged because
        # enabling this branch changes what callers receive - confirm.
        if ('error' in hits) or hits.get('time_out'):
            return {'status': 'failed'}
        hits = hits['hits']['hits']
        if not hits:
            return {'status': 'empty'}
        # results ranking and format
        for item in hits:
            if '_source' not in item:
                continue
            hit = item['_source']
            prompt = hit.get('_prompt') or 'name'
            bucket = results.get(prompt)
            if bucket is None:
                # Prompt kind not requested (e.g. field mode); previously a
                # KeyError that aborted the whole completion.
                continue
            bucket.append(hit)
        return results

    def __search_combined(self, **kwargs):
        """Combine company, investor and industry results for one query.

        Industries come from the general client's industry completion for
        ``input``, followed by any industries of the company result whose id
        is not already listed.
        """
        query = dict(kwargs)
        results = {'relevant': []}
        companies = self.__search_universal(**query)
        results['company'] = companies.get('company', {})
        results['investor'] = self.__search_investor(**query).get('investor', {})
        completed = self.general_client.search(
            'completion', key=query.get('input', ''), field='industry').get('industry', [])
        # List comprehension instead of map(): the result is appended to
        # below, which needs a real list.
        industries = [{'name': x.get('_name'), 'code': x.get('_code'), 'id': x.get('id')[8:]}
                      for x in completed]
        for ind in companies.get('industry', []):
            if ind.get('id') in [industry.get('id') for industry in industries]:
                continue
            industries.append(ind)
        results['industry'] = industries
        return results

    def __search_universal(self, **kwargs):
        """Search companies; returns ``{'company': {'count', 'data'}}``.

        For non-tag intents a fast hint (see ``__fast_hint``) is returned
        instead when one is available.
        """
        query = dict(kwargs)
        start = query.get('start', 0)
        size = min(query.get('size', 10), self.max_result_size)
        highlight = query.get('highlight', False)
        sort = query.get('sort', 76001)
        order = query.get('order', 'default')

        # query preparation
        general_query = UniversalQuery(query.get('input'), query.get('filter'))
        es_query = general_query.generate_query()
        intent = general_query.get_intent()
        self.logger.info('ES Query Generated, intent %s' % intent)

        # fast hint
        if intent != 'tag':
            fast_hint = self.__fast_hint(query, highlight, intent)
            if fast_hint:
                self.logger.info('Fast hint')
                return fast_hint

        # rethink sort function: sort 0 has no entry in the sort table, so
        # no explicit sort clause is sent for 'general' intent.
        if sort == 76001 and intent == 'general':
            sort = 0
            order = 'desc'
        hits = self.es.search(index='xiniudata2', doc_type='universal',
                              body={"query": es_query,
                                    "sort": self.__generate_sort_search(sort, order),
                                    "from": start, "size": size,
                                    "highlight": self.__generate_highlight_search()})
        count = hits['hits'].get('total', 0)
        hits = self.__organize(hits, highlight)
        self.logger.info('Result ready')
        return {"company": {"count": count, "data": hits}}

    def __search_investor(self, **kwargs):
        """Search investors; returns ``{'investor': {'count', 'data'}}``.

        The default sort switches to the nested tag-confidence sort (76008)
        when the query intent is 'tag' or a tag filter is present.
        """
        query = dict(kwargs)
        start = query.get('start', 0)
        size = min(query.get('size', 10), self.max_result_size)
        sort = query.get('sort', 76001)
        order = query.get('order', 'default')
        # 'filter' may be passed explicitly as None.
        filters = query.get('filter') or {}

        investor_query = InvestorQuery(query.get('input'), query.get('filter'))
        es_query = investor_query.generate_query()
        if sort == 76001 and (investor_query.get_intent() == 'tag' or filters.get('tag')):
            sort = 76008
        nested_tag = investor_query.get_tag() if sort == 76008 else None
        sort_clause = self.__generate_sort_search(sort, order, nested_tag)
        self.logger.info('ES, %s' % es_query)
        self.logger.info('Sort, %s' % sort_clause)
        hits = self.es.search(index='xiniudata2', doc_type='investor',
                              body={"query": es_query, "sort": sort_clause,
                                    "from": start, "size": size})
        count = hits['hits'].get('total', 0)
        hits = self.__organize(hits)
        self.logger.info('Result ready, %s' % str(hits))
        return {"investor": {"count": count, "data": hits}}

    def __search_industry(self, **kwargs):
        """Search companies of one industry, with that industry's sector filters."""
        query = dict(kwargs)
        start = query.get('start', 0)
        size = min(query.get('size', 10), self.max_result_size)
        sort = query.get('sort', 1)
        order = query.get('order', 'default')
        idid = query.get('industry', 0)

        general_query = UniversalQuery(query.get('input'), query.get('filter'),
                                       {'industry': query.get('industry')})
        es_query = general_query.generate_query()
        self.logger.info('ES %s, industry %s' % (es_query, idid))
        hits = self.es.search(index='xiniudata2', doc_type='universal',
                              body={"query": es_query,
                                    "sort": self.__generate_sort_search(sort, order, idid),
                                    "from": start, "size": size})
        count = hits['hits'].get('total', 0)
        hits = self.__organize(hits)
        self.logger.info('Result ready')
        sector_filters = self.__get_sector_filter(idid, 'industry')
        return {"company": {"count": count, "data": hits, 'sectors': sector_filters}}

    def __search_topic(self, **kwargs):
        """Search companies of one topic; default sort becomes 76020."""
        query = dict(kwargs)
        start = query.get('start', 0)
        size = min(query.get('size', 10), self.max_result_size)
        sort = query.get('sort', 76001)
        order = query.get('order', 'default')
        tpid = query.get('topic', 0)
        if sort == 76001:
            sort = 76020

        general_query = UniversalQuery(query.get('input'), query.get('filter'),
                                       {'topic': query.get('topic')})
        es_query = general_query.generate_query()
        self.logger.info('ES %s, topic %s' % (es_query, tpid))
        hits = self.es.search(index='xiniudata2', doc_type='universal',
                              body={"query": es_query,
                                    "sort": self.__generate_sort_search(sort, order, tpid),
                                    "from": start, "size": size})
        count = hits['hits'].get('total', 0)
        hits = self.__organize(hits)
        self.logger.info('Result ready')
        sector_filters = self.__get_sector_filter(tpid, 'topic')
        return {"company": {"count": count, "data": hits, 'sectors': sector_filters}}

    def __search_ranklist(self, **kwargs):
        """Search companies for the first tag in the filter.

        Returns an empty company result when no tag filter is given.
        """
        query = dict(kwargs)
        start = query.get('start', 0)
        size = min(query.get('size', 10), self.max_result_size)
        sort = query.get('sort', 76001)
        order = query.get('order', 'default')
        tag = (query.get('filter') or {}).get('tag')
        if not tag:
            return {"company": {"count": 0, "data": [], 'sectors': []}}
        tag = tag[0]
        tid = dbutil.get_tag_id(self.db, tag)[0]

        general_query = UniversalQuery(query.get('input'), query.get('filter'))
        es_query = general_query.generate_query()
        self.logger.info('ES %s, topic %s' % (es_query, tag))
        hits = self.es.search(index='xiniudata2', doc_type='universal',
                              body={"query": es_query,
                                    "sort": self.__generate_sort_search(sort, order, tid),
                                    "from": start, "size": size})
        count = hits['hits'].get('total', 0)
        hits = self.__organize(hits)
        self.logger.info('Result ready')
        sector_filters = self.__get_sector_filter(tag, 'tag')
        return {"company": {"count": count, "data": hits, 'sectors': sector_filters}}

    def __search_event(self, **kwargs):
        """Search funding events; returns ``{'funding': {...}}``.

        Sector filters are looked up for the first investor in the filter,
        and are empty when no investor filter is given.
        """
        query = dict(kwargs)
        start = query.get('start', 0)
        size = min(query.get('size', 10), self.max_result_size)
        sort = query.get('sort', 1)
        order = query.get('order', 'default')

        # sector filter needed
        investors = (query.get('filter') or {}).get('investor')
        if investors:
            sector_filters = self.__get_sector_filter(investors[0], 'investor')
        else:
            sector_filters = []

        event_query = EventQuery(query.get('filter'))
        es_query = event_query.generate_query()
        self.logger.info('ES, %s' % es_query)
        hits = self.es.search(index='xiniudata2', doc_type='event',
                              body={"query": es_query,
                                    "sort": self.__generate_sort_search(sort, order),
                                    "from": start, "size": size})
        count = hits['hits'].get('total', 0)
        hits = self.__organize(hits)
        self.logger.info('Result ready, %s' % str(hits))
        return {"funding": {"count": count, "data": hits, "sectors": sector_filters}}

    def __get_sector_filter(self, source, ftype):
        """Return the tag names stored as sector filters for ``source``/``ftype``."""
        sector_filters = self.mongo.keywords.sector_filters.find_one(
            {'source': source, 'filter_type': ftype})
        sector_filters = sector_filters.get('sectors', []) if sector_filters else []
        return [dbutil.get_tag_name(self.db, tid) for tid in sector_filters]

    def __fast_hint(self, query, highlight=False, intent='default'):
        """Short-circuit a search when name completion is nearly unambiguous.

        Applies only to unfiltered, non-tag text input.  The name completion
        for the lower-cased, stripped input is used directly when it yields
        fewer than 5 entries for input containing the company suffix
        (u'有限公司'), or fewer than 3 entries otherwise.  Returns the hint
        dict, or ``False`` when no hint applies.
        """
        if self.__exist_filter(query.get('filter') or {}):
            return False
        text = query.get('input')
        if not isinstance(text, (str, unicode)):
            return False
        if intent == 'tag':
            return False
        key = text.strip().lower()

        # Both original branches issued this same completion request.
        completion = self.general_client.search('completion', key=key, field='name')
        names = (completion or {}).get('name') or []
        is_company_name = u'有限公司' in key
        limit = 5 if is_company_name else 3
        if not 0 < len(names) < limit:
            return False

        plain_codes = list(set(item.get('_code') for item in names))
        if highlight:
            data = [{'id': item.get('_code'),
                     'highlight': 'name' if key == item.get('_name') else 'alias'}
                    for item in names]
        else:
            data = plain_codes
        # Always resolve industries from plain codes; the highlight branch
        # used to pass the {'id', 'highlight'} dicts to the code lookup.
        industries = self.__find_industry(plain_codes)
        hint = {"company": {"count": len(data), "data": data},
                "filter": {"tag": []}, "relate": [], "industry": industries}
        if not is_company_name:
            hint['collection_preservable'] = False
        return hint

    def __find_industry(self, codes):
        """Return id/code/name dicts of active industries for company codes.

        An industry counts as active when its ``active`` is ``None`` or 'Y'.
        """
        cids = [dbutil.get_id_from_code(self.db, code) for code in codes]
        idids = [ind.industryId for ind in dbutil.get_company_industries(self.db, cids, True)]
        inds = [dbutil.get_industry_info(self.db, idid) for idid in idids]
        return [{"id": ind.id, "code": ind.code, "name": ind.name} for ind in inds
                if (ind.active is None or ind.active == 'Y')]

    def __organize(self, hits, highlight=False):
        """Reduce a raw ES response to a list of ids.

        With ``highlight`` each entry is ``{'id', 'highlight'}`` naming the
        preferred highlighted field.  Returns ``{'status': 'failed'}`` for an
        error response and ``[]`` when nothing matched.
        """
        # NOTE(review): see __search_completion - 'time_out' is presumably
        # never set by Elasticsearch ('timed_out' is); confirm before fixing,
        # since callers treat the return value as a list.
        if ('error' in hits) or hits.get('time_out'):
            return {'status': 'failed'}
        records = hits['hits']['hits']
        if not records:
            return []
        if highlight:
            return [{"id": result['_id'],
                     "highlight": self.__highlight_sort(result.get('highlight', {}).keys())}
                    for result in records]
        return [result['_id'] for result in records]

    def __generate_sort_search(self, sort=0, order='default', nested_value=None):
        """Build the ES ``sort`` clause for a sort code.

        ``order`` of 'default', or the code's own default direction, selects
        that default; any other value selects the opposite direction.
        Unknown codes give ``[]``.  ``nested_value`` is only used by the
        nested sorts (76008, 76020).
        """
        spec = self._SORT_SPECS.get(sort)
        if spec is None:
            return []
        field, default_direction, nested = spec
        if order == 'default' or order == default_direction:
            direction = default_direction
        else:
            direction = 'asc' if default_direction == 'desc' else 'desc'
        clause = {"order": direction, "missing": "_last"}
        if nested:
            nested_path, term_field = nested
            clause["nested_path"] = nested_path
            clause["nested_filter"] = {"term": {term_field: nested_value}}
        return [{field: clause}]

    def __generate_highlight_search(self):
        """Return the ES highlight spec for the six highlightable fields."""
        return {
            "fields": {
                "name": {},
                "alias": {},
                "tags": {},
                "investors": {},
                "members": {},
                "description": {}
            }
        }

    def __exist_filter(self, f):
        """Return True when any value of filter dict ``f`` is truthy."""
        return any(f.values())

    def __highlight_sort(self, highlights):
        """Pick the highest-priority highlighted field name.

        Priority is name, tags, investors, members, alias, description; an
        empty collection yields 'name', and an unknown-only collection
        yields its first entry.
        """
        if not highlights:
            return 'name'
        for field in ('name', 'tags', 'investors', 'members', 'alias', 'description'):
            if field in highlights:
                return field
        # Works for both a list and a keys view.
        return next(iter(highlights))
def __init__(self):
    """Open the torndb and Mongo connections and create a search client."""
    # Evaluated left to right: torndb, then Mongo, then the search client.
    self.db, self.mongo, self.client = (
        dbcon.connect_torndb(),
        dbcon.connect_mongo(),
        SearchClient(),
    )
def closeEvent(self, event):
    """Handle the window being closed.

    Calls ``SearchClient.insert_exit`` on the shared ``serches`` client and
    schedules this widget for deletion.  ``event`` itself is not touched.
    """
    SearchClient.insert_exit(serches)
    self.deleteLater()
if __name__ == '__main__':
    # Demo: run two word-level queries and one sentence-level query
    # against the search client configured from '../conf'.
    import json
    import time
    from client import SearchClient

    sc = SearchClient('../conf')

    # 1. Lexical query.  2. Grammar query.
    # Each entry: (raw query, label printed with the built query, results header).
    word_demos = (
        ({'ana.lex': 'vbcvqr'}, 'query1 (words):', 'Results of query1:'),
        ({'ana.gr.tense': 't7', 'ana.gr.pers': '2'}, 'query2 (words):', 'Results of query2:'),
    )
    for raw_query, query_label, results_header in word_demos:
        built_query = sc.make_word_ana_query(raw_query)
        print(query_label, json.dumps(built_query, ensure_ascii=False))
        word_hits = sc.get_words(built_query)
        print(results_header)
        print(json.dumps(word_hits, ensure_ascii=False, indent=1))

    # 3. Grammar query in sentences: only the hit count and timing are printed.
    sentence_query = sc.make_sent_ana_query({'ana.gr.tense': 't7', 'ana.gr.pers': '2'})
    print('query2 (sentences):', json.dumps(sentence_query, ensure_ascii=False))
    sentence_hits = sc.get_sentences(sentence_query)
    print('Results of query2:')
    print('Hits:', sentence_hits['hits']['total'], ', took: ', sentence_hits['took'], 'ms.')
def exit_clicked(self):
    """Handle the exit button.

    Calls ``SearchClient.loop_check2`` on the shared ``serches`` client,
    then closes this window.
    """
    SearchClient.loop_check2(serches)
    self.close()
def back_clicked(self):
    """Go back to MyWindow2.

    Calls ``SearchClient.back_check`` on the shared ``serches`` client,
    closes this window, then creates a ``MyWindow2``, hands it the current
    ``id`` and shows it.
    """
    SearchClient.back_check(serches)
    self.close()

    self.myWindow2 = previous_window = MyWindow2()
    previous_window.set_id(self.id)
    previous_window.show()
import sys from PyQt5.QtWidgets import * from PyQt5 import uic import time from client import SearchClient import re form_class=uic.loadUiType("./ui/login.ui")[0] form_class2=uic.loadUiType("./ui/scan_ready.ui")[0] form_class3=uic.loadUiType("./ui/scan.ui")[0] # form_class4=uic.loadUiType("./ui/insert.ui")[0] serches=SearchClient() class MyWindow(QMainWindow,form_class): def __init__(self): super().__init__() self.show() self.setupUi(self) self.pushButton.clicked.connect(self.btn_clicked) self.pushButton_2.clicked.connect(self.exit_clicked) self.id="" self.pw="" SearchClient.connectServer(serches) def btn_clicked(self): self.id=self.lineEdit.text() self.pw=self.lineEdit_2.text() try: recv_data=SearchClient.send_DBinfo(serches,self.id,self.pw) except: QMessageBox.about(self, "message","로그인중 에러가 발생하였습니다. 다시 시도해주세요") return