class QueryHandler:
    """Query interface over the lite Elasticsearch index.

    TODO: Query Interface for deep doc search.
    """

    def __init__(self):
        """Create the Elasticsearch client and the query pre-processor."""
        self.es = Elasticsearch()
        self.ch = ContentHandler()

    def lite_search(self, query):
        """Run a ``more_like_this`` search for *query* on the lite index.

        The query text is first passed through ``ContentHandler.transform``
        and then matched against the ``title_index`` and ``abstract_index``
        fields of ``constants.DEFAULT_LITE_INDEX``.

        Args:
            query: Free-text query. Anything that is not a ``str`` is
                rejected.

        Returns:
            The list found under ``["hits"]["hits"]`` in the search response,
            or an empty list when the query is not a string or is empty
            after pre-processing.
        """
        if not isinstance(query, str):
            return []

        processed_query = self.ch.transform(query)
        # Treat any empty result (not only '') as "nothing to search for" so
        # that an empty `like` clause is never sent to Elasticsearch.
        if not processed_query:
            return []

        # TODO: Proper tf-idf freq.
        search_body = {
            "query": {
                "more_like_this": {
                    "fields": ["title_index", "abstract_index"],
                    "like": processed_query,
                    "min_term_freq": 1,
                    "max_query_terms": 15,
                    "min_doc_freq": 1,
                }
            }
        }
        # Pass the body by keyword: the positional slot of `body` in
        # `Elasticsearch.search` is not the same across client releases,
        # whereas the keyword form is unambiguous.
        res = self.es.search(
            body=search_body,
            index=constants.DEFAULT_LITE_INDEX,
        )
        return res["hits"]["hits"]
class ParkingService(Base):
    """Decide whether a vehicle may park, based on its recognised plate.

    A plate image is sent to the OCR API, the plate text is extracted from
    the response, a set of rejection rules is applied, and the outcome is
    stored through ``db_handler`` and printed.
    """

    def __init__(self):
        super(ParkingService, self).__init__()
        self._api_handler = ApiHandler()
        self._content_handler = ContentHandler()
        # db_handler is public for testing purpose
        self.db_handler = DateBaseHandler()

    # TODO: implement functionality for multiple images
    def check_plate(self, image_name):
        """Recognise the plate in *image_name* and decide if parking is allowed.

        Args:
            image_name: Name of the image file handed to the API handler.

        Returns:
            A two-item list ``[text, is_allowed]`` (returned for testing
            purposes): the recognised plate text and the decision.
        """
        response = self._api_handler.post_by_file_name(image_name)
        text = self._content_handler.get_parsed_text(response)

        # Fail closed: when no plate text was recognised there is nothing to
        # validate, so the vehicle is rejected instead of the rule helpers
        # being run on an empty value.
        # NOTE(review): confirm rejecting unreadable plates is the desired
        # policy; previously an empty text raised IndexError here.
        is_allowed = bool(text) and not (
            self._is_transport_with_no_letters(text)
            or self._is_public_transport(text)
            or self._is_military_transport(text)
        )

        self._store_result(text, is_allowed)
        self.debug_info('Plate', text)
        self._print_result(is_allowed)

        # Returning text and status for testing purpose
        return [text, is_allowed]

    def set_wrong_api_url(self):
        """
        For testing purpose ONLY
        """
        self._api_handler = ApiHandler(
            api_url='https://api.ocr.space/parse/imageasd')

    def _is_public_transport(self, text):
        """Return True when the plate ends with 'G' or '6'."""
        # endswith() is False for an empty string, unlike text[-1] which
        # raises IndexError.
        return text.endswith(('G', '6'))

    def _is_military_transport(self, text):
        """Return True when the plate contains 'L' or 'M'."""
        return 'L' in text or 'M' in text

    def _is_transport_with_no_letters(self, text):
        """Return True when the plate consists only of decimal digits."""
        return text.isdecimal()

    def _store_result(self, plate, is_allowed):
        """Persist the plate with status 'allowed' or 'rejected'."""
        status = 'allowed' if is_allowed else 'rejected'
        self.db_handler.store_plate_info(plate, status)

    def _print_result(self, is_allowed):
        """Print the decision for the driver."""
        if is_allowed:
            print('You may park here')
            return

        print('YOU SHALL NOT PASS')
def add_content():
    """Create or update a piece of content on behalf of an admin.

    The request body is JSON with two top-level keys:

        'password': admin password, compared with ``admin.password`` read
                    from ``config.json``
        'content':  {
            '_id': optional; when present the existing content is updated,
            'title': "default",
            'scenes': [{
                'content': 'firstSceneVideo.mp4',
                'choices': {'good': 'Une Demi', 'bad': "Un demi"},
                'good': {'content': "MyGoodAnswer.mp4",
                         'reason': "Yeah cheers to that"},
                'bad': {'content': "MyBadAnswre.mp4",
                        'reason': "You should know that, drink more"}
            }, ...]
        }

    Returns:
        The JSON string ``"{}"`` on success.

    Raises:
        InvalidUsage: with status code 401 when the password does not match
            the configured one, or when no admin password is configured.
    """
    data = json.loads(request.data)
    password = data.get('password')
    # Do not write the submitted password to the log: log files are not a
    # safe place for credentials.
    log.info("admin request received")

    with open('config.json', 'r') as f:
        expected_password = json.load(f).get('admin', {}).get('password', '')

    # Fail closed: with no (or an empty) configured password, an empty
    # submitted password must not be accepted as valid credentials.
    if not expected_password or password != expected_password:
        raise InvalidUsage("Passord credentials required", status_code=401)

    data = data['content']
    # Copy only the known fields so unexpected keys are not stored.
    content = {
        'title': data['title'],
        'scenes': [{
            'content': scene['content'],
            'choices': scene['choices'],
            'good': scene['good'],
            'bad': scene['bad']
        } for scene in data['scenes']]
    }

    content_id = data.get('_id')
    if content_id:
        log.debug("updating content %s", content_id)
        ContentHandler().update_content(content_id, content)
    else:
        log.debug("New content")
        ContentHandler().add_content(content)
    return json.dumps({})
def new_game(self, user_id):
    """Create a game in status 'requested' for *user_id* and return it.

    Content is requested from ``ContentHandler.get_content`` with the user's
    history passed as ``played``. The battle tag is the user's username
    followed by ``#`` and a random integer between 1111 and 9999.

    Returns:
        The stored game document, re-read from the collection, with the
        content title added under ``'title'``.
    """
    content_handler = ContentHandler()
    content = content_handler.get_content(
        played=self.get_user_history(user_id))

    username = UserHandler().get_user(user_id)['username']
    tag_number = random.randint(1111, 9999)

    new_entry = {
        'battle_tag': "%s#%d" % (username, tag_number),
        'content_id': content['_id'],
        'users': {user_id: {'result': []}},
        'status': 'requested'
    }

    # Insert, then read the document back by its new id.
    inserted_id = self.game.insert_one(new_entry).inserted_id
    stored_game = self.game.find_one({'_id': inserted_id})
    stored_game['title'] = content['title']
    log.info(stored_game)
    return stored_game
def get_player_history(self, user_id):
    """Build the list of past games for *user_id*.

    Every game document that has a ``users.<user_id>`` field is turned into
    a dict with the user's own ``'score'``, an ``'opponents'`` mapping of
    username to result, and the content ``'title'``.

    A game is kept only when the user has at least one recorded result or
    the game has more than one opponent.
    """
    player_field = "users.%s" % user_id
    played_games = self.game.find({player_field: {'$exists': 1}})

    history = []
    for game in played_games:
        content = ContentHandler().get_content(content_id=game['content_id'])
        summary = {'score': [], 'opponents': {}, 'title': content['title']}

        for participant_id, participant in game['users'].items():
            if participant_id == user_id:
                summary['score'] = participant['result']
                continue
            opponent_name = UserHandler().get_user(participant_id)['username']
            summary['opponents'][opponent_name] = participant['result']

        has_score = len(summary['score']) > 0
        if has_score or len(summary['opponents']) > 1:
            history.append(summary)

    return history
def get_scene(self, user_id, game_id):
    """Return the scene *user_id* should see next in game *game_id*.

    The scene index is the number of results already recorded for the user
    in that game.
    """
    query = {'_id': ObjectId(game_id)}
    game = self.game.find_one(query)
    log.info(game)

    recorded_results = game['users'][user_id]['result']
    scene_index = len(recorded_results)
    return ContentHandler().get_content_scene(game['content_id'], scene_index)
""" import sys from pprint import pprint import pandas as pd from elasticsearch import Elasticsearch import constants from content_handler import ContentHandler sys.path.append('../datasets/') import dataset_config es = Elasticsearch() ch = ContentHandler() es.indices.create(constants.DEFAULT_LITE_INDEX, ignore=[constants.ErrorConstants.INDEX_EXISTS]) print('[INFO] Index created') print('[INFO] Reading abstracts') docs_to_be_indexed = [] df = pd.read_csv(constants.ABSTRACTS_CSV_PATH) for i in range(0, df.shape[0]): print('[INFO] Preparing data :' + str(i) + "/" + str(df.shape[0])) row = list(df.iloc[i]) document_id = row[dataset_config.GLOBAL_INDEX[dataset_config.ABSTRACTS]["documentIdIndex"]] title = row[dataset_config.GLOBAL_INDEX[dataset_config.ABSTRACTS]["titleIndex"]] abstract = row[dataset_config.GLOBAL_INDEX[dataset_config.ABSTRACTS]["abstractIndex"]] paper_link = row[dataset_config.GLOBAL_INDEX[dataset_config.ABSTRACTS]["researchPaperUrlIndex"]] title_index = ch.transform(title)
def get_all_content():
    """Return every content document as a JSON string.

    Each document's ``'_id'`` is replaced in place by its string form before
    the collection is serialised.
    """
    documents = ContentHandler().get_all()
    for document in documents:
        identifier = document['_id']
        document['_id'] = str(identifier)
    return json.dumps(documents)
def __init__(self):
    """Set up the two collaborators used by this object."""
    # Elasticsearch client, created with its default arguments.
    self.es = Elasticsearch()
    # Content handler instance kept for later use.
    self.ch = ContentHandler()
import sys

# The project modules imported below are resolved through these relative
# paths, so they must be appended before those imports run.
sys.path.append('../../es/datasets')
sys.path.append('../../es/es_core/')
sys.path.append('../../top_search/')

from content_handler import ContentHandler
from flask import Flask, request
from workerTask.SearchModel import SearchModel
from top_search import top_search_impl

app = Flask(__name__, instance_relative_config=True)
ch = ContentHandler()
modelInst = SearchModel()


@app.route('/query')
def query():
    """Search with keywords extracted from the query text.

    Reads ``"query"`` from the JSON request body, extracts keywords with
    ``ContentHandler.get_keywords`` and returns the model result with the
    keywords added under ``"keywords"``.
    """
    data = request.get_json()
    keywords = ch.get_keywords(data["query"])
    result = modelInst.getResult(data["query"], " ".join(keywords))
    result["keywords"] = keywords
    return result


@app.route('/queryWithoutKey')
def queryWithoutKey():
    """Search with keywords supplied by the caller.

    Reads ``"query"`` and ``"keywords"`` from the JSON request body and
    returns the model result with the given keywords under ``"keywords"``.
    """
    data = request.get_json()
    result = modelInst.getResult(data["query"], " ".join(data["keywords"]))
    result["keywords"] = data["keywords"]
    # Return the result like query() does; a Flask view that returns None
    # fails with "view function did not return a valid response".
    return result
def __init__(self):
    """Initialise the base class and create the three handlers."""
    super(ParkingService, self).__init__()

    # Private collaborators.
    self._api_handler = ApiHandler()
    self._content_handler = ContentHandler()

    # db_handler is public for testing purpose
    self.db_handler = DateBaseHandler()
""" Get all unique words. """ import sys import pandas as pd sys.path.append('../es/datasets/') sys.path.append('../es/es_core/') from content_handler import ContentHandler import dataset_config file_path = '' ch = ContentHandler() if len(sys.argv) > 1: file_path = sys.argv[1] else: print('[ERROR] Specify file path in the command line arg') sys.exit(1) def encoding_check(wrd): try: wrd.encode(encoding='utf-8').decode('ascii') return True except Exception: return False def lev2_clean(wd):