Example #1
0
class QueryHandler:
    """Run "more like this" searches against the lite Elasticsearch index."""
    # TODO: Query Interface for deep doc search.

    def __init__(self):
        self.es = Elasticsearch()
        self.ch = ContentHandler()

    def lite_search(self, query):
        """Return the hits of a ``more_like_this`` search for *query*.

        The query text is first passed through ``self.ch.transform``; the
        transformed text is matched against the ``title_index`` and
        ``abstract_index`` fields of ``constants.DEFAULT_LITE_INDEX``.

        Returns an empty list when *query* is not a ``str`` or when the
        transformed query is the empty string; otherwise returns the
        ``["hits"]["hits"]`` entry of the search response.
        """
        if not isinstance(query, str):
            return []

        like_text = self.ch.transform(query)
        if like_text == '':
            return []

        # TODO: Proper tf-idf freq.
        more_like_this = {
            "fields": ["title_index", "abstract_index"],
            "like": like_text,
            "min_term_freq": 1,
            "max_query_terms": 15,
            "min_doc_freq": 1
        }
        search_body = {"query": {"more_like_this": more_like_this}}
        response = self.es.search(
            search_body, index=constants.DEFAULT_LITE_INDEX)
        return response["hits"]["hits"]
class ParkingService(Base):
    """Decide whether a vehicle may park, based on the text read from its plate.

    The plate image is sent through ``ApiHandler``, the response is turned
    into text by ``ContentHandler``, the decision is stored through
    ``db_handler`` and reported on stdout.
    """

    def __init__(self):
        super(ParkingService, self).__init__()
        self._api_handler = ApiHandler()
        self._content_handler = ContentHandler()
        # db_handler is public for testing purpose
        self.db_handler = DateBaseHandler()

    # TODO: implement functionality for multiple images
    def check_plate(self, image_name):
        """Read the plate in *image_name* and decide whether parking is allowed.

        A plate is rejected when its text is empty, consists only of decimal
        digits, ends in ``'G'`` or ``'6'``, or contains ``'L'`` or ``'M'``.
        The decision is stored and printed.

        Returns:
            ``[text, is_allowed]`` -- the parsed plate text and the decision
            (returned for testing purpose).
        """
        response = self._api_handler.post_by_file_name(image_name)
        text = self._content_handler.get_parsed_text(response)

        # An empty reading cannot be classified. Previously it crashed with
        # IndexError in _is_public_transport; it is now rejected explicitly.
        is_allowed = bool(text) and not (
            self._is_transport_with_no_letters(text)
            or self._is_public_transport(text)
            or self._is_military_transport(text)
        )

        self._store_result(text, is_allowed)
        # NOTE(review): debug_info is not defined in this class; presumably
        # it is inherited from Base -- confirm.
        self.debug_info('Plate', text)
        self._print_result(is_allowed)

        # Returning text and status for testing purpose
        return [text, is_allowed]

    def set_wrong_api_url(self):
        """
        For testing purpose ONLY
        """
        self._api_handler = ApiHandler(
            api_url='https://api.ocr.space/parse/imageasd')

    def _is_public_transport(self, text):
        """Return True when the plate text ends in 'G' or '6'."""
        # endswith() is safe on an empty string, unlike text[-1].
        return text.endswith(('G', '6'))

    def _is_military_transport(self, text):
        """Return True when the plate text contains 'L' or 'M'."""
        return 'L' in text or 'M' in text

    def _is_transport_with_no_letters(self, text):
        """Return True when the plate text consists only of decimal digits."""
        return text.isdecimal()

    def _store_result(self, plate, is_allowed):
        """Persist the plate with status 'allowed' or 'rejected'."""
        status = 'allowed' if is_allowed else 'rejected'
        self.db_handler.store_plate_info(plate, status)

    def _print_result(self, is_allowed):
        """Print the parking decision to stdout."""
        if is_allowed:
            print('You may park here')
        else:
            print('YOU SHALL NOT PASS')
Example #3
0
def add_content():
    """
    Create a new content entry, or update an existing one (admin only).

    Data expected (JSON request body):
    'password': admin password, compared with admin.password in config.json,
    'content': {
        '_id': optional; when present the existing content is updated,
        'title': "default",
        'scenes': [{
            'content': 'firstSceneVideo.mp4',
            'choices': {'good': 'Une Demi', 'bad': "Un demi"},
            'good': {'content': "MyGoodAnswer.mp4", 'reason': "Yeah cheers to that"},
            'bad': {'content': "MyBadAnswre.mp4", 'reason': "You should know that, drink more"}
        }]
    }

    Returns an empty JSON object as a string.
    Raises InvalidUsage (status 401) when the password does not match.
    """
    data = json.loads(request.data)
    password = data.get('password')

    # The submitted credential must never be written to the log.
    log.info("admin request received")

    with open('config.json', 'r') as f:
        expected_password = json.load(f).get('admin', {}).get('password', '')

    # A missing or empty configured password must not authenticate anyone:
    # previously a request carrying password '' matched the '' default.
    if not expected_password or password != expected_password:
        raise InvalidUsage("Passord credentials required", status_code=401)

    payload = data['content']
    # Copy only the known keys of each scene so extra fields are not stored.
    content = {
        'title': payload['title'],
        'scenes': [{
            'content': scene['content'],
            'choices': scene['choices'],
            'good': scene['good'],
            'bad': scene['bad']
        } for scene in payload['scenes']]
    }
    content_id = payload.get('_id')
    if content_id:
        log.debug("updating content %s", content_id)
        ContentHandler().update_content(content_id, content)
    else:
        log.debug("New content")
        ContentHandler().add_content(content)
    return json.dumps({})
 def new_game(self, user_id):
     """Create a game in status 'requested' for *user_id* and return it.

     The content is obtained from ContentHandler.get_content, which is given
     the user's history as ``played``. The stored game document is read back
     after insertion and returned with the content title added under 'title'.
     """
     # NOTE(review): get_user_history is not defined in the visible part of
     # this class -- confirm it exists.
     content = ContentHandler().get_content(played=self.get_user_history(user_id))
     username = UserHandler().get_user(user_id)['username']
     tag_number = random.randint(1111, 9999)
     new_entry = {
         'battle_tag': "%s#%d" % (username, tag_number),
         'content_id': content['_id'],
         'users': {user_id: {'result': []}},
         'status': 'requested'
     }
     inserted_id = self.game.insert_one(new_entry).inserted_id
     stored_game = self.game.find_one({'_id': inserted_id})
     stored_game['title'] = content['title']
     log.info(stored_game)
     return stored_game
 def get_player_history(self, user_id):
     """Return the list of past games in which *user_id* appears.

     Each entry is a dict with the user's own 'score', an 'opponents' mapping
     of username -> result, and the content 'title'. A game is included only
     when the user has at least one result or it has more than one opponent.
     """
     membership_field = "users.%s" % user_id
     history = []
     for game in self.game.find({membership_field: {'$exists': 1}}):
         content = ContentHandler().get_content(content_id=game['content_id'])
         title = content['title']
         own_score = []
         opponents = {}
         for player_id, player in game['users'].items():
             if user_id == player_id:
                 own_score = player['result']
             else:
                 opponent_name = UserHandler().get_user(player_id)['username']
                 opponents[opponent_name] = player['result']
         if len(own_score) > 0 or len(opponents) > 1:
             history.append(
                 {'score': own_score, 'opponents': opponents, 'title': title})
     return history
 def get_scene(self, user_id, game_id):
     """Return the next scene of game *game_id* for *user_id*.

     The scene index is the number of results the user has already recorded
     in that game.
     """
     lookup = {'_id': ObjectId(game_id)}
     game = self.game.find_one(lookup)
     log.info(game)
     recorded_results = game['users'][user_id]['result']
     next_scene_index = len(recorded_results)
     return ContentHandler().get_content_scene(game['content_id'], next_scene_index)
"""

import sys
from pprint import pprint

import pandas as pd
from elasticsearch import Elasticsearch

import constants
from content_handler import ContentHandler

sys.path.append('../datasets/')
import dataset_config

# Elasticsearch client (default constructor arguments) and the content
# handler used below to transform titles.
es = Elasticsearch()
ch = ContentHandler()

# Create the lite index. INDEX_EXISTS is passed in `ignore`, presumably so
# that an already-existing index is not treated as an error -- confirm the
# constant's value. Note the message below is printed either way.
es.indices.create(constants.DEFAULT_LITE_INDEX, ignore=[constants.ErrorConstants.INDEX_EXISTS])
print('[INFO] Index created')
print('[INFO] Reading abstracts')
docs_to_be_indexed = []
df = pd.read_csv(constants.ABSTRACTS_CSV_PATH)

# Walk the CSV row by row; df.shape[0] is the number of rows.
for i in range(0, df.shape[0]):
    print('[INFO] Preparing data  :' + str(i) + "/" + str(df.shape[0]))
    # Row values as a plain list, so fields are addressed by column position.
    row = list(df.iloc[i])
    # Column positions come from the ABSTRACTS entry of dataset_config.GLOBAL_INDEX.
    document_id = row[dataset_config.GLOBAL_INDEX[dataset_config.ABSTRACTS]["documentIdIndex"]]
    title = row[dataset_config.GLOBAL_INDEX[dataset_config.ABSTRACTS]["titleIndex"]]
    abstract = row[dataset_config.GLOBAL_INDEX[dataset_config.ABSTRACTS]["abstractIndex"]]
    paper_link = row[dataset_config.GLOBAL_INDEX[dataset_config.ABSTRACTS]["researchPaperUrlIndex"]]
    # Transformed form of the title produced by ContentHandler.transform.
    title_index = ch.transform(title)
Example #8
0
def get_all_content():
    """Return every content entry as a JSON string.

    Each entry's '_id' is replaced in place by its string form before
    serialisation.
    """
    # NOTE(review): assumes get_all() returns a re-iterable collection such
    # as a list; a one-shot iterator would be consumed by the loop -- confirm.
    entries = ContentHandler().get_all()
    for entry in entries:
        id_as_text = str(entry['_id'])
        entry['_id'] = id_as_text
    return json.dumps(entries)
Example #9
0
 def __init__(self):
     """Create the Elasticsearch client and the content handler used by this object."""
     # Elasticsearch client built with the library's default arguments.
     self.es = Elasticsearch()
     # ContentHandler instance kept for later use by the other methods.
     self.ch = ContentHandler()
Example #10
0
import sys

sys.path.append('../../es/datasets')
sys.path.append('../../es/es_core/')
sys.path.append('../../top_search/')
from content_handler import ContentHandler
from flask import Flask, request

from workerTask.SearchModel import SearchModel
from top_search import top_search_impl

# Flask application object; the route handlers below are registered on it.
# instance_relative_config=True is a Flask option that resolves config file
# names relative to the instance folder.
app = Flask(__name__, instance_relative_config=True)
# Shared ContentHandler; the /query route uses it to extract keywords.
ch = ContentHandler()

# Shared SearchModel instance; both routes call getResult() on it.
modelInst = SearchModel()


@app.route('/query')
def query():
    """Search for the request's "query" text using keywords extracted from it.

    Reads the JSON request body, extracts keywords from its "query" value
    with ``ch.get_keywords``, asks ``modelInst.getResult`` for the result and
    returns it with the keywords added under "keywords".
    """
    payload = request.get_json()
    query_text = payload["query"]
    keywords = ch.get_keywords(query_text)
    joined_keywords = " ".join(keywords)
    response = modelInst.getResult(query_text, joined_keywords)
    response["keywords"] = keywords
    return response


@app.route('/queryWithoutKey')
def queryWithoutKey():
    """Search for the request's "query" text using caller-supplied keywords.

    Reads "query" and "keywords" from the JSON request body, asks
    ``modelInst.getResult`` for the result and returns it with the supplied
    keywords added under "keywords".
    """
    data = request.get_json()
    result = modelInst.getResult(data["query"], " ".join(data["keywords"]))
    result["keywords"] = data["keywords"]
    # A view must return a response value; without this the function
    # returned None, which Flask rejects. The `query` view returns its
    # result the same way.
    return result
 def __init__(self):
     """Initialise the base class and create the API, content and database handlers."""
     super(ParkingService, self).__init__()
     # Handlers are private; they are created with their default arguments.
     self._api_handler = ApiHandler()
     self._content_handler = ContentHandler()
     # db_handler is public for testing purpose
     self.db_handler = DateBaseHandler()
Example #12
0
"""
Get all unique words.
"""

import sys

import pandas as pd

sys.path.append('../es/datasets/')
sys.path.append('../es/es_core/')
from content_handler import ContentHandler
import dataset_config

file_path = ''
ch = ContentHandler()
# The input file path is mandatory: stop with exit status 1 when it is missing.
if len(sys.argv) <= 1:
    print('[ERROR] Specify file path in the command line arg')
    sys.exit(1)
# First command line argument is the path of the file to process.
file_path = sys.argv[1]


def encoding_check(wrd):
    """Return True when *wrd* can be encoded as UTF-8 and decoded as ASCII.

    Any exception raised along the way (non-ASCII characters, or an object
    without a suitable ``encode`` method) yields False.
    """
    is_plain_ascii = True
    try:
        utf8_bytes = wrd.encode(encoding='utf-8')
        utf8_bytes.decode('ascii')
    except Exception:
        is_plain_ascii = False
    return is_plain_ascii


def lev2_clean(wd):