Beispiel #1
0
def test_update_content(app):
    """Check that a POSTed title change can be read back through GET.

    A document is seeded into the prod collection, the same payload with a
    new title is POSTed to ``/v1/web/content/``, and the GET response for
    that id is expected to carry the new title under its ``"manual"`` key.
    Both collections are cleaned up afterwards, whatever the outcome.
    """
    content = {
        "title": "some_test_title",
        "keywords": [{"keyword": "change_from_test", "confidence": 0.2010}],
        "texts": ["some test text"],
    }
    input_doc = {
        "data": {
            "id": "test_id",
            "url": "some test url 123",
            "content": content,
        }
    }
    prod_col = Config.get_mongo_collection("prod")
    manual_col = Config.get_mongo_collection("manual")
    # Seed prod with a shallow copy so the payload dict itself is not the
    # object handed to the factory.
    factory.post_document(dict(input_doc["data"]), prod_col)

    try:
        # ``content`` is the very dict nested in ``input_doc``, so this
        # changes the payload that gets POSTed below.
        new_title = "title has been changed"
        content["title"] = new_title

        payload = json.dumps(input_doc)
        response = app.test_client().post('/v1/web/content/', data=payload)
        response = app.test_client().get('/v1/web/content/?id=test_id')
        response_doc = json.loads(response.data.decode())

        print(response_doc)
        assert response_doc["manual"]["title"] == new_title
    finally:
        # Remove the test document from both collections.
        for collection in (manual_col, prod_col):
            factory.delete_document({"id": "test_id"}, collection)
Beispiel #2
0
def test_get_docs_from_url(app):
    """Look up a seeded document by its URL through ``/v1/web/docs/``.

    The first entry of the JSON response must be the document that was
    seeded into the prod collection; the document is removed afterwards.
    """
    seeded = {
        "id": "test_id_for_url",
        "url": "some test url",
        "content": {
            "title": "some_test_title",
            "keywords": [
                {"keyword": "change_from_test", "confidence": 0.2010},
            ],
            "texts": ["some test text"],
        },
    }
    prod_col = Config.get_mongo_collection("prod")
    # Hand over a shallow copy, leaving ``seeded`` itself untouched.
    factory.post_document(dict(seeded), prod_col)

    try:
        raw = app.test_client().get('/v1/web/docs/?url=some test url').data
        matches = json.loads(raw.decode())
        assert matches[0]["id"] == "test_id_for_url"
    finally:
        # Delete the test document again.
        factory.delete_document({"id": "test_id_for_url"}, prod_col)
Beispiel #3
0
def test_get_all_conflicts(app):
    """Check that ``/v1/web/conflict_ids`` lists every stored conflict.

    Two conflict documents are posted to the conflicts collection; each of
    their ``conflict_id`` values must appear as an ``"id"`` in the JSON
    response. The documents are deleted again in the ``finally`` clause.
    """
    # Set up two conflicts.
    conflicts = [{
        "conflict_id": "test_conflict_id_{}".format(i),
        "title": "test_conflict_title_{}".format(i)
    } for i in range(2)]

    conflict_col = Config.get_mongo_collection("conflicts")
    # Post both documents to the conflicts collection.
    for conflict in conflicts:
        factory.post_document(conflict, conflict_col)

    try:
        # The single request lives inside the try block so that a failing
        # request can never skip the cleanup below.
        response = app.test_client().get('/v1/web/conflict_ids')
        response_json = json.loads(response.data.decode())

        # Collect the returned ids once instead of once per assertion.
        returned_ids = {resp["id"] for resp in response_json}
        for conflict in conflicts:
            assert conflict["conflict_id"] in returned_ids
    finally:
        # Delete the test conflicts.
        for conflict in conflicts:
            factory.delete_document({"conflict_id": conflict["conflict_id"]},
                                    conflict_col)
Beispiel #4
0
def check_manually_changed(factory, document):
    """Return the manually edited version of *document* if it is flagged.

    Args:
        factory: object whose ``get_database().get_collection(name)`` gives
            access to the manual collection.
        document: mapping with an ``"id"`` key and an optional
            ``"manually_changed"`` flag.

    Returns:
        *document* itself when the flag is missing or falsy. Otherwise the
        first document in the manual collection with the same ``"id"``, or
        ``None`` when no such document exists.
    """
    # A missing flag is treated like a false one instead of raising KeyError.
    if not document.get("manually_changed"):
        return document

    doc_id = document["id"]
    manual_col = Config.get_mongo_collection("manual")
    matches = factory.get_database().get_collection(manual_col).find(
        {"id": doc_id})
    return next(matches, None)
Beispiel #5
0
    def get_document(self,
                     query,
                     prod_col=Config.get_mongo_collection("prod"),
                     manual_col=Config.get_mongo_collection("manual"),
                     number_of_docs=30):
        """
        Run a MongoDB text search for ``query`` in two collections.

        Up to ``number_of_docs`` of the best scoring documents are fetched
        from the prod collection and, separately, from the manual collection.
        Prod documents are kept only when their ``manually_changed`` field is
        ``False``; every manual hit is kept. The result lists the prod hits
        first and the manual hits after them, so it is not sorted by score
        across both collections.

        Note that the two collection defaults are evaluated once, when the
        method is defined.
        """
        def top_scoring(collection_name):
            # Text search with the score projected into each hit, sorted by
            # that score and capped at number_of_docs.
            cursor = self.get_collection(collection_name).find(
                {'$text': {'$search': query}},
                {'score': {'$meta': 'textScore'}})
            cursor.sort([('score', {'$meta': 'textScore'})]).limit(
                number_of_docs)
            return cursor

        # Prod hits that were manually changed are left out.
        docs = [
            hit for hit in top_scoring(prod_col)
            if hit["manually_changed"] is False
        ]
        docs.extend(top_scoring(manual_col))
        return docs
Beispiel #6
0
def _handle_not_found(query_text):
    '''
    Store the query text in the unknown queries collection and return the
    NOT_FOUND fallback.
    '''
    record = {"query_text": query_text}
    try:
        collection_name = Config.get_mongo_collection("unknown")
        database = factory.get_database()
        database.get_collection(collection_name).insert_one(record)
    except pymongo.errors.DuplicateKeyError:
        # This exact query is already stored in the unknown queries
        # collection, so there is nothing to add.
        pass

    return NOT_FOUND
Beispiel #7
0
def test_get_content(app):
    """Fetch a seeded document by id through ``/v1/web/content/``.

    The ``"prod"`` entry of the JSON response must equal the content of the
    document that was seeded into the prod collection.
    """
    # Seed a content document.
    prod_col = Config.get_mongo_collection("prod")
    factory.post_document(
        {
            "id": "test_content_id",
            "content": "some_test_content",
            "url": "test_url",
        },
        prod_col,
    )

    try:
        client = app.test_client()
        body = client.get("/v1/web/content/?id=test_content_id").data
        parsed = json.loads(body.decode())
        assert parsed["prod"] == "some_test_content"
    finally:
        # Remove the test document again.
        factory.delete_document({"id": "test_content_id"}, prod_col)
Beispiel #8
0
def insert_documents(data):
    """Insert all provided documents and promote them to production.

    The work happens in this order:

    1. Every document in ``data`` is posted to the temp collection.
    2. For each document in the manual collection that has an ``"id"``, the
       temp entry with the same id is flagged ``manually_changed=True``. If
       both a prod and a temp document exist for that id and their
       ``"content"`` differs, a conflict ``{"id", "title"}`` is recorded.
    3. The conflicts are posted to the conflict collection, which gets a
       unique index on ``"title"``; duplicates are ignored.
    4. The prod collection is renamed to ``"old_prod"`` and the temp
       collection takes its place. If that fails, ``"old_prod"`` is renamed
       back. Leftover ``"old_prod"`` and temp collections are dropped.
    5. Indexes are refreshed and a unique index on ``"query_text"`` is
       created on the unknown collection.

    Args:
        data: sized iterable of documents (``len(data)`` is taken).

    Returns:
        The list of conflict dicts found in step 2.
    """
    factory = ModelFactory.get_instance()
    factory.set_db()

    temp_col = Config.get_mongo_collection("temp_scraped")
    manual_col = Config.get_mongo_collection("manual")
    unknown_col = Config.get_mongo_collection("unknown")
    prod_col = Config.get_mongo_collection("prod")
    conflict_col = Config.get_mongo_collection("conflicts")

    print("Starting insertion of {} documents".format(len(data)))
    pbar = ProgressBar()
    # Count explicitly: a loop variable would be unbound for empty ``data``
    # and make the summary line below raise.
    inserted = 0
    for doc in pbar(data):
        factory.post_document(doc, temp_col)
        inserted += 1
    print("Successfully inserted {} documents".format(inserted))

    manual_docs = factory.get_collection(manual_col).find()

    conflicts = []
    for manual_doc in manual_docs:
        # Manual documents without an id cannot be matched to anything.
        if "id" not in manual_doc:
            continue
        idx = manual_doc["id"]

        # Mark corresponding entry in temp collection as manually changed
        factory.get_database() \
               .get_collection(temp_col) \
               .update_one({"id": idx}, {"$set": {"manually_changed": True}})

        prod_doc = next(factory.get_collection(prod_col).find({"id": idx}),
                        None)
        temp_doc = next(factory.get_collection(temp_col).find({"id": idx}),
                        None)

        # A conflict exists when the freshly scraped content differs from
        # what is currently in production for a manually changed document.
        if prod_doc and temp_doc:
            if temp_doc["content"] != prod_doc["content"]:
                title = temp_doc["content"]["title"]
                conflicts.append({"id": idx,
                                  "title": title})

    print("Conflicts: {}".format(conflicts))
    factory.get_collection(conflict_col).create_index([("title", 1)],
                                                      unique=True)
    for conflict in conflicts:
        try:
            factory.post_document(conflict, conflict_col)
        except pymongo.errors.DuplicateKeyError:
            # In case there are duplicate, unsolved conflicts
            pass

    # Update production collection
    db = factory.get_database()
    try:
        db.get_collection(prod_col).rename("old_prod")
    except pymongo.errors.OperationFailure:
        # If the prod collection does not exist
        pass

    try:
        db.get_collection(temp_col).rename(prod_col)
    except Exception as e:
        # Roll back: put the previous production data back in place.
        # NOTE(review): this rename presumably fails too when no "old_prod"
        # exists (prod was missing above) - confirm the intended handling.
        print("Failed to update production db collection")
        print(e)
        db.get_collection("old_prod").rename(prod_col)
    finally:
        # Whatever is still left under these names is no longer needed.
        db.get_collection("old_prod").drop()
        db.get_collection(temp_col).drop()

    # Update all indexes
    factory.set_index(prod_col)
    factory.set_index(manual_col)
    # NOTE(review): the temp collection has just been renamed or dropped;
    # confirm that indexing it here is intended.
    factory.set_index(temp_col)
    # Removes duplicates
    factory.get_collection(unknown_col).create_index([("query_text", 1)],
                                                     unique=True)

    return conflicts
Beispiel #9
0
import pytest
import json

from chatbot.api import server
from chatbot.model.model_factory import ModelFactory
from chatbot.util.config_util import Config

# Module-level setup, executed once when this test module is imported.
# set_db() is called on the factory before any test touches the database.
factory = ModelFactory.get_instance()
factory.set_db()

# Values looked up from Config for the collections used here
# (presumably the collection names - verify against Config).
prod_col = Config.get_mongo_collection("prod")
manual_col = Config.get_mongo_collection("manual")
conflict_col = Config.get_mongo_collection("conflicts")
unknown_col = Config.get_mongo_collection("unknown")


@pytest.fixture(scope='module')
def client():
    """Provide a test client for ``server.app``, created once per module."""
    test_client = server.app.test_client()
    return test_client


def test_swagger(client):
    """A GET request to the root route must answer with HTTP 200."""
    status = client.get('/').status_code
    assert status == 200


def test_response(client):
    query = 'some test response'
    try:
        response = client.get('/v2/response/{}/'.format(query))
        assert response.status_code == 200