Example #1
0
def mongodb(logger, request):
    """Initialise the db layer against the unit testing database and reset it.

    The port and host are read from ``environ_settings``; the database name
    is always ``unittesting-db``.

    :param logger: Receives a debug message showing the configuration used.

    :param request: Not used by this function.

    """
    from nozama.cloudsearch.data import db
    from nozama.cloudsearch.service import environ_settings

    # Literal keeps the original evaluation order: port first, then host.
    cfg = {
        'db_name': 'unittesting-db',
        'port': environ_settings.MONGO_PORT(),
        'host': environ_settings.MONGO_HOST(),
    }
    logger.debug("MongoDB config<{0}>".format(cfg))

    db.init(cfg)
    store = db.db()
    store.hard_reset()
Example #2
0
def search(query=None):
    """Perform a search across text fields.

    The text in ``q`` is sent to elasticsearch as a ``query_string`` search
    with a trailing ``*``; when ``q`` is empty a ``match_all`` search is run
    instead. Each hit is then looked up in the mongo ``documents`` collection
    to recover its fields.

    :param query: A dict of search arguments. ``q`` is the text to search
        for. If ``format`` is ``sdk`` every field value that is not already
        a list is wrapped in one. ``None`` (the default) is treated as an
        empty dict.

    :returns: A dict compatible with an Amazon CloudSearch response.

    """
    if query is None:
        query = {}

    log = get_log('search')
    es = get_es()

    qstring = query.get('q', '')
    log.debug("searching query '{0}'".format(query))
    format_type = query.get('format', '')

    if qstring:
        es_query = {"query": {"query_string": {"query": u"{0}*".format(qstring)}}}
    else:
        es_query = {"query": {"match_all": {}}}

    # NOTE(review): handling for a missing index (previously an
    # ElasticHttpNotFoundError catch that substituted an empty result) is
    # disabled, so any error raised by the search call propagates.
    results = es.conn.search(index=es.index, body=es_query)

    hit = []
    conn = db().conn()
    for es_hit in results['hits']['hits']:
        doc_id = es_hit['_id']
        stored = conn.documents.find_one(dict(_id=doc_id))
        if stored is None:
            # The index can hold ids mongo no longer has; skip those rather
            # than failing the whole search on a None lookup. "found" below
            # still reports the total elasticsearch gave us.
            log.debug("no stored document for hit '{0}', skipping".format(doc_id))
            continue

        fields = stored['fields']
        if format_type == u'sdk':
            for key, value in fields.items():
                if not isinstance(value, list):
                    fields[key] = [value]
        hit.append({'id': doc_id, 'fields': fields})

    rc = {
        "rank": "-text_relevance",
        "match-expr": u"(label '{0}')".format(qstring),
        "hits": {
            "found": results['hits']['total']['value'],
            "start": 0,
            "hit": hit
        },
        "info": {
            "rid": binascii.hexlify(os.urandom(40)).decode(),
            "time-ms": results['took'],
            "cpu-time-ms": 0
        }
    }

    log.debug("found '{0}'".format(rc))

    return rc
def removed():
    """Fetch every document held in the removed store.

    :returns: A list of everything found in the ``documents_removed``
        collection.

    """
    log = get_log("removed")

    cursor = db().conn().documents_removed.find()
    removed_docs = list(cursor)

    log.debug("Returning '{0}' documents".format(len(removed_docs)))
    return removed_docs
def all():
    """Fetch every stored document.

    :returns: A list of everything found in the ``documents`` collection.

    """
    log = get_log("all")

    cursor = db().conn().documents.find()
    stored_docs = list(cursor)

    log.debug("Returning '{0}' documents".format(len(stored_docs)))
    return stored_docs
Example #5
0
def removed():
    """List the documents that have been moved to the removed store.

    :returns: The contents of the ``documents_removed`` collection as a list.

    """
    log = get_log('removed')
    collection = db().conn().documents_removed

    found = list(collection.find())
    count = len(found)
    log.debug("Returning '{0}' documents".format(count))

    return found
Example #6
0
def all():
    """List the documents currently in the store.

    :returns: The contents of the ``documents`` collection as a list.

    """
    log = get_log('all')
    collection = db().conn().documents

    found = list(collection.find())
    count = len(found)
    log.debug("Returning '{0}' documents".format(count))

    return found
def remove_all():
    """Remove all stored documents.

    Drops both the ``documents`` and ``documents_removed`` mongo collections
    and then calls ``hard_reset()`` on the elasticsearch helper.

    """
    log = get_log("remove_all")
    conn = db().conn()
    conn.documents.drop()
    conn.documents_removed.drop()
    # warning() rather than warn(): the latter is a deprecated alias.
    log.warning("all documents have been removed.")
    # Remove all of the documents from elasticsearch as well.
    get_es().hard_reset()
    log.warning("All indexes removed from elasticsearch.")
Example #8
0
def remove_all():
    """Remove all stored documents.

    Drops both the ``documents`` and ``documents_removed`` mongo collections
    and then calls ``hard_reset()`` on the elasticsearch helper.

    """
    log = get_log('remove_all')
    conn = db().conn()
    conn.documents.drop()
    conn.documents_removed.drop()
    # warning() rather than warn(): the latter is a deprecated alias.
    log.warning("all documents have been removed.")
    # Remove all of the documents from elasticsearch as well.
    get_es().hard_reset()
    log.warning("All indexes removed from elasticsearch.")
def load(docs_to_load):
    """Apply a batch of Amazon SDF documents to the mongo store.

    Every document is first validated with ``DOC_SCHEMA``. Documents whose
    type is ``add`` are bulk inserted into the ``documents`` collection and
    handed to ``add_to_elasticsearch``. Any other document is treated as a
    removal: if it is present in ``documents`` it is copied to
    ``documents_removed`` and then removed from ``documents``.

    :param docs_to_load: An iterable of SDF document dicts.

    :returns: An amazon compatible documents/batch Response Property dict.

    For example:

    .. code-block:: python

        rc = dict(
            status='ok',
            adds=len(to_load),
            deletes=len(to_remove),
            error='',
            warning='',
        )

    ``deletes`` counts the removals requested, whether or not a matching
    document was found.

    Reference:
      * http://docs.aws.amazon.com/cloudsearch/latest/developerguide/\
            DocumentsBatch.JSON.html#DocumentsBatch.JSON.ResponseProperties

    """
    log = get_log("load")
    conn = db().conn()

    additions, removals = [], []

    # Validate everything up front; nothing is written until the whole batch
    # has passed the schema.
    for raw_doc in docs_to_load:
        checked = DOC_SCHEMA.to_python(raw_doc)

        if checked["type"] != "add":
            log.debug("to remove: '{0}'".format(checked))
            removals.append(checked)
            continue

        # The document's own id doubles as the mongodb unique id.
        checked["_id"] = checked["id"]
        # The type is only needed to route the document, not for storage.
        checked.pop("type")
        log.debug("to_load: {0}".format(checked))
        additions.append(checked)

    if additions:
        log.debug("bulk loading: '{0}' document(s)".format(len(additions)))
        conn.documents.insert(additions)
        for stored in additions:
            add_to_elasticsearch(stored)

    # Move each removed document that actually exists over to the removed
    # collection before taking it out of the live one.
    removal_ids = [entry["id"] for entry in removals]
    for removal_id in removal_ids:
        lookup = {"_id": removal_id}
        existing = conn.documents.find_one(lookup)
        if not existing:
            continue
        log.debug("adding to remove store: '{0}'".format(lookup))
        conn.documents_removed.insert(existing)
        conn.documents.remove(lookup)

    return {
        "status": "ok",
        "adds": len(additions),
        "deletes": len(removals),
        "error": "",
        "warning": "",
    }
Example #10
0
def load(docs_to_load):
    """Apply a batch of Amazon SDF documents to the mongo store.

    Every document is first validated with ``DOC_SCHEMA``. Documents whose
    type is ``add`` are upserted into the ``documents`` collection and handed
    to ``add_to_elasticsearch``. Any other document is treated as a removal:
    if it is present in ``documents`` it is copied to ``documents_removed``,
    removed from ``documents`` and passed to ``remove_from_elasticsearch``.

    :param docs_to_load: An iterable of SDF document dicts.

    :returns: An amazon compatible documents/batch Response Property dict.

    For example:

    .. code-block:: python

        rc = dict(
            status='ok',
            adds=len(to_load),
            deletes=len(to_remove),
            error='',
            warning='',
        )

    ``deletes`` counts the removals requested, whether or not a matching
    document was found.

    Reference:
      * http://docs.aws.amazon.com/cloudsearch/latest/developerguide/\
            DocumentsBatch.JSON.html#DocumentsBatch.JSON.ResponseProperties

    """
    log = get_log('load')
    conn = db().conn()

    additions, removals = [], []

    # Validate everything up front; nothing is written until the whole batch
    # has passed the schema.
    for raw_doc in docs_to_load:
        checked = DOC_SCHEMA.to_python(raw_doc)

        if checked['type'] != "add":
            log.debug("to remove: '{0}'".format(checked))
            removals.append(checked)
            continue

        # The document's own id doubles as the mongodb unique id.
        checked['_id'] = checked['id']
        # The type is only needed to route the document, not for storage.
        checked.pop('type')
        log.debug("to_load: {0}".format(checked))
        additions.append(checked)

    if additions:
        log.debug("bulk loading: '{0}' document(s)".format(len(additions)))
        for new_doc in additions:
            # Third argument True: upsert.
            conn.documents.update({'_id': new_doc['id']}, new_doc, True)
            add_to_elasticsearch(new_doc)

    # Move each removed document that actually exists over to the removed
    # collection before taking it out of the live one and the index.
    for gone in removals:
        lookup = {'_id': gone['id']}
        existing = conn.documents.find_one(lookup)
        if not existing:
            continue
        log.debug("adding to remove store: '{0}'".format(lookup))
        conn.documents_removed.insert(existing)
        conn.documents.remove(lookup)
        remove_from_elasticsearch(gone)

    return {
        'status': 'ok',
        'adds': len(additions),
        'deletes': len(removals),
        'error': '',
        'warning': '',
    }
def search(query=None):
    """Perform a search across text fields.

    The text in ``q`` is sent to elasticsearch as a ``query_string`` search
    with a trailing ``*``; when ``q`` is empty a ``match_all`` search is run
    instead. Each hit is then looked up in the mongo ``documents`` collection
    to recover its fields.

    :param query: A dict of search arguments. ``q`` is the text to search
        for. If ``format`` is ``sdk`` every field value that is not already
        a list is wrapped in one. ``None`` (the default) is treated as an
        empty dict.

    :returns: A dict compatible with an Amazon CloudSearch response.

    """
    # Imported here so this function does not depend on a module level import.
    import binascii

    if query is None:
        query = {}

    log = get_log('search')
    es = get_es()

    qstring = query.get('q', '')
    log.debug("searching query '{0}'".format(query))
    format_type = query.get('format', '')

    if qstring:
        es_query = {
            "query": {
                "query_string": {
                    "query": u"{0}*".format(qstring)
                }
            }
        }
    else:
        es_query = {"query": {"match_all": {}}}

    try:
        results = es.conn.search(es_query, index=es.index)

    except ElasticHttpNotFoundError:
        # No documents present in store. Don't worry about it there's nothing
        # to search
        results = dict(
            hits=dict(hits=[], total=0),
            took=0,
        )

    hit = []
    conn = db().conn()
    for es_hit in results['hits']['hits']:
        doc_id = es_hit['_id']
        stored = conn.documents.find_one(dict(_id=doc_id))
        if stored is None:
            # The index can hold ids mongo no longer has; skip those rather
            # than failing the whole search on a None lookup. "found" below
            # still reports the total elasticsearch gave us.
            log.debug("no stored document for hit '{0}', skipping".format(doc_id))
            continue

        fields = stored['fields']
        if format_type == u'sdk':
            for key, value in fields.items():
                if not isinstance(value, list):
                    fields[key] = [value]
        hit.append({'id': doc_id, 'fields': fields})

    rc = {
        "rank": "-text_relevance",
        "match-expr": u"(label '{0}')".format(qstring),
        "hits": {
            "found": results['hits']['total'],
            "start": 0,
            "hit": hit
        },
        "info": {
            # bytes.encode('hex') only exists on Python 2; hexlify gives the
            # same hex digits on both Python 2 and 3.
            "rid": binascii.hexlify(os.urandom(40)).decode(),
            "time-ms": results['took'],
            "cpu-time-ms": 0
        }
    }

    log.debug("found '{0}'".format(rc))

    return rc
Example #12
0
def mongodb(request):
    """Initialise the db layer against the unit testing database and reset it.

    Only the database name (``unittesting-db``) is configured here.

    :param request: Not used by this function.

    """
    from nozama.cloudsearch.data import db

    db.init({'db_name': 'unittesting-db'})
    store = db.db()
    store.hard_reset()