Ejemplo n.º 1
0
    def results(self):
        '''Returns results as a JSON encodable Python value.

        This calls :meth:`SearchEngine.recommendations` and rewrites
        every entry of its ``'results'`` list as a dict. Each entry
        must be a ``(content_id, fc)`` or ``(content_id, fc, info)``
        sequence: ``info`` (or an empty dict) seeds the output dict,
        the content id is stored under ``'content_id'`` and, unless
        ``self.params['omit_fc']`` is truthy, the feature collection
        is stored under ``'fc'`` after ``util.fc_to_json``.

        The mapping returned by ``recommendations`` and the ``info``
        dicts inside it are shallow-copied instead of being modified
        in place, so the search engine's own data is never aliased by
        the response.

        Aborts with HTTP status 500 when an entry has neither two nor
        three elements.
        '''
        # Shallow copy: replacing the 'results' key below must not
        # alter the object handed out by recommendations().
        results = dict(self.recommendations())
        transformed = []
        for t in results['results']:
            if len(t) == 2:
                cid, fc = t
                info = {}
            elif len(t) == 3:
                cid, fc, info = t
            else:
                # Wrap in a 1-tuple: `fmt % t` with a tuple `t` would try
                # to spread its elements over the single %r and raise
                # TypeError instead of reporting the bad entry.
                bottle.abort(500, 'Invalid search result: "%r"' % (t,))
            # Copy so that adding keys does not mutate the engine's dict.
            result = dict(info)
            result['content_id'] = cid
            if not self.params['omit_fc']:
                result['fc'] = util.fc_to_json(fc)
            transformed.append(result)
        results['results'] = transformed
        return results
Ejemplo n.º 2
0
    def results(self):
        '''Returns results as a JSON encodable Python value.

        This calls :meth:`SearchEngine.recommendations` and rewrites
        every entry of its ``'results'`` list as a dict. Each entry
        must be a ``(content_id, fc)`` or ``(content_id, fc, info)``
        sequence: ``info`` (or an empty dict) seeds the output dict,
        the content id is stored under ``'content_id'`` and, unless
        ``self.params['omit_fc']`` is truthy, the feature collection
        is stored under ``'fc'`` after ``util.fc_to_json``.

        The mapping returned by ``recommendations`` and the ``info``
        dicts inside it are shallow-copied instead of being modified
        in place, so the search engine's own data is never aliased by
        the response.

        Aborts with HTTP status 500 when an entry has neither two nor
        three elements.
        '''
        # Shallow copy: replacing the 'results' key below must not
        # alter the object handed out by recommendations().
        results = dict(self.recommendations())
        transformed = []
        for t in results['results']:
            if len(t) == 2:
                cid, fc = t
                info = {}
            elif len(t) == 3:
                cid, fc, info = t
            else:
                # Wrap in a 1-tuple: `fmt % t` with a tuple `t` would try
                # to spread its elements over the single %r and raise
                # TypeError instead of reporting the bad entry.
                bottle.abort(500, 'Invalid search result: "%r"' % (t,))
            # Copy so that adding keys does not mutate the engine's dict.
            result = dict(info)
            result['content_id'] = cid
            if not self.params['omit_fc']:
                result['fc'] = util.fc_to_json(fc)
            transformed.append(result)
        results['results'] = transformed
        return results
Ejemplo n.º 3
0
def v1_fc_get(visid_to_dbid, store, cid):
    """Fetch one feature collection and return it as JSON data.

    Route: ``/dossier/v1/feature-collections/<content_id>``.

    ``cid`` is translated with ``visid_to_dbid`` and the result is
    looked up in ``store``. If the store yields ``None`` the request
    is aborted with a 404; otherwise the feature collection is
    converted with ``util.fc_to_json`` and returned.
    """
    dbid = visid_to_dbid(cid)
    found = store.get(dbid)
    if found is None:
        message = 'Feature collection "%s" does not exist.' % cid
        bottle.abort(404, message)
    return util.fc_to_json(found)
Ejemplo n.º 4
0
def v1_fc_get(visid_to_dbid, store, cid):
    '''Fetch one feature collection and return it as JSON data.

    Route: ``/dossier/v1/feature-collections/<content_id>``.

    ``cid`` is translated with ``visid_to_dbid`` and the result is
    looked up in ``store``. If the store yields ``None`` the request
    is aborted with a 404; otherwise the feature collection is
    converted with ``util.fc_to_json`` and returned.
    '''
    stored_id = visid_to_dbid(cid)
    feature_collection = store.get(stored_id)
    if feature_collection is None:
        not_found = 'Feature collection "%s" does not exist.' % cid
        bottle.abort(404, not_found)
    return util.fc_to_json(feature_collection)
Ejemplo n.º 5
0
def v1_fc_put(request, response, store, kvlclient, tfidf, cid):
    '''Store a single feature collection.

    The route for this endpoint is:
    ``PUT /dossier/v1/feature-collections/<content_id>``.

    ``content_id`` is the id to associate with the given feature
    collection. The feature collection should be in the request
    body serialized as JSON.

    Alternatively, if the request's ``Content-type`` starts with
    ``text/html``, then a feature collection is generated from the
    HTML. In that case the part of ``content_id`` after the first
    ``|`` is URL-unquoted and used as the document URL (an id without
    a ``|`` raises ``IndexError``). The generated feature collection
    is stored and then returned as a JSON payload.

    For a JSON body, a ``keywords`` feature is added before storing.
    It is built from the cleansed data of every ``subtopic*`` feature
    whose type is ``text`` or ``manual`` (both the whole string and
    its whitespace-separated words) plus the cleansed ids of the
    folder/subfolder pairs reported by
    ``Folders(kvlclient).parent_subfolders(cid)``. On success the
    response status is set to ``201``.

    An existing feature collection with id ``content_id`` is
    overwritten.
    '''
    # Normalize any falsy tfidf value to None before handing it to etl.
    tfidf = tfidf or None
    if request.headers.get('content-type', '').startswith('text/html'):
        url = urllib.unquote(cid.split('|', 1)[1])
        fc = etl.create_fc_from_html(url, request.body.read(), tfidf=tfidf)
        logger.info('created FC for %r', cid)
        store.put([(cid, fc)])
        return fc_to_json(fc)

    fc = FeatureCollection.from_dict(json.load(request.body))
    keywords = set()
    for subid in fc:
        if not subid.startswith('subtopic'):
            continue
        if subtopic_type(subid) not in ('text', 'manual'):
            continue
        # get the user selected string
        # NOTE(review): cleanse() is now called once and reused; this
        # assumes it is a pure function of its argument -- confirm.
        cleansed = cleanse(typed_subtopic_data(fc, subid))
        # set.update instead of map(keywords.add, ...): no throwaway
        # list, and it still works where map() is lazy.
        keywords.update(cleansed.split())
        keywords.add(cleansed)

    folders = Folders(kvlclient)
    for fid, sid in folders.parent_subfolders(cid):
        # Ids may arrive as byte strings; decode before cleansing.
        if not isinstance(fid, unicode):
            fid = fid.decode('utf8')
        if not isinstance(sid, unicode):
            sid = sid.decode('utf8')
        keywords.add(cleanse(fid))
        keywords.add(cleanse(sid))

    fc[u'keywords'] = StringCounter(keywords)
    store.put([(cid, fc)])
    response.status = 201
Ejemplo n.º 6
0
def v1_fc_put(request, response, store, kvlclient, tfidf, cid):
    '''Store a single feature collection.

    The route for this endpoint is:
    ``PUT /dossier/v1/feature-collections/<content_id>``.

    ``content_id`` is the id to associate with the given feature
    collection. The feature collection should be in the request
    body serialized as JSON.

    Alternatively, if the request's ``Content-type`` starts with
    ``text/html``, then a feature collection is generated from the
    HTML. In that case the part of ``content_id`` after the first
    ``|`` is URL-unquoted and used as the document URL (an id without
    a ``|`` raises ``IndexError``). The generated feature collection
    is stored and then returned as a JSON payload.

    For a JSON body, a ``keywords`` feature is added before storing.
    It is built from the cleansed data of every ``subtopic*`` feature
    whose type is ``text`` or ``manual`` (both the whole string and
    its whitespace-separated words) plus the cleansed ids of the
    folder/subfolder pairs reported by
    ``Folders(kvlclient).parent_subfolders(cid)``. On success the
    response status is set to ``201``.

    An existing feature collection with id ``content_id`` is
    overwritten.
    '''
    # Normalize any falsy tfidf value to None before handing it to etl.
    tfidf = tfidf or None
    if request.headers.get('content-type', '').startswith('text/html'):
        url = urllib.unquote(cid.split('|', 1)[1])
        fc = etl.create_fc_from_html(url, request.body.read(), tfidf=tfidf)
        logger.info('created FC for %r', cid)
        store.put([(cid, fc)])
        return fc_to_json(fc)

    fc = FeatureCollection.from_dict(json.load(request.body))
    keywords = set()
    for subid in fc:
        if not subid.startswith('subtopic'):
            continue
        if subtopic_type(subid) not in ('text', 'manual'):
            continue
        # get the user selected string
        # NOTE(review): cleanse() is now called once and reused; this
        # assumes it is a pure function of its argument -- confirm.
        cleansed = cleanse(typed_subtopic_data(fc, subid))
        # set.update instead of map(keywords.add, ...): no throwaway
        # list, and it still works where map() is lazy.
        keywords.update(cleansed.split())
        keywords.add(cleansed)

    folders = Folders(kvlclient)
    for fid, sid in folders.parent_subfolders(cid):
        # Ids may arrive as byte strings; decode before cleansing.
        if not isinstance(fid, unicode):
            fid = fid.decode('utf8')
        if not isinstance(sid, unicode):
            sid = sid.decode('utf8')
        keywords.add(cleanse(fid))
        keywords.add(cleanse(sid))

    fc[u'keywords'] = StringCounter(keywords)
    store.put([(cid, fc)])
    response.status = 201
Ejemplo n.º 7
0
def v1_random_fc_get(response, dbid_to_visid, store):
    """Return one sampled feature collection from the database.

    Route: ``GET /dossier/v1/random/feature-collection``.

    A sample is drawn from ``store.scan_ids()`` via
    ``streaming_sample``. When the sample is non-empty, the result is
    a two-element array: the first sampled id passed through
    ``dbid_to_visid``, followed by the matching feature collection
    converted with ``util.fc_to_json`` (the same format returned by
    :func:`dossier.web.routes.v1_fc_get`).

    When the sample is empty the request is aborted with a 404.

    Note that currently, this may not be a uniformly random sample.
    """
    # Careful, `store.scan()` would be obscenely slow here...
    ids = store.scan_ids()
    picked = streaming_sample(ids, 1, 1000)
    if len(picked) == 0:
        bottle.abort(404, "The feature collection store is empty.")
    dbid = picked[0]
    visid = dbid_to_visid(dbid)
    fc = store.get(dbid)
    return [visid, util.fc_to_json(fc)]
Ejemplo n.º 8
0
def v1_random_fc_get(response, dbid_to_visid, store):
    '''Return one sampled feature collection from the database.

    Route: ``GET /dossier/v1/random/feature-collection``.

    A sample is drawn from ``store.scan_ids()`` via
    ``streaming_sample``. When the sample is non-empty, the result is
    a two-element array: the first sampled id passed through
    ``dbid_to_visid``, followed by the matching feature collection
    converted with ``util.fc_to_json`` (the same format returned by
    :func:`dossier.web.routes.v1_fc_get`).

    When the sample is empty the request is aborted with a 404.

    Note that currently, this may not be a uniformly random sample.
    '''
    # Careful, `store.scan()` would be obscenely slow here...
    candidates = streaming_sample(store.scan_ids(), 1, 1000)
    if len(candidates) == 0:
        bottle.abort(404, 'The feature collection store is empty.')
    chosen = candidates[0]
    visible_id = dbid_to_visid(chosen)
    payload = util.fc_to_json(store.get(chosen))
    return [visible_id, payload]