def memcache_indexer_keywords_distinct(self, term=None):
    """
    Get or set in the memcache the keywords indexed with count.

    If term is not set, it returns the full index, else returns the
    ancestorship of a term.

    :param term: optional keyword whose ancestorship is requested
    :return: a dict with the full index (term is None) or the
             ancestorship result from TextSemantics
    :raises ValueError: if the ancestorship lookup fails
    """
    mkey = _MEMCACHE_SLUGS['INDEXER_DISTINCT'] + str(term)
    # Fetch once: the original issued two memcache round-trips on a hit
    # (one in the condition, one in the else branch).
    results = memcache.get(key=mkey)
    if not results:
        if not term:
            # NOTE(review): one count() query per distinct keyword (N+1);
            # tolerable only because the whole result is memcached.
            query = Indexer.query(projection=[Indexer.keyword], distinct=True)
            results = {
                "indexed": [
                    {
                        "keyword": q.keyword,
                        "count": Indexer.query(Indexer.keyword == q.keyword).count()
                    }
                    for q in query
                ],
                "n_indexed": query.count()
            }
        else:
            try:
                results = TextSemantics.find_term_ancestorship(term)
            except Exception as e:
                # surface lookup failures uniformly as ValueError
                raise ValueError(str(e))
        memcache.add(key=mkey, value=results)

    return results
 def execute_task(self, *args):
     """
     Index an item: store one Indexer entity per related concept.

     :param args: (item, key) -- the entity to index and its datastore key
     :return: None
     """
     item, key = args
     from flankers.textsemantics import find_related_concepts
     if not (item.title == '' and item.abstract == ''):
         # if item is not a media or a link from Twitter
         # it is or a feed or a tweet
         text = item.abstract if len(item.abstract) != 0 else item.title
         # Bug fix: the "already indexed" check used to live inside the
         # loop, so after the first put() the count became non-zero and
         # every remaining label was silently skipped. Check once, up front.
         if Indexer.query().filter(Indexer.webres == key).count() == 0:
             labels = find_related_concepts(text)
             for l in labels:
                 index = Indexer(keyword=l.strip(), webres=key)
                 index.put()
                 print("indexing stored: " + item.url + ">" + l)
# --- Example #3 (scrape-artifact separator; original read "예제 #3" / "0") ---
 def execute_task(self, *args):
     """
     Index an item: store one Indexer entity per related concept.

     :param args: (item, key) -- the entity to index and its datastore key
     :return: None
     """
     item, key = args
     from flankers.textsemantics import find_related_concepts
     if not (item.title == '' and item.abstract == ''):
         # if item is not a media or a link from Twitter
         # it is or a feed or a tweet
         text = item.abstract if len(item.abstract) != 0 else item.title
         # Hoisted the "already indexed" check out of the loop: inside the
         # loop the first put() made count() non-zero, so only the first
         # label was ever stored.
         if Indexer.query().filter(Indexer.webres == key).count() == 0:
             labels = find_related_concepts(text)
             for l in labels:
                 index = Indexer(keyword=l.strip(), webres=key)
                 index.put()
                 print("indexing stored: " + item.url + ">" + l)
def memcache_articles_by_keyword(kwd):
    """
    Get or set in the memcache articles related to a given keyword.

    :param kwd: a keyword
    :return: a list of web resources related to *kwd*
    """
    mkey = "Keywords_" + kwd
    # Fetch once: the original issued two memcache round-trips on a hit.
    results = memcache.get(key=mkey)
    if not results:
        results = Indexer.get_webresource(kwd)
        memcache.add(key=mkey, value=results)

    return results
def memcache_articles_by_keyword(kwd):
    """
    Return the articles related to *kwd*, using memcache as a
    read-through cache.

    :param kwd: a keyword
    :return: a list
    """
    cache_key = "Keywords_" + kwd
    if memcache.get(key=cache_key):
        # cache hit: serve the stored result
        return memcache.get(key=cache_key)
    # cache miss: query the datastore and populate the cache
    articles = Indexer.get_webresource(kwd)
    memcache.add(key=cache_key, value=articles)
    return articles
# --- Example #6 (scrape-artifact separator; original read "예제 #6" / "0") ---
 def execute_task(self, *args):
     """
     Index an article.
     See Indexer class in models.
     :param args: single object to index and its key
     :return: None
     :raises Exception: if the resource is already indexed
     """
     item, key = args
     from flankers.textsemantics import TextSemantics
     if not (item.title == '' and item.abstract == ''):
         # if item is not a media or a link from Twitter
         # it is or a feed or a tweet
         text = item.abstract if len(item.abstract) != 0 else item.title
         # Cap the text handed to the semantic service at 1799 chars.
         # (Exactly equivalent to the old
         # `text[:1799] if len(text) >= 1800 else text` form.)
         text = text[:1799]
         if Indexer.query().filter(Indexer.webres == key).count() == 0:
             semantics = TextSemantics(text)
             labels = semantics.find_related_concepts()
             for l in labels:
                 index = Indexer(keyword=l.strip(), webres=key)
                 index.put()
                 print("indexing stored: " + item.url + ">" + l)
         else:
             raise Exception("storeIndexer(): Resource already indexed")
    def memcache_articles_by_keyword(self, kwd):
        """
        Get or set in the memcache articles related to a given keyword.

        GET /articles/<version>/?keyword=<some keyword>
        :param kwd: a keyword
        :return: a list
        """
        mkey = _MEMCACHE_SLUGS['KWD_BY_ARTICLE'] + kwd
        # One memcache round-trip instead of get()-then-get() on a hit.
        results = memcache.get(key=mkey)
        if not results:
            results = Indexer.get_webresource(kwd)
            memcache.add(key=mkey, value=results)

        return results
    def get(self, name):
        """
        Handles WebResource GET requests.

        Dispatch on *name* plus query parameters (token-protected):
        - webresource/indexer + ?retrieve=ID : a single resource as JSON
        - webresource/indexer + ?index=...   : paginated list of keys
        - concepts + ?retrieve=ID            : keywords linked to a resource
        """
        self.response.headers['Access-Control-Allow-Origin'] = '*'
        self.response.headers['Content-Type'] = 'application/json'

        # Guard clause: reject unauthorized requests up front instead of
        # nesting the whole handler inside an if/else pyramid.
        if self.request.get('token') != _CLIENT_TOKEN:
            return self.response.write(
                self.json_error_handler(405, exception='Not authorized')
            )

        if (name == 'webresource' or name == 'indexer') and self.request.get('retrieve'):
            # respond with a single resource of the requested kind
            resource = self.retrieve_a_single_resource(self.request.get('retrieve'), kind=name)
            # (removed a leftover `print type(resource)` debug statement)
            resource = resource.dump_to_json() if resource else None
            return self.response.write(
                json.dumps(resource)
            ) if resource else self.json_error_handler(404, '?retrieve=ID Wrong ID')
        elif (name == 'webresource' or name == 'indexer') and self.request.get('index'):
            # RETRIEVE a index of WebResource (list of all keys presents in the datastore, paginated)
            from articlesjsonapi import memcache_webresource_query

            query = memcache_webresource_query()

            # Forked from https://github.com/GoogleCloudPlatform/appengine-paging-python
            page_size = 25
            cursor = None
            next_bookmark = None
            bookmark = self.request.get('bookmark')

            if bookmark:
                # resume the cursor from the given bookmark
                cursor = ndb.Cursor.from_websafe_string(bookmark)

            articles, next_cursor, more = query.fetch_page(page_size, start_cursor=cursor)

            # assign the key for the next cursor
            if more:
                next_bookmark = next_cursor.to_websafe_string()

            listed = {
                'articles': [webres.key.id() for webres in articles],
                'next': next_bookmark if next_bookmark else None
            }

            return self.response.write(
                json.dumps(listed)
            )
        elif name == 'concepts' and self.request.get('retrieve'):
            # RETRIEVE keywords related to a WebResource
            from datastore.models import WebResource, Indexer

            # Find concepts related to a WebResource
            resource = self.retrieve_a_single_resource(self.request.get('retrieve'))
            # Bug fix: previously `resource.key` raised AttributeError when
            # the ID was unknown; 404 like the retrieve branch instead.
            if not resource:
                return self.json_error_handler(404, '?retrieve=ID Wrong ID')
            concepts = Indexer.query().filter(Indexer.webres == resource.key)

            listed = {
                'concepts': [
                    concept.keyword.replace(" ", "+")
                    for concept in concepts
                ],
                'resource_id': resource.key.id()
            }

            return self.response.write(
                json.dumps(listed)
            )
        else:
            return self.response.write(self.json_error_handler(404))