def get(self):
    """Write matching classification objects to the response as a JSON array.

    Query arguments read from the request:
        limit: maximum number of results, parsed as an int; defaults to 10.
        keywords: comma-separated keywords. Each one is lowercased and
            stemmed with ``ClassificationObject.stem``; results must contain
            all of them in ``stemmed_keywords``.
        tags: comma-separated tags. Each one is lowercased; results must
            carry all of them.
        tagged: only consulted when ``tags`` is absent. A truthy value
            selects objects whose tag list is not empty, anything else
            selects objects whose tag list is empty.

    Results are requested in descending ``last_modified`` order.

    Raises:
        tornado.web.HTTPError: 400 when ``limit`` cannot be parsed as an int.
    """
    try:
        limit = int(self.get_argument("limit", 10))
    except ValueError:
        raise tornado.web.HTTPError(400)

    query = {}

    # Search for keywords after stemming if supplied.
    keywords = self.get_argument("keywords", None)
    if keywords:
        # Materialise a real list: on Python 3 ``map`` returns a one-shot
        # iterator, which is not a valid array value for ``$all``.
        lowered = [k.lower() for k in keywords.split(",")]
        stemmed = [ClassificationObject.stem(w) for w in lowered]
        query["stemmed_keywords"] = {"$all": stemmed}

    # Search for tags if supplied.
    tags = self.get_argument("tags", None)
    if tags:
        query["tags"] = {"$all": [t.lower() for t in tags.split(",")]}
    else:
        # Otherwise filter by tagged or untagged.
        # NOTE(review): the flag is tested for truthiness only, so a value
        # such as "false" or "0" still selects tagged objects -- confirm
        # that this is what clients expect.
        tagged = self.get_argument("tagged", False)
        if tagged:
            query["tags"] = {"$ne": []}
        else:
            query["tags"] = []

    results = ClassificationObject.find(
        query=query,
        limit=limit,
        sort=[("last_modified", pymongo.DESCENDING)],
    )

    # ``payload`` rather than ``json`` so the stdlib module name is not shadowed.
    payload = simplejson.dumps(
        [c.to_dict() for c in results],
        default=nosy.util.json_serializer,
    )
    self.set_header("Content-Type", "application/json")
    self.write(payload)
def _get_from_db(cls):
    """Collect ``(keywords, tag)`` pairs from every tagged object.

    Queries ``ClassificationObject`` for documents whose ``tags`` is not an
    empty list and emits one pair per tag found on each of them.

    Returns:
        A list of ``(keywords, tag)`` tuples. Pairs that originate from the
        same object share one copied keyword list.
    """
    pairs = []
    tagged_objects = ClassificationObject.find({'tags': {'$ne': []}})
    for obj in tagged_objects:
        raw_keywords, obj_tags = obj.keywords, obj.tags
        # Copy once per object; every pair built from it reuses this list.
        words = list(raw_keywords)
        pairs.extend((words, tag) for tag in obj_tags)
    return pairs
def _get_from_db(cls):
    """Build the list of ``(keywords, tag)`` pairs from tagged objects.

    Every ``ClassificationObject`` whose ``tags`` field is not an empty list
    contributes one tuple per tag, each holding a copy of the object's
    keywords (the same copy for all tags of that object) and the tag.

    Returns:
        list: the accumulated ``(keywords, tag)`` tuples.
    """
    not_untagged = {'tags': {'$ne': []}}
    data = []
    for item in ClassificationObject.find(not_untagged):
        item_keywords = item.keywords
        item_tags = item.tags
        # One shallow copy of the keywords, shared by this item's tuples.
        snapshot = list(item_keywords)
        data += [(snapshot, label) for label in item_tags]
    return data
def load_features(self):
    """Populate ``self.features`` with positive and negative features per tag.

    ``self.features`` is reset to a new dict. For every tag in ``self.TAGS``
    the list ``self.features[tag]`` receives:

    * one positive feature ``(bag_of_words, tag)`` for each object carrying
      the tag, where the bag is produced by ``self._to_feature``;
    * negative features ``(bag_of_words, "!" + tag)`` for objects that do
      not carry the tag, capped at ``self.NEG_FEATURE_MULTIPLIER`` times the
      number of positive features. Their bag maps each of the object's
      keywords to ``True``.
    """
    self.features = {}
    for tag in self.TAGS:
        tag_features = self.features[tag] = []

        # Positive features.
        tagged = ClassificationObject.find({'tags': tag})
        for c in tagged:
            tag_features.append((self._to_feature(c), tag))

        # Negative features - capped at NEG_FEATURE_MULTIPLIER times the
        # number of positive features collected above.
        untagged_limit = self.NEG_FEATURE_MULTIPLIER * len(tag_features)
        if not untagged_limit:
            # MongoDB treats a limit of 0 as "no limit"; assuming ``find``
            # forwards ``limit`` to the driver, querying here would load
            # every non-matching object instead of none.
            continue

        untagged = ClassificationObject.find(
            {'tags': {'$ne': tag}}, limit=untagged_limit)
        negative_label = "!" + tag
        for c in untagged:
            # NOTE(review): negatives are built straight from ``c.keywords``
            # rather than via ``self._to_feature`` -- confirm the two
            # representations are meant to match.
            bag_of_words = dict.fromkeys(c.keywords, True)
            tag_features.append((bag_of_words, negative_label))
def get(self):
    """Respond with a JSON array of classification objects matching the query.

    Query arguments read from the request:
        limit: maximum number of results, parsed as an int; defaults to 10.
        keywords: comma-separated keywords, lowercased and then stemmed via
            ``ClassificationObject.stem``; all of them must appear in a
            result's ``stemmed_keywords``.
        tags: comma-separated tags, lowercased; all of them must appear in
            a result's ``tags``.
        tagged: used only when ``tags`` is not given. Truthy selects objects
            with a non-empty tag list, otherwise objects with an empty tag
            list are selected.

    The lookup is sorted by ``last_modified`` in descending order.

    Raises:
        tornado.web.HTTPError: 400 when ``limit`` is not a valid integer.
    """
    try:
        limit = int(self.get_argument('limit', 10))
    except ValueError:
        raise tornado.web.HTTPError(400)

    query = {}

    # Search for keywords after stemming if supplied.
    keywords = self.get_argument('keywords', None)
    if keywords:
        # Use list comprehensions instead of ``map``: under Python 3 ``map``
        # is a lazy iterator and would not be a valid array for ``$all``.
        words = [k.lower() for k in keywords.split(',')]
        words = [ClassificationObject.stem(w) for w in words]
        query['stemmed_keywords'] = {'$all': words}

    # Search for tags if supplied.
    tags = self.get_argument('tags', None)
    if tags:
        tags = [t.lower() for t in tags.split(',')]
        query['tags'] = {'$all': tags}
    else:
        # Otherwise filter by tagged or untagged.
        # NOTE(review): only truthiness is checked, so 'false' or '0' count
        # as "tagged" -- verify against what callers actually send.
        tagged = self.get_argument('tagged', False)
        query['tags'] = {'$ne': []} if tagged else []

    results = ClassificationObject.find(
        query=query,
        limit=limit,
        sort=[("last_modified", pymongo.DESCENDING)],
    )
    dicts = [c.to_dict() for c in results]

    # Named ``body`` so the stdlib ``json`` module name is not shadowed.
    body = simplejson.dumps(dicts, default=nosy.util.json_serializer)
    self.set_header("Content-Type", "application/json")
    self.write(body)