예제 #1
0
    def get(self):
        """Write the matching classification objects as a JSON array.

        Request arguments read here:
            limit    -- maximum number of results (default 10). A value that
                        int() rejects results in an HTTP 400 error.
            keywords -- comma-separated words. Each is lower-cased and passed
                        through ClassificationObject.stem; the stems become
                        a ``$all`` condition on ``stemmed_keywords``.
            tags     -- comma-separated tags, lower-cased, used as a ``$all``
                        condition on ``tags``.
            tagged   -- consulted only when ``tags`` is missing or empty.
                        A truthy value queries ``tags != []``; otherwise the
                        query asks for ``tags == []``.

        Results are requested sorted by ``last_modified`` descending and the
        response is sent with an ``application/json`` content type.
        """
        try:
            limit = int(self.get_argument("limit", 10))
        except ValueError:
            raise tornado.web.HTTPError(400)

        query = {}

        # Search for keywords after stemming if supplied.
        keywords = self.get_argument("keywords", None)
        if keywords:
            # Built with a comprehension rather than map(): on Python 3,
            # map() returns a one-shot iterator instead of a list, whereas
            # this is a real list on both Python 2 and Python 3.
            stems = [
                ClassificationObject.stem(word.lower())
                for word in keywords.split(",")
            ]
            query["stemmed_keywords"] = {"$all": stems}

        # Search for tags if supplied, otherwise filter by tagged/untagged.
        tags = self.get_argument("tags", None)
        if tags:
            query["tags"] = {"$all": [tag.lower() for tag in tags.split(",")]}
        elif self.get_argument("tagged", False):
            # NOTE(review): any non-empty value is truthy here, so a request
            # with tagged=false would presumably still select tagged objects
            # -- confirm this is the intended contract.
            query["tags"] = {"$ne": []}
        else:
            query["tags"] = []

        results = ClassificationObject.find(
            query=query,
            limit=limit,
            sort=[("last_modified", pymongo.DESCENDING)],
        )

        # Named ``payload`` rather than ``json`` so the local does not shadow
        # the name of the standard-library module.
        payload = simplejson.dumps(
            [c.to_dict() for c in results],
            default=nosy.util.json_serializer,
        )

        self.set_header("Content-Type", "application/json")
        self.write(payload)
예제 #2
0
 def _get_from_db(cls):
     """Return a flat list of (keywords, tag) tuples for tagged objects.

     Queries ClassificationObject with ``{'tags': {'$ne': []}}`` and emits
     one tuple per tag of every returned object. All tuples that come from
     the same object share a single copy of that object's keywords.
     """
     pairs = []
     for obj in ClassificationObject.find({'tags': {'$ne': []}}):
         raw_keywords = obj.keywords
         labels = obj.tags
         # Copy once per object; the copy is reused for each of its tags.
         words = list(raw_keywords)
         pairs.extend((words, label) for label in labels)
     return pairs
예제 #3
0
파일: train.py 프로젝트: byouloh/nosy
	def _get_from_db(cls):
		"""Build (keywords, tag) training pairs from objects that have tags.

		ClassificationObject is queried with ``{'tags': {'$ne': []}}``; each
		returned object contributes one tuple per entry in its ``tags``, and
		those tuples all reference the same copied keyword list.
		"""
		data = []
		for entry in ClassificationObject.find({'tags': {'$ne': []}}):
			source_keywords = entry.keywords
			entry_tags = entry.tags
			# One copy per entry, shared by every tuple built from it.
			keyword_copy = list(source_keywords)
			data += [(keyword_copy, tag) for tag in entry_tags]
		return data
예제 #4
0
    def load_features(self):
        """Rebuild ``self.features`` with labelled examples for every tag.

        For each tag in ``self.TAGS`` the list ``self.features[tag]`` gets:
          * one ``(self._to_feature(obj), tag)`` pair per object returned by
            the query ``{'tags': tag}``;
          * one ``({keyword: True, ...}, "!" + tag)`` pair per object returned
            by the query ``{'tags': {'$ne': tag}}``, where the query limit is
            ``self.NEG_FEATURE_MULTIPLIER`` times the number of positive
            pairs collected for that tag.
        """
        self.features = {}
        for tag in self.TAGS:
            # Register the list first so the tag key exists before any query.
            examples = self.features[tag] = []

            # Positive examples: objects matched by this tag.
            for obj in ClassificationObject.find({'tags': tag}):
                examples.append((self._to_feature(obj), tag))

            # Negative examples: capped relative to the positive count.
            # NOTE(review): with no positive examples the cap is 0 -- confirm
            # how ClassificationObject.find interprets limit=0.
            cap = self.NEG_FEATURE_MULTIPLIER * len(examples)
            for obj in ClassificationObject.find({'tags': {'$ne': tag}}, limit=cap):
                presence = dict.fromkeys(obj.keywords, True)
                examples.append((presence, "!" + tag))
예제 #5
0
    def get(self):
        """Respond with a JSON list of classification objects for the query.

        Request arguments read here:
            limit    -- result cap, default 10; a value int() cannot parse
                        raises tornado.web.HTTPError(400).
            keywords -- comma-separated; every word is lower-cased, stemmed
                        with ClassificationObject.stem and placed in a
                        ``$all`` condition on ``stemmed_keywords``.
            tags     -- comma-separated; lower-cased and placed in a ``$all``
                        condition on ``tags``.
            tagged   -- read only when ``tags`` is missing or empty: truthy
                        queries ``tags != []``, falsy queries ``tags == []``.

        The find call asks for ``last_modified`` descending order; the body
        is written with the ``application/json`` content type.
        """
        try:
            limit = int(self.get_argument('limit', 10))
        except ValueError:
            raise tornado.web.HTTPError(400)

        query = {}

        # Search for keywords after stemming if supplied.
        keywords = self.get_argument('keywords', None)
        if keywords:
            # List comprehensions instead of map(): map() is a lazy iterator
            # on Python 3, while these are real lists on Python 2 and 3.
            lowered = [k.lower() for k in keywords.split(',')]
            stemmed = [ClassificationObject.stem(w) for w in lowered]
            query['stemmed_keywords'] = {'$all': stemmed}

        # Search for tags if supplied.
        tags = self.get_argument('tags', None)
        if tags:
            query['tags'] = {'$all': [t.lower() for t in tags.split(',')]}
        else:
            # Otherwise filter by tagged or untagged.
            # NOTE(review): the raw argument is tested for truthiness, so
            # any non-empty value (presumably even "false") counts as
            # tagged -- confirm against the API's intended contract.
            tagged = self.get_argument('tagged', False)
            query['tags'] = {'$ne': []} if tagged else []

        results = ClassificationObject.find(query=query,
                                            limit=limit,
                                            sort=[("last_modified",
                                                   pymongo.DESCENDING)])

        dicts = [c.to_dict() for c in results]
        # ``body`` rather than ``json`` so the local does not shadow the
        # name of the standard-library module.
        body = simplejson.dumps(dicts, default=nosy.util.json_serializer)

        self.set_header("Content-Type", "application/json")
        self.write(body)