Example #1
import unittest

from nltk.classify import PositiveNaiveBayesClassifier

from rdt.data.mongo.features import Features
from rdt.job import AnnotatedSource


class FeaturesDBTestCase(unittest.TestCase):

	def setUp(self):
		self.t = self.assertTrue
		self.inst = self.assertIsInstance
		self.fts_db = Features(host="localhost",port=27017,database="reddit_stream",collection="features")
	def tearDown(self):	
		pass

	def test_convert_list_to_tuples(self):
		for doc in self.fts_db.find({"subreddit" : "UkrainianConflict"}, to_tuples=True, field="bigrams"):
			print(doc)
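
BSON has no tuple type, so PyMongo hands stored bigram pairs back as lists; the to_tuples flag above presumably converts them so they can serve as dict keys and NLTK feature names. A sketch of that assumed conversion (the helper name is hypothetical):

def lists_to_tuples(doc, field):
	# Hypothetical helper: rebuild [["w1", "w2"], count] entries,
	# which BSON decodes as lists, into (("w1", "w2"), count) tuples.
	doc[field] = [(tuple(pair), count) for pair, count in doc[field]]
	return doc
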
class AutoClassifierTestCase(unittest.TestCase):

	def setUp(self):
		self.t = self.assertTrue
		self.inst = self.assertIsInstance
		self.ft_db = Features(host='localhost',port=27017,database="reddit_stream",collection="features")
		self.source = AnnotatedSource(host="localhost",port=27017,database="reddit_stream",collection="big_combined")
	def tearDown(self):	
		pass

	def test_classifier(self):
		bgram_doc = list(self.ft_db.find({"subreddit" : "UkrainianConflict"},to_tuples=True,field="bigrams"))[0]
		allbgram_doc = list(self.ft_db.find({"subreddit" : "all"}, to_tuples=True, field='bigrams'))[0]

		pos_fts = { d[0]:True for d in bgram_doc["bigrams"] }
		neu_fts = { d[0]:True for d in allbgram_doc["bigrams"] }
		
		ukr = []
		neu = []

		for doc, fts in self.source.find_ft({"subreddit" : "UkrainianConflict"}):
			# Keep only bigram features that also appear in the subreddit's
			# own top-bigram list; collect keys first so the dict is not
			# mutated while it is being iterated.
			nomore = []
			for key in fts.keys():
				if key not in pos_fts:
					nomore.append(key)
			for n in nomore:
				del fts[n]
			if fts:
				ukr.append(fts)

		for doc, fts in self.source.find_ft(limit=6000):
			neu.append(fts)

		nvb = PositiveNaiveBayesClassifier.train(ukr,neu)
		for doc, fts in self.source.find_ft(skip=6000,limit=10):
			print(nvb.classify(fts))
		nvb.show_most_informative_features()

		"""ukr = []
Example #3
import random
import unittest

import nltk

from rdt.data.mongo.features import Features
from rdt.job import AnnotatedSource


class SubredditClassifierTestCase(unittest.TestCase):

	def setUp(self):
		self.t = self.assertTrue
		self.inst = self.assertIsInstance
		self.feature = Features(host="localhost",port=27017,database="reddit_stream",collection="features")
		self.source = AnnotatedSource(host="localhost",port=27017,database="reddit_stream",collection="big_combined")
	def tearDown(self):
		pass

	def test_bigram(self):
		# Top bigrams for the subreddit, stored as (bigram, score) pairs.
		bg = list(self.feature.find({"subreddit" : "UkrainianConflict"}, to_tuples=True, field="bigrams"))[0]
		bg = [d[0] for d in bg["bigrams"]]
		# Unique individual words appearing in any of the top bigrams.
		words = [d[0] for d in bg]
		words.extend([d[1] for d in bg])
		words = list(set(words))
		# print(words)
		# print(bg)
		yay = []
		for doc, ft in self.source.find_ft({"subreddit" : "UkrainianConflict"},batch_size=1000):
			tups = ft.keys()
			the_words = list(set([d[0] for d in tups] + [d[1] for d in tups]))
			# Add a contains(word) feature for every top-bigram word.
			for word in words:
				ft["contains(" + word + ")"] = word in the_words
			# Drop features whose keys are not among the top bigrams
			# (this also removes the contains(...) keys added above).
			to_dump = []
			for key in ft.keys():
				if key not in bg:
					to_dump.append(key)
			for dump in to_dump:
				del ft[dump]
			if len(ft.keys()) > 0:
				yay.append((ft,"UkrainianConflict"))
			#print()
			#print(bg)
		for doc, ft in self.source.find_ft({}, limit=6000,batch_size=1000):
			yay.append((ft, "Not UkrainianConflict"))

		random.shuffle(yay)
		half = len(yay) // 2
		test_set, train_set = yay[half:], yay[:half]
		classifier = nltk.NaiveBayesClassifier.train(train_set)

		classifier.show_most_informative_features()

		for doc, ft in self.source.find_ft({"subreddit" : "news"}, skip=6000,batch_size=1000):
			if classifier.classify(ft) == "UkrainianConflict":
				print("YAY", doc)
Example #4

from rdt.data.mongo.features import Features
import sys

if __name__ == "__main__":
	subreddit = sys.argv[1]
	fts = Features(host="localhost",port=27017,database="reddit_stream",collection="features")
	bgrams = list(fts.find({"subreddit" : subreddit}))[0]["bigrams"]
	# bgrams = list(filter(lambda x : True if x[0]))
	while True:
		print(bgrams)
		print("what do you want to remove?")
		word1 = input("enter the first word: ")
		word2 = input("enter the second word: ")
		# Keep every pair except the exact (word1, word2) bigram.
		bgrams = list(filter(lambda x : x[0][0] != word1 or x[0][1] != word2, bgrams))
		action = input("(w)rite, (q)uit, (c)ontinue: ")
		if action == "w":
			fts.upsert({"subreddit" : subreddit}, {"bigrams" : bgrams})
		if action == "q":
			break
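
A quick sanity check of that filter predicate on toy data (entries assumed to be in the stored (bigram, count) shape):

sample = [(("kiev", "ukraine"), 10), (("kiev", "russia"), 7)]
kept = list(filter(lambda x : x[0][0] != "kiev" or x[0][1] != "ukraine", sample))
print(kept)  # only the exact ("kiev", "ukraine") pair is removed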
Example #5
	def setUp(self):
		self.t = self.assertTrue
		self.inst = self.assertIsInstance
		self.feature = Features(host="localhost",port=27017,database="reddit_stream",collection="features")
		self.source = AnnotatedSource(host="localhost",port=27017,database="reddit_stream",collection="big_combined")
Example #6
	def setUp(self):
		self.t = self.assertTrue
		self.inst = self.assertIsInstance
		self.fts_db = Features(host="localhost",port=27017,database="reddit_stream",collection="features")
Example #7

from rdt.data.mongo.features import Features
import rdt.job as job, nltk, sys
from nltk.corpus import stopwords

if __name__ == "__main__":
	stops = stopwords.words('english') + ['-', 'https', '%', '[', ']', "''", "``", '--', "'s", ",", ".", "(", ")", ":", "n't", "?", "!"]
	ft_db = Features(host="localhost", port=27017, database="reddit_stream", collection="features")
	source = job.AnnotatedSource(host="localhost", port=27017, database="reddit_stream", collection="big_combined")
	gen = source.to_words({}, remove_stopwords=True, limit=6000)
	finder = source.bigram_collocation_finder(gen)
	finder.apply_freq_filter(4)
	finder.apply_word_filter(lambda w: w in stops)
	bigram_measures = nltk.collocations.BigramAssocMeasures()
	scored = finder.score_ngrams(bigram_measures.raw_freq)
	# Store bigrams sorted by descending count (then alphabetically) under the "all" pseudo-subreddit.
	ft_db.upsert({"subreddit" : "all"}, {"bigrams" : sorted(finder.ngram_fd.items(), key=lambda t : (-t[1], t[0]))})
	# print(sorted(finder.ngram_fd.items(), key=lambda t:(-t[1], t[0]))[:10])
	# print(len(finder.ngram_fd.items()))
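
For reference, the same collocation pipeline runs on any token list without the rdt wrappers; a minimal self-contained sketch using nltk's own finder constructor:

from nltk.collocations import BigramAssocMeasures, BigramCollocationFinder

tokens = "the quick brown fox jumps over the lazy dog the quick brown fox".split()
finder = BigramCollocationFinder.from_words(tokens)
finder.apply_freq_filter(2)                     # keep bigrams seen at least twice
finder.apply_word_filter(lambda w: w == "the")  # drop bigrams containing "the"
print(finder.score_ngrams(BigramAssocMeasures().raw_freq))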