def mapper(input_value):
	#This mapper does one extra step beyond plain mapping:
	#it takes a string, splits it into a list of words, and then
	#checks whether each word already has a unique id assigned to it.
	#If a unique id has already been assigned, that id is reused; otherwise a new unique id is assigned to the word.
	
	connection.register([IndexedWordList])
	lod = collName.IndexedWordList.find({'required_for':indexed_word_list_requirement})
	lodl = sorted(list(lod))	#one bucket document per starting character; sorted with the intent that list position matches the bucket index used below
	#print lodl

	l=[]
	for i in input_value.split():
		start_char_to_int = ord(i[0]) - 97	#Map the first character to a 0-based alphabet index ('a' -> 0, ..., 'z' -> 25)
		if start_char_to_int < 0 or start_char_to_int > 26:
			start_char_to_int = 26	#anything outside 'a'-'z' falls into the catch-all bucket 26
		
		#print start_char_to_int
		
		#print "LENGTH OF LIST:::",len(lodl)
		
		pwo = lodl[start_char_to_int]	#the particular word-list object for this starting character
		wd = pwo.words	#wd is the dictionary mapping each word in this bucket to its id
		
		if i not in wd:
			wd[i] = len(wd)
			x = collName.IndexedWordList.find_one({'required_for':indexed_word_list_requirement,'word_start_id':float(start_char_to_int)})
			x.words = wd
			x.save()
		
		#print start_char_to_int
		word_unique_id = start_char_to_int * id_gen_number + wd[i]	#bucket index scaled by id_gen_number, plus the word's id within its bucket
		#print word_unique_id," for the word ",i
		l.append([str(word_unique_id),1])
	return l
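#A minimal, self-contained sketch of the id scheme used by mapper() above. The 27-bucket
#layout matches the mapper; the id_gen_number value here (10000) is only an assumption for
#illustration, the real value lives elsewhere in this module.
def demo_word_id(word, buckets, id_gen_number=10000):
	idx = ord(word[0]) - 97	#0-based alphabet index of the first character
	if idx < 0 or idx > 26:
		idx = 26	#catch-all bucket for anything outside 'a'-'z'
	bucket = buckets[idx]
	if word not in bucket:
		bucket[word] = len(bucket)	#next free id within this bucket
	return idx * id_gen_number + bucket[word]
#buckets = [{} for _ in range(27)]
#demo_word_id("apple", buckets)   -> 0     (bucket 0, id 0)
#demo_word_id("banana", buckets)  -> 10000 (bucket 1, id 0)
#demo_word_id("apple", buckets)   -> 0     (the existing id is reused)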
def td_doc():
	connection.register([IndexedWordList])
	connection.register([ReducedDocs])
	lod = collName.IndexedWordList.find({'required_for':indexed_word_list_requirement})	#list_of_documents_cursor
	mrd = collName.ReducedDocs.find({'required_for':reduced_doc_requirement})	#map_reduced_documents
	mrdl = list(mrd)
	#print "LENGTH OF MAP REDUCED DOCUMENT LIST >>>",len(mrdl)
		
	for pwdl in lod:
		#pwdl = particular word-list document for one starting character
		start_int = int(pwdl.word_start_id)
		start_char = str(unichr(96+start_int))	#this assumes word_start_id is 1-based ('a' <-> 1); note the mapper above indexes buckets 0-based
		wod = pwdl.words	#word_object_dictionary
		#print "START CHAR---->",start_char
		#print "WORD OBJECT DICTIONARY BEFORE  ---->",wod	
		
		for pmrd in mrdl:
			#particular_map_reduced_document
			#print pmrd
			if not pmrd.is_indexed:
				wd = pmrd.content
				#print "WORD CONTENT OF ",pmrd._id,"\n",wd
				for i in wd:
					if i.startswith(start_char):
						#print i
						if i not in wod:
							wod[i] = {}
						wod[i][str(pmrd.orignal_id)] = wd[i]	#{word: {document_id: occurrence_count}}
		pwdl.words = wod
		#print "WORD OBJECT DICTIONARY AFTER  ---->",wod
		pwdl.save()
	
	for pmrd in mrdl:
		pmrd.is_indexed = True
		pmrd.save()
Example #3
def insert(request):
	
	connection.register([MyRaw])
	collName = get_database().raw
	y = collName.MyRaw()
	
	print request.POST["f_name"],'\t',request.POST["f_tags"],'\t',request.POST["f_content"]
	y.name = request.POST["f_name"]
	tag_l = request.POST["f_tags"].split(",")
	y.tags = tag_l
	y.content = request.POST["f_content"]
	y.save()
	
	name_id = "name"
	tag_id = "tags"
	content_id = "content"
	
	obj = collName.MyRaw.find({name_id:request.POST["f_name"],content_id:request.POST["f_content"]})
	obj_l = list(obj)
	
	for i in obj_l:
		obj_id = str(i._id)	#keeps the id of the last matching document
		
	print obj_id	#NOTE: raises NameError if the find() above matched nothing
	#After saving, it is important that we also log this id in the "to_reduce" collection
	
	collName2 = get_database().to_reduce
	z = collName2.ToReduce()
	z._id = ObjectId(obj_id)
	z.save()
	return render(request,"raw_reduce/thankYou.html",{})
def generate_big_dict():
	#This function generates one big dictionary by merging all the per-letter word dictionaries into one
	connection.register([IndexedWordList])
	lod = collName.IndexedWordList.find({'required_for':indexed_word_list_requirement})
	lodl = list(lod)
	prefs = {}
	for x in lodl:
		if x.words:
			prefs.update(x.words)		
	#print prefs
	return prefs	
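#The update() calls above rely on the per-letter dictionaries being disjoint (every word
#lives in exactly one bucket), so no key is ever overwritten. A minimal illustration:
#	prefs = {}
#	prefs.update({'apple': 0, 'ant': 1})	#bucket for 'a'
#	prefs.update({'ball': 0})	#bucket for 'b'
#	prefs now holds all three words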
def find_num_distinct_words():
	
	connection.register([IndexedWordList])
	lod = collName.IndexedWordList.find({'required_for':indexed_word_list_requirement})
	
	lodl = list(lod)
	#print "LENGTH OF LIST FIND_NUM_DISTINCT_WORDS>>>>",len(lodl)
	num_distinct_words = 0
	for i in lodl:
		num_distinct_words += len(i.words)	#buckets are disjoint, so their sizes can simply be summed
	
	#print num_distinct_words
	return num_distinct_words
def td_doc():
	"""
	#{'word':{'ObjectId':number_of_occurances,'ObjectId':number_of_occurances}}
	This is the kind of dictionary which is required and will be created on the fly
	Since we have already stored the map reduced documents, this function will be pretty fast.
	The only thing which shall take time in our code is the MapReduce function	
	"""
	
	connection.register([IndexedWordList])
	connection.register([ReducedDocs])
	
	#This is the list of documents which contains the indexed words
	
	lod = collName.IndexedWordList.find({'required_for':indexed_word_list_requirement})	#list_of_documents_cursor
	
	"""
		What does indexing mean?
		In our scenario,indexing simply means to store the number if occurances of a particular word in each and every document.
		
	"""
	mrd = collName.ReducedDocs.find({'required_for':reduced_doc_requirement})	#map_reduced_documents
	mrdl = list(mrd)
	
		
	for pwdl in lod:
		#pwdl = particular word-list document for one starting character
		start_int = int(pwdl.word_start_id)
		start_char = str(unichr(96+start_int))	#the starting character for this bucket; this assumes word_start_id is 1-based ('a' <-> 1), whereas the mapper above indexes buckets 0-based
		wod = pwdl.words	#word_object_dictionary
		
		for pmrd in mrdl:
			#particular_map_reduced_document
			#print pmrd
			if not pmrd.is_indexed:
				wd = pmrd.content
				
				for i in wd:
					if i.startswith(start_char):
						
						if i not in wod:
							wod[i] = {}
						wod[i][str(pmrd.orignal_id)] = wd[i]	#{word: {document_id: occurrence_count}}
		pwdl.words = wod
		#print "WORD OBJECT DICTIONARY AFTER  ---->",wod
		pwdl.save()
	
	for pmrd in mrdl:
		pmrd.is_indexed = True
		pmrd.save()
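#A self-contained illustration of the structure td_doc() builds (the id strings are stand-ins):
def demo_build_index(reduced_docs):
	#reduced_docs: list of (doc_id, {word: count}) pairs
	index = {}
	for doc_id, counts in reduced_docs:
		for word, n in counts.items():
			index.setdefault(word, {})[doc_id] = n
	return index
#demo_build_index([('id1', {'apple': 2, 'ball': 1}), ('id2', {'apple': 1})])
#	-> {'apple': {'id1': 2, 'id2': 1}, 'ball': {'id1': 1}}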
def insert(request):
	connection.register([MyDocs])
	connection.register([ToReduceDocs])
	y = collName.MyDocs()
	y.content = request.POST['f_content']
	y.required_for = my_doc_requirement
	y.save()
	
	z = collName.MyDocs.find_one({'content':y.content,'required_for':my_doc_requirement})	#NOTE: this matches any document with identical content, not necessarily the one just saved
	if z:
		x = collName.ToReduceDocs()
		x.doc_id = z._id
		x.required_for = to_reduce_doc_requirement
		x.save()
		return render(request,'cf/thankYou.html',{})
	return render(request,'cf/error.html',{})
def edit_object(request):
	connection.register([MyDocs])
	connection.register([ToReduceDocs])
	
	obj_id = ObjectId(request.POST["f_id"])
	x = collName.MyDocs.find_one({"_id":obj_id,'required_for':my_doc_requirement})
	
	if x:	
		x.content = request.POST["f_content"]
		x.save()	
	y = collName.ToReduceDocs.find_one({'doc_id':obj_id,'required_for':to_reduce_doc_requirement})
	if not y:
		z = collName.ToReduceDocs()
		z.doc_id = obj_id
		z.required_for = to_reduce_doc_requirement
		z.save()		
	return render(request,'cf/thankYou.html',{})
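#A note on the find_one-then-insert pattern in edit_object() above: there is a small race
#window between the check and the save. A hedged alternative is a pymongo-level upsert; the
#helper below is hypothetical and assumes collName wraps a pymongo 2.x collection:
def queue_for_reduce(obj_id):
	spec = {'doc_id': obj_id, 'required_for': to_reduce_doc_requirement}
	collName.update(spec, spec, upsert=True)	#inserts the queue entry only if it is absent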
def create_td_matrix():
	#This function is responsible for creating a term-document matrix.
	#The result is stored as [(), (), (), ...], one tuple per document present in the database
	#Each tuple is (doc_id, {"word": word_count, "word": word_count})
	
	connection.register([ReducedDocs])
	
	rdl = list(collName.ReducedDocs.find({'required_for':reduced_doc_requirement})) #ReducedDocList
	tdl = [] #Term-Document List
#	s = set()
	for td in rdl:
		tdl.append((td.orignal_id,td.content))
#		for x in td.content.keys():
#			s.add(x)
#	s_l = list(s)
#	s_l_z = zip(s_l,range(len(s_l)))
	#print "TD-LIST:",tdl	
	#print "SET S:",s
#	print "SET_LIST_ZIPPED:::",s_l_z
#	return tdl,s,s_l_z
	return tdl
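#Turning these (doc_id, word_counts) tuples into an actual numeric matrix needs a fixed
#vocabulary, much like the commented-out set-building code above. A database-free sketch:
def demo_dense_matrix(tdl):
	vocab = sorted(set(w for _, counts in tdl for w in counts))
	return vocab, [[counts.get(w, 0) for w in vocab] for _, counts in tdl]
#demo_dense_matrix([('d1', {'apple': 2, 'ball': 1}), ('d2', {'apple': 1})])
#	-> (['apple', 'ball'], [[2, 1], [1, 0]])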
def perform_map_reduce(request):
	connection.register([MyDocs])
	connection.register([ReducedDocs])
	connection.register([ToReduceDocs])
	
	dltr=list(collName.ToReduceDocs.find({'required_for':to_reduce_doc_requirement}))	#document_list_to_reduce
	
	for doc in dltr:
		doc_id = doc.doc_id
		orignal_doc = collName.MyDocs.find_one({"_id":doc_id,'required_for':my_doc_requirement})
		content_dict = dict(map_reduce(orignal_doc.content,mapper,reducer))
		
		dord = collName.ReducedDocs.find_one({"orignal_id":doc_id,'required_for':reduced_doc_requirement}) #doc of reduced docs
		if dord:
			dord.content=content_dict
			dord.is_indexed = False
			dord.save()
		else:
			new_doc = collName.ReducedDocs()
			new_doc.content = content_dict
			new_doc.orignal_id = doc_id
			new_doc.required_for = reduced_doc_requirement
			new_doc.is_indexed = False
			new_doc.save()
		doc.delete()
	
	return render(request,'cf/thankYou.html',{})
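#map_reduce itself is imported from elsewhere in this project. A minimal sketch of the
#usual shape, assuming the mapper returns [key, value] pairs (as mapper() above does) and
#the reducer folds the list of values collected for one key:
def map_reduce_sketch(data, mapper, reducer):
	groups = {}
	for key, value in mapper(data):
		groups.setdefault(key, []).append(value)	#group emitted values by key
	return [(key, reducer(key, values)) for key, values in groups.items()]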
Example #11
def edit_object(request):
	#return HttpResponse("Edit Object")
	#This function mimics the difficulties we are going to have in implementing edit_object functionality
	#The user comes and edits a particular object
	#The object will be edited but what about the map reduce.
	#We will again have to perform map reduce on it
	#But performing map reduce with each edit will be very tedious
	#Thus, we will have to perform map reduce as a cron job
	#Thus, we will have to maintain a log of object id's on which we want to perform map reduce
	#Thus, this function will edit the object and then lodge that objectID in a new collection named as "to_reduce"
	
	#It is possible that there are more than one update before you run your cron job
	#Thus, make sure that you check that the object Id is not already present in the "to_reduce" collection before inserting it
	
	connection.register([MyRaw])
	collName = get_database().raw
	obj = ObjectId(request.POST["f_id"])
	print obj
	instances = collName.MyRaw.find({"_id":obj})
	y = list(instances)
	print y
	
	for z in y:
		z.name = request.POST["f_name"]	
		z.tags = request.POST["f_tags"].split(",")
		z.content = request.POST["f_content"]
		z.save()
		
	collName2 = get_database().to_reduce
	
	instances = collName2.ToReduce.find({"_id":obj})
	y = list(instances)
	
	if not y:
		x = collName2.ToReduce()
		x._id = obj
		x.save()	
	
	return render(request,"raw_reduce/thankYou.html",{})
Example #12
import datetime
from django_mongokit import connection
from django_mongokit.document import DjangoDocument


# Create your models here.
class Talk(DjangoDocument):
    collection_name = 'talks'
    structure = {
        'topic': unicode,
        'when': datetime.datetime,
        'tags': list,
        'duration': float,
    }

    required_fields = ['topic', 'when', 'duration']

    use_dot_notation = True


connection.register([Talk])
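# A hedged usage sketch, following the access pattern from the django_mongokit README
# (the field values are made up):
#
#     from django_mongokit import get_database
#     collection = get_database()[Talk.collection_name]
#     talk = collection.Talk()
#     talk['topic'] = u'Example talk'
#     talk['when'] = datetime.datetime.now()
#     talk['tags'] = [u'mongodb', u'django']
#     talk['duration'] = 45.0
#     talk.save()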
Example #13
    collection_name = 'bookmarks'
    structure = {
        'seq': int,
        'tags': [unicode],
        'user': unicode,
        'url': unicode,
        'created': datetime.datetime,
        'private': bool,
        'title': unicode,
        'notes': unicode,
        'snapshot': [unicode],
        #'author': unicode,
        #'year': unicode,
        }

    default_values = {
        'created': datetime.datetime.utcnow,
        'seq': getNextVal,
        }

    use_dot_notation = True

    indexes = [
        {'fields': ['user','url','created', 'seq']},
        ]
    def __unicode__(self):
        return self.title

connection.register([Bookmark])
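# getNextVal is defined elsewhere in the original module; mongokit calls the callables in
# default_values when a document is created. A purely hypothetical sketch of such a helper
# (the 'counters' collection and its key are assumptions):
#
#     def getNextVal():
#         counter = get_database().counters.find_and_modify(
#             query={'_id': 'bookmark_seq'},
#             update={'$inc': {'seq': 1}},
#             new=True, upsert=True)
#         return counter['seq']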
Example #14
class Computer(Document):
    structure = {
        'make': unicode,
        'model': unicode,
        'purchase_date': unicode,
        'cpu_ghz': unicode,
    }

    validators = {
        'cpu_ghz': lambda x: x > 0,  # NOTE: cpu_ghz is unicode, and in Python 2 any unicode compares greater than 0, so this check always passes
        'make': lambda x: x.strip(),  # whitespace-only or empty 'make' strips to '' (falsy) and fails validation
    }

    default_values = {
        'purchase_date': datetime.datetime.utcnow,
    }

    use_dot_notation = True

    indexes = [
        {
            'fields': ['make']
        },
    ]


connection.register([Computer])

# Create your models here.
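# A hedged usage sketch; mongokit runs the validators on save and rejects falsy results
# (the 'computers' collection name is an assumption):
#
#     computers = get_database().computers
#     comp = computers.Computer()
#     comp['make'] = u'Apple'
#     comp['model'] = u'MacBook Pro'
#     comp['cpu_ghz'] = u'2.6'
#     comp.save()    # purchase_date is filled in from default_values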
Example #15
def ajax_call(request):
    #print 'AJAX REACHED HERE'
    #print request.GET['sVal']
    #x = request.GET['sVal']+" returned"
    #print 'x is ',x
    #my_own_dict = {'result':x}

    x = request.GET['sVal']
    #	print x

    connection.register([AB])

    #	print '2',x

    collName = get_database().autoSuggestCollection
    # 	collName = get_database().examples

    #	print '3',x

    #conditions = {'author':{'$regex':'/^'+x+'/'}}
    #conditions = {u"author":'/^'+x+'/'}
    #conditions = { 'author': { '$regex': '/^v/'} }
    #print conditions

    #regex = re.compile("/^v/")
    #print 'REGEX:::::::::::::::::::::::::::::::::::::',regex

    instances = collName.AB.find({"author": {'$regex': '^' + x}})

    # 	instances = collName.AB.find({u"author":u"/^v/"})
    #instances = collName.AB.find()

    #instances = collName.AB.find({"author":regex})
    #instances = collName.AB.find({"author":"vlt"})
    #instances = collName.AB.find({"author":"/^v/"})

    print 'CURSORLLLLL', instances, '\n\n\n\n\n'
    """
	my_own_list = list(instances)
	print my_own_list
	
	
	#print '\n\n\n0'
	#print my_own_list[0]
	
	#i=0
	my_own_dict = {}
	
	print 'HELLO\n'
	j=0
	
	for i in my_own_list:
		print 'xxxx',i
		my_own_dict[j] = i
		j=j+1
	
	for node in instances:
		json.dumps(node,)	"""
    #print my_own_dict
    print 'TRYING TO ENCODE'
    # instances.rewind()
    print "\n count: ", instances.count(), "\n"
    #y = json.dumps(my_own_dict)
    #print 'PRINTING JSON DUMP'
    #print y
    #return HttpResponse(json.dumps(my_own_dict))
    return HttpResponse(dumps(list(instances)))
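# One caveat with the query above: sVal comes straight from the request, so any regex
# metacharacters in the user input are interpreted. A safer sketch (re is the stdlib module):
#
#     import re
#     instances = collName.AB.find({"author": {'$regex': '^' + re.escape(x)}})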
Example #16
import datetime

from django.db import models
from django_mongokit import connection
from mongokit import Document

class Computer(Document):
	structure = {
		'make': unicode, 
		'model': unicode, 
		'purchase_date': unicode,
		'cpu_ghz': unicode,
	}
	
	validators = {
		'cpu_ghz': lambda x: x>0, #NOTE: cpu_ghz is unicode, and in Python 2 any unicode compares greater than 0, so this check always passes
		'make': lambda x: x.strip(), #whitespace-only or empty 'make' strips to '' (falsy) and fails validation
	}
	
	default_values = {
		'purchase_date': datetime.datetime.utcnow,
	}

	use_dot_notation = True
	
	indexes = [
		{'fields': ['make']},
	]
	
connection.register([Computer])

# Create your models here.
Example #17
class BlogPost(DjangoDocument):
    class Meta:
        verbose_name_plural = 'BlogPosts'

    structure = {
        'title': unicode,
        'content': unicode,
        'author': unicode,
        'published_date': datetime.datetime,
        'slug': unicode
    }

    required_fields = [ 'title', 'content', 'author', 'published_date']

    default_values = {
        'published_date': datetime.datetime.utcnow,
    }

    use_dot_notation = True

    indexes = [
        { 'fields': ['published_date'] }        
        ]

    def save(self, *args, **kwargs):
        if not self.get('slug'):
            self['slug'] = slugify(self.title)
        super(BlogPost, self).save(*args, **kwargs)

connection.register([BlogPost])
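# A hedged usage sketch showing the overridden save() filling in the slug (slugify is
# assumed to be Django's slugify, imported in the original module; the collection access
# follows the pattern of the other examples):
#
#     posts = get_database().blog_posts
#     post = posts.BlogPost()
#     post['title'] = u'Hello MongoKit'
#     post['content'] = u'First post.'
#     post['author'] = u'someone'
#     post.save()
#     post['slug']    # u'hello-mongokit'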
Example #18
from django.db import models

from django_mongokit import connection
from django_mongokit.document import DjangoDocument

from bson import ObjectId

class MyRaw(DjangoDocument):
	structure = {
		'name':unicode,
		'tags':[unicode],
		'content':unicode,
	}
	use_dot_notation = True

connection.register([MyRaw])

class MyReduce(DjangoDocument):
	structure = {
		'name':dict,
		'tags':[unicode],
		'content':dict,
		'orignal':ObjectId #This is the objectId of that object whose map reduce we have performed
	}
	use_dot_notation = True
	
connection.register([MyReduce])	

class ToReduce(DjangoDocument):
	structure = {
		'_id':ObjectId
	}
	use_dot_notation = True

connection.register([ToReduce])
Example #19
from django.db import models

from django_mongokit.document import DjangoDocument
from django_mongokit import connection

from bson import ObjectId



class MyDocs(DjangoDocument):
	structure={
		'content':unicode,
		'required_for':unicode,
	} 
	use_dot_notation = True
connection.register([MyDocs])

class ReducedDocs(DjangoDocument):
	structure={
		'content':dict, #the word counts of the document, in dictionary format
		'orignal_id':ObjectId,
		'required_for':unicode,
		'is_indexed':bool, #True once this map-reduced document has been indexed, False otherwise
	}
	use_dot_notation = True
connection.register([ReducedDocs])

class ToReduceDocs(DjangoDocument):
	structure={
		'doc_id':ObjectId,
		'required_for':unicode,
	}
	use_dot_notation = True #the views above set doc_id and required_for via dot notation
connection.register([ToReduceDocs])
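
#How the three documents above flow through the views shown earlier:
#  1. insert() saves raw text as a MyDocs document and queues its id in ToReduceDocs
#  2. perform_map_reduce() takes each queued ToReduceDocs entry, word-counts the MyDocs
#     content via map_reduce, stores the result as a ReducedDocs document with
#     is_indexed=False, and deletes the queue entry
#  3. td_doc() folds every un-indexed ReducedDocs into the per-letter IndexedWordList
#     buckets and flips is_indexed to True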
Example #20
from django.db import models

from django_mongokit import connection
from django_mongokit.document import DjangoDocument

class AB(DjangoDocument):
	structure={
		'author':unicode,
		'book':unicode,
	}
	use_dot_notation=True
	
connection.register([AB])
	
	

# Create your models here.
Example #21
import datetime
from django_mongokit import connection
from django_mongokit.document import DjangoDocument


# Create your models here.
class Talk(DjangoDocument):
    collection_name = 'talks'
    structure = {
        'topic': unicode,
        'when': datetime.datetime,
        'tags': list,
        'duration': float,
    }

    required_fields = ['topic', 'when', 'duration']

    use_dot_notation = True

connection.register([Talk])
Example #22
def ajax_call(request):
	#print 'AJAX REACHED HERE'
	#print request.GET['sVal']
	#x = request.GET['sVal']+" returned"
	#print 'x is ',x
	#my_own_dict = {'result':x}
	
	x = request.GET['sVal']
#	print x
		
	connection.register([AB])
	
#	print '2',x
	
	collName = get_database().autoSuggestCollection
# 	collName = get_database().examples	
	
#	print '3',x

	#conditions = {'author':{'$regex':'/^'+x+'/'}}
	#conditions = {u"author":'/^'+x+'/'}
	#conditions = { 'author': { '$regex': '/^v/'} }
	#print conditions
	
	
	#regex = re.compile("/^v/")
	#print 'REGEX:::::::::::::::::::::::::::::::::::::',regex
	
	
	instances = collName.AB.find( { "author": { '$regex' : '^' + x} })
	
# 	instances = collName.AB.find({u"author":u"/^v/"})
	#instances = collName.AB.find()
	
	#instances = collName.AB.find({"author":regex})
	#instances = collName.AB.find({"author":"vlt"})
	#instances = collName.AB.find({"author":"/^v/"})				
	
	print 'CURSORLLLLL',instances,'\n\n\n\n\n'
	"""
	my_own_list = list(instances)
	print my_own_list
	
	
	#print '\n\n\n0'
	#print my_own_list[0]
	
	#i=0
	my_own_dict = {}
	
	print 'HELLO\n'
	j=0
	
	for i in my_own_list:
		print 'xxxx',i
		my_own_dict[j] = i
		j=j+1
	
	for node in instances:
		json.dumps(node,)	"""
	#print my_own_dict
	print 'TRYING TO ENCODE'
	# instances.rewind()
	print "\n count: ", instances.count(), "\n"
	#y = json.dumps(my_own_dict)
	#print 'PRINTING JSON DUMP'
	#print y
	#return HttpResponse(json.dumps(my_own_dict))
	return HttpResponse(dumps(list(instances)))