def mapper(input_value):
    """Map step: split *input_value* into words and emit [word_unique_id, 1] pairs.

    Beyond plain mapping, this also assigns a persistent unique id to each
    word the first time it is seen, storing the assignment in the
    IndexedWordList collection so later runs reuse the same id.
    """
    connection.register([IndexedWordList])
    # One IndexedWordList document per starting character plus a catch-all
    # bucket; sorting makes the list index act as the bucket number.
    lod = collName.IndexedWordList.find({'required_for':indexed_word_list_requirement})
    lodl = sorted(list(lod))
    l=[]
    for i in input_value.split():
        start_char_to_int = ord(i[0]) - 97  # 'a' -> 0 ... 'z' -> 25
        # Anything outside the lowercase-letter range is clamped into the
        # catch-all bucket 26.  NOTE(review): the guard uses > 26 rather than
        # >= 26, so ord 123 ('{') maps to 26 on its own — presumably fine
        # because 26 is the catch-all anyway, but confirm lodl always holds
        # 27 bucket documents.
        if start_char_to_int < 0 or start_char_to_int>26:
            start_char_to_int = 26
        pwo = lodl[start_char_to_int]  # the bucket's word-list object
        wd = pwo.words  # dict: word -> in-bucket sequence number
        if i not in wd.keys():
            # First sighting: append the word with the next sequence number
            # and persist the updated bucket document.
            # NOTE(review): this looks up by word_start_id == bucket index,
            # while td_doc derives characters via unichr(96+word_start_id)
            # (i.e. 'a' -> 1) — verify the two conventions agree.
            wd[i] = len(wd)
            x = collName.IndexedWordList.find_one({'required_for':indexed_word_list_requirement,'word_start_id':float(start_char_to_int)})
            x.words = wd
            x.save()
        # Unique id = bucket_number * id_gen_number + in-bucket sequence no.
        word_unique_id = start_char_to_int * id_gen_number + wd[i]
        l.append([str(word_unique_id),1])
    return l
def td_doc():
    """Index every not-yet-indexed reduced document into the per-letter word lists.

    For each IndexedWordList bucket, records per-document occurrence counts as
    words[word][str(orignal_id)] = count, then marks all reduced docs indexed.
    """
    connection.register([IndexedWordList])
    connection.register([ReducedDocs])
    lod = collName.IndexedWordList.find({'required_for':indexed_word_list_requirement})  # list_of_documents_cursor
    mrd = collName.ReducedDocs.find({'required_for':reduced_doc_requirement})  # map_reduced_documents
    mrdl = list(mrd)
    for pwdl in lod:  # particular_word_list (one bucket per starting letter)
        start_int = int(pwdl.word_start_id)
        # word_start_id 1 -> 'a', 2 -> 'b', ... (96 + n); assumes ids start
        # at 1 — TODO confirm against the code that creates these buckets.
        start_char = str(unichr(96+start_int))
        wod = pwdl.words  # word_object_dictionary: word -> {doc_id: count}
        for pmrd in mrdl:  # particular_map_reduced_document
            # Only fold in documents that have not been indexed yet.
            if not pmrd.is_indexed:
                wd = pmrd.content  # word -> occurrence count for this doc
                for i in wd:
                    if i.startswith(start_char):
                        if i not in wod:
                            wod[i] = {}
                        wod[i][str(pmrd.orignal_id)]=wd[i]
        pwdl.words = wod
        pwdl.save()
    # Flip the flag only after every bucket has been updated, so a partial
    # run does not leave documents marked indexed prematurely.
    for pmrd in mrdl:
        pmrd.is_indexed = True
        pmrd.save()
def insert(request):
    """Save a new MyRaw document from the POSTed form and queue it for map-reduce.

    After saving, the object is looked up again (by name + content) to obtain
    its _id, which is inserted into the "to_reduce" collection so a later
    cron-style job will run map-reduce over it.
    """
    connection.register([MyRaw])
    collName = get_database().raw
    y = collName.MyRaw()
    print request.POST["f_name"],'\t',request.POST["f_tags"],'\t',request.POST["f_content"]
    y.name = request.POST["f_name"]
    tag_l = request.POST["f_tags"].split(",")  # tags arrive comma-separated
    y.tags = tag_l
    y.content = request.POST["f_content"]
    y.save()
    name_id = "name"
    tag_id = "tags"
    content_id = "content"
    # Re-query to recover the ObjectId of what was just saved.
    # NOTE(review): matching on name+content may return several documents if
    # duplicates exist — each match gets queued below.
    obj = collName.MyRaw.find({name_id:request.POST["f_name"],content_id:request.POST["f_content"]})
    obj_l = list(obj)
    for i in obj_l:
        obj_id = str(i._id)
        print obj_id
        # After saving it is important to also include it in the "to_reduce"
        # collection so the background job picks it up.
        collName2 = get_database().to_reduce
        z = collName2.ToReduce()
        z._id = ObjectId(obj_id)
        z.save()
    return render(request,"raw_reduce/thankYou.html",{})
def generate_big_dict():
    """Merge every IndexedWordList bucket's word dictionary into one dict."""
    connection.register([IndexedWordList])
    buckets = collName.IndexedWordList.find({'required_for':indexed_word_list_requirement})
    combined = {}
    for bucket in list(buckets):
        if bucket.words:
            combined.update(bucket.words)
    return combined
def find_num_distinct_words():
    """Return the total number of distinct words across all word-list buckets."""
    connection.register([IndexedWordList])
    buckets = collName.IndexedWordList.find({'required_for':indexed_word_list_requirement})
    return sum(len(bucket.words) for bucket in list(buckets))
def td_doc():
    """
    Build, per word, a {'ObjectId': number_of_occurances, ...} index on the fly:
    {'word': {'ObjectId': number_of_occurances, 'ObjectId': number_of_occurances}}

    Since the map-reduced documents are already stored, this function is
    pretty fast; the only slow part of the pipeline is the MapReduce itself.
    """
    connection.register([IndexedWordList])
    connection.register([ReducedDocs])
    # This is the list of documents which contains the indexed words.
    lod = collName.IndexedWordList.find({'required_for':indexed_word_list_requirement})  # list_of_documents_cursor
    # "Indexing" here means storing the number of occurrences of a particular
    # word in each and every document.
    mrd = collName.ReducedDocs.find({'required_for':reduced_doc_requirement})  # map_reduced_documents
    mrdl = list(mrd)
    for pwdl in lod:  # particular_word_list (one bucket per starting letter)
        start_int = int(pwdl.word_start_id)
        # Starting character of the words in this bucket; assumes
        # word_start_id 1 -> 'a' — TODO confirm bucket creation code.
        start_char = str(unichr(96+start_int))
        wod = pwdl.words  # word_object_dictionary: word -> {doc_id: count}
        for pmrd in mrdl:  # particular_map_reduced_document
            # Skip documents whose counts are already folded into the index.
            if not pmrd.is_indexed:
                wd = pmrd.content
                for i in wd:
                    if i.startswith(start_char):
                        if i not in wod:
                            wod[i] = {}
                        wod[i][str(pmrd.orignal_id)]=wd[i]
        pwdl.words = wod
        pwdl.save()
    # Mark everything indexed only after all buckets are saved.
    for pmrd in mrdl:
        pmrd.is_indexed = True
        pmrd.save()
def insert(request):
    """Save a new MyDocs entry from the POSTed form and queue it for reduction."""
    connection.register([MyDocs])
    connection.register([ToReduceDocs])
    new_doc = collName.MyDocs()
    new_doc.content = request.POST['f_content']
    new_doc.required_for = my_doc_requirement
    new_doc.save()
    # Re-fetch to obtain the stored document's _id.
    stored = collName.MyDocs.find_one({'content':new_doc.content,'required_for':my_doc_requirement})
    if stored:
        pending = collName.ToReduceDocs()
        pending.doc_id = stored._id
        pending.required_for = to_reduce_doc_requirement
        pending.save()
        return render(request,'cf/thankYou.html',{})
    return render(request,'cf/error.html',{})
def edit_object(request):
    """Update a MyDocs entry's content and enqueue it for re-reduction.

    The ToReduceDocs queue is only appended to if the document is not already
    queued, so multiple edits before the reduce job runs stay deduplicated.
    """
    connection.register([MyDocs])
    connection.register([ToReduceDocs])
    obj_id = ObjectId(request.POST["f_id"])
    x = collName.MyDocs.find_one({"_id":obj_id,'required_for':my_doc_requirement})
    if x:
        x.content = request.POST["f_content"]
        x.save()
        # Queue for re-reduction unless an entry for this doc already exists.
        y = collName.ToReduceDocs.find_one({'doc_id':obj_id,'required_for':to_reduce_doc_requirement})
        if not y:
            z = collName.ToReduceDocs()
            z.doc_id = obj_id
            z.required_for = to_reduce_doc_requirement
            z.save()
    # NOTE(review): the thank-you page is rendered even when no document
    # matched f_id — confirm that silent no-op is intended.
    return render(request,'cf/thankYou.html',{})
def create_td_matrix():
    """Build the term-document matrix from the stored reduced documents.

    Returns a list with one tuple per reduced document:
    [(orignal_id, {'word': word_count, ...}), ...]
    """
    connection.register([ReducedDocs])
    rdl = list(collName.ReducedDocs.find({'required_for':reduced_doc_requirement}))  # ReducedDocList
    # One (doc_id, word-count-dict) tuple per document.
    return [(td.orignal_id, td.content) for td in rdl]
def perform_map_reduce(request):
    """Drain the ToReduceDocs queue: map-reduce each queued doc into ReducedDocs.

    For every queued id, runs map_reduce over the original document's content,
    upserts the word-count dict into ReducedDocs (resetting is_indexed so the
    indexer re-processes it), then removes the queue entry.
    """
    connection.register([MyDocs])
    connection.register([ReducedDocs])
    connection.register([ToReduceDocs])
    dltr=list(collName.ToReduceDocs.find({'required_for':to_reduce_doc_requirement}))  # document_list_to_reduce
    for doc in dltr:
        doc_id = doc.doc_id
        orignal_doc = collName.MyDocs.find_one({"_id":doc_id,'required_for':my_doc_requirement})
        # word -> occurrence count for this document.
        content_dict = dict(map_reduce(orignal_doc.content,mapper,reducer))
        dord = collName.ReducedDocs.find_one({"orignal_id":doc_id,'required_for':reduced_doc_requirement})  # doc of reduced docs
        if dord:
            # Already reduced once: overwrite counts and force re-indexing.
            dord.content=content_dict
            dord.is_indexed = False
            dord.save()
        else:
            new_doc = collName.ReducedDocs()
            new_doc.content = content_dict
            new_doc.orignal_id = doc_id
            new_doc.required_for = reduced_doc_requirement
            new_doc.is_indexed = False
            new_doc.save()
        # Dequeue only after the reduced document is safely saved.
        doc.delete()
    return render(request,'cf/thankYou.html',{})
def edit_object(request):
    """Edit a MyRaw object and log its id in the "to_reduce" collection.

    Rationale (from the original author): re-running map-reduce on every edit
    would be too expensive, so reduction happens later as a cron job.  This
    function therefore (1) applies the edit and (2) records the ObjectId in
    "to_reduce" — checking first that the id is not already queued, since
    several edits may happen before the cron job runs.
    """
    connection.register([MyRaw])
    collName = get_database().raw
    obj = ObjectId(request.POST["f_id"])
    print obj
    instances = collName.MyRaw.find({"_id":obj})
    y = list(instances)
    print y
    # _id queries match at most one document, so this loop runs 0 or 1 times.
    for z in y:
        z.name = request.POST["f_name"]
        z.tags = request.POST["f_tags"].split(",")
        z.content = request.POST["f_content"]
        z.save()
    # Queue the id for the background map-reduce, deduplicating first.
    collName2 = get_database().to_reduce
    instances = collName2.ToReduce.find({"_id":obj})
    y = list(instances)
    if not y:
        x = collName2.ToReduce()
        x._id = obj
        x.save()
    return render(request,"raw_reduce/thankYou.html",{})
import datetime
from django_mongokit import connection
from django_mongokit.document import DjangoDocument

# Create your models here.


class Talk(DjangoDocument):
    # A conference/meetup talk stored in the 'talks' MongoDB collection.
    collection_name = 'talks'
    structure = {
        'topic': unicode,
        'when': datetime.datetime,
        'tags': list,
        'duration': float,  # length of the talk (units not specified here)
    }
    required_fields = ['topic', 'when', 'duration']
    use_dot_notation = True  # allow talk.topic instead of talk['topic']

connection.register([Talk])
    # Interior of the Bookmark document class (class header is defined above
    # this chunk).  Stored in the 'bookmarks' MongoDB collection.
    collection_name = 'bookmarks'
    structure = {
        'seq': int,              # monotonically increasing sequence number
        'tags': [unicode],
        'user': unicode,
        'url': unicode,
        'created': datetime.datetime,
        'private': bool,
        'title': unicode,
        'notes': unicode,
        'snapshot': [unicode],
        #'author': unicode,
        #'year': unicode,
    }
    default_values = {
        # Callables are invoked at document-creation time.
        'created': datetime.datetime.utcnow,
        'seq': getNextVal,  # presumably a counter helper defined elsewhere — verify
    }
    use_dot_notation = True
    indexes = [
        {'fields': ['user','url','created', 'seq']},
    ]

    def __unicode__(self):
        return self.title

connection.register([Bookmark])
class Computer(Document):
    # MongoKit document describing a purchased computer.
    structure = {
        'make': unicode,
        'model': unicode,
        'purchase_date': unicode,
        'cpu_ghz': unicode,
    }
    validators = {
        'cpu_ghz': lambda x: x > 0,
        # Truthy only when the value is non-blank after stripping whitespace.
        'make': lambda x: x.strip(),
    }
    default_values = {
        # NOTE(review): requires `import datetime` above this chunk — verify.
        # Also note purchase_date is typed unicode but defaults to a callable
        # returning a datetime — confirm MongoKit accepts this.
        'purchase_date': datetime.datetime.utcnow,
    }
    use_dot_notation = True
    indexes = [
        { 'fields': ['make'] },
    ]

connection.register([Computer])
# Create your models here.
def ajax_call(request): #print 'AJAX REACHED HERE' #print request.GET['sVal'] #x = request.GET['sVal']+" returned" #print 'x is ',x #my_own_dict = {'result':x} x = request.GET['sVal'] # print x connection.register([AB]) # print '2',x collName = get_database().autoSuggestCollection # collName = get_database().examples # print '3',x #conditions = {'author':{'$regex':'/^'+x+'/'}} #conditions = {u"author":'/^'+x+'/'} #conditions = { 'author': { '$regex': '/^v/'} } #print conditions #regex = re.compile("/^v/") #print 'REGEX:::::::::::::::::::::::::::::::::::::',regex instances = collName.AB.find({"author": {'$regex': '^' + x}}) # instances = collName.AB.find({u"author":u"/^v/"}) #instances = collName.AB.find() #instances = collName.AB.find({"author":regex}) #instances = collName.AB.find({"author":"vlt"}) #instances = collName.AB.find({"author":"/^v/"}) print 'CURSORLLLLL', instances, '\n\n\n\n\n' """ my_own_list = list(instances) print my_own_list #print '\n\n\n0' #print my_own_list[0] #i=0 my_own_dict = {} print 'HELLO\n' j=0 for i in my_own_list: print 'xxxx',i my_own_dict[j] = i j=j+1 for node in instances: json.dumps(node,) """ #print my_own_dict print 'TRYING TO ENCODE' # instances.rewind() print "\n count: ", instances.count(), "\n" #y = json.dumps(my_own_dict) #print 'PRINTING JSON DUMP' #print y #return HttpResponse(json.dumps(my_own_dict)) return HttpResponse(dumps(list(instances)))
import datetime  # was missing: default_values below references datetime.datetime.utcnow

from django.db import models
from django_mongokit import connection
from mongokit import Document


class Computer(Document):
    # MongoKit document describing a purchased computer.
    structure = {
        'make': unicode,
        'model': unicode,
        'purchase_date': unicode,
        'cpu_ghz': unicode,
    }
    validators = {
        'cpu_ghz': lambda x: x>0,
        # Truthy only when the value is non-blank after stripping whitespace.
        'make': lambda x: x.strip(),
    }
    default_values = {
        # Without the datetime import above, this line raised NameError at
        # module import time.
        'purchase_date': datetime.datetime.utcnow,
    }
    use_dot_notation = True
    indexes = [
        {'fields': ['make']},
    ]

connection.register([Computer])
# Create your models here.
class BlogPost(DjangoDocument):
    # A blog post document; the slug is derived from the title on first save.

    class Meta:
        verbose_name_plural = 'BlogPosts'

    structure = {
        'title': unicode,
        'content': unicode,
        'author': unicode,
        'published_date': datetime.datetime,
        'slug': unicode
    }
    required_fields = [
        'title', 'content', 'author', 'published_date']
    default_values = {
        'published_date': datetime.datetime.utcnow,
    }
    use_dot_notation = True
    indexes = [
        { 'fields': ['published_date'] }
    ]

    def save(self, *args, **kwargs):
        # Auto-generate the slug from the title if none was set explicitly.
        # (Only on first save; an existing slug is never overwritten.)
        if not self.get('slug'):
            self['slug'] = slugify(self.title)
        super(BlogPost, self).save(*args, **kwargs)

connection.register([BlogPost])
from django.db import models
from django_mongokit import connection
from django_mongokit.document import DjangoDocument
from bson import ObjectId


class MyRaw(DjangoDocument):
    # Raw user-entered document: a name, comma-split tags, free-text content.
    structure = {
        'name':unicode,
        'tags':[unicode],
        'content':unicode,
    }
    use_dot_notation = True

connection.register([MyRaw])


class MyReduce(DjangoDocument):
    # Map-reduced counterpart of a MyRaw document.
    structure = {
        'name':dict,
        'tags':[unicode],
        'content':dict,
        'orignal':ObjectId #This is the objectId of that object whose map reduce we have performed
    }
    use_dot_notation = True

connection.register([MyReduce])


class ToReduce(DjangoDocument):
    # Queue of object ids awaiting map-reduce.
    # NOTE(review): this definition is truncated at the end of the visible
    # chunk — the remainder is outside this view.
    structure = {
        '_id':ObjectId
from django.db import models
from django_mongokit.document import DjangoDocument
from django_mongokit import connection
from bson import ObjectId


class MyDocs(DjangoDocument):
    """Original user-entered document."""
    structure={
        'content':unicode,
        'required_for':unicode,
    }
    use_dot_notation = True

connection.register([MyDocs])


class ReducedDocs(DjangoDocument):
    """Map-reduced form of a MyDocs document."""
    structure={
        'content':dict,  # This contains the content in the dictionary format
        'orignal_id':ObjectId,
        'required_for':unicode,
        # True once the map-reduced document has been indexed; False otherwise.
        'is_indexed':bool,
    }
    use_dot_notation = True

# Registered at module level for consistency with MyDocs (views previously
# had to register this themselves before use).
connection.register([ReducedDocs])


class ToReduceDocs(DjangoDocument):
    """Queue entry: a document id awaiting map-reduce."""
    structure={
        'doc_id':ObjectId,
        'required_for':unicode,
    }
    # Added: views access entries as doc.doc_id (attribute style), which
    # requires dot notation just like the other document classes here.
    use_dot_notation = True

connection.register([ToReduceDocs])
from django.db import models
from django_mongokit import connection
from django_mongokit.document import DjangoDocument


class AB(DjangoDocument):
    # Author/book pair used by the autosuggest views.
    structure={
        'author':unicode,
        'book':unicode,
    }
    use_dot_notation=True  # allow ab.author instead of ab['author']

#connection.register([AB])
connection.register([AB])
# Create your models here.
def ajax_call(request): #print 'AJAX REACHED HERE' #print request.GET['sVal'] #x = request.GET['sVal']+" returned" #print 'x is ',x #my_own_dict = {'result':x} x = request.GET['sVal'] # print x connection.register([AB]) # print '2',x collName = get_database().autoSuggestCollection # collName = get_database().examples # print '3',x #conditions = {'author':{'$regex':'/^'+x+'/'}} #conditions = {u"author":'/^'+x+'/'} #conditions = { 'author': { '$regex': '/^v/'} } #print conditions #regex = re.compile("/^v/") #print 'REGEX:::::::::::::::::::::::::::::::::::::',regex instances = collName.AB.find( { "author": { '$regex' : '^' + x} }) # instances = collName.AB.find({u"author":u"/^v/"}) #instances = collName.AB.find() #instances = collName.AB.find({"author":regex}) #instances = collName.AB.find({"author":"vlt"}) #instances = collName.AB.find({"author":"/^v/"}) print 'CURSORLLLLL',instances,'\n\n\n\n\n' """ my_own_list = list(instances) print my_own_list #print '\n\n\n0' #print my_own_list[0] #i=0 my_own_dict = {} print 'HELLO\n' j=0 for i in my_own_list: print 'xxxx',i my_own_dict[j] = i j=j+1 for node in instances: json.dumps(node,) """ #print my_own_dict print 'TRYING TO ENCODE' # instances.rewind() print "\n count: ", instances.count(), "\n" #y = json.dumps(my_own_dict) #print 'PRINTING JSON DUMP' #print y #return HttpResponse(json.dumps(my_own_dict)) return HttpResponse(dumps(list(instances)))