def test_code(self):
    """Exercise ``Code`` construction, its ``str`` behaviour and its scope."""
    plain = "hello world"
    code = Code("hello world")

    # A Code instance still answers the ordinary string methods.
    self.assertTrue(code.startswith("hello"))
    self.assertTrue(code.endswith("world"))
    self.assertTrue(isinstance(code, Code))
    self.assertFalse(isinstance(plain, Code))
    # No scope argument was given, so none is stored.
    self.assertIsNone(code.scope)

    scoped = Code('hello world', {'my_var': 5})
    self.assertEqual({'my_var': 5}, scoped.scope)

    # An explicitly empty scope is kept as an empty dict.
    no_vars = Code('hello world', {})
    self.assertEqual({}, no_vars.scope)

    # Building from an existing Code keeps its text and combines both scopes.
    rescoped = Code(scoped, {'new_var': 42})
    self.assertEqual(str(scoped), str(rescoped))
    self.assertEqual({'new_var': 42, 'my_var': 5}, rescoped.scope)

    # No error.
    Code(u'héllø world¡')
def where(self, code: Union[str, Code]) -> 'Cursor':
    """Attach a ``$where`` clause to this query.

    `code` is a JavaScript expression, given either as a :class:`str` or as
    a :class:`~bson.code.Code`. The expression is evaluated for every
    document scanned, and only documents for which it evaluates to *true*
    are returned. Inside the expression, *this* refers to the document
    currently being scanned.

    Raises :class:`TypeError` if `code` is not an instance of :class:`str`.
    Raises :class:`~pymongo.errors.InvalidOperation` if this
    :class:`Cursor` has already been used. When :meth:`where` is called
    several times on one :class:`Cursor`, only the last call has any
    effect.

    :Parameters:
      - `code`: JavaScript expression to use as a filter
    """
    self.__check_okay_to_chain()
    # Plain strings are wrapped; Code instances are stored untouched.
    where_clause = code if isinstance(code, Code) else Code(code)
    self.__spec['$where'] = where_clause
    return self
def test_repr(self):
    """``repr`` of a Code shows both the source text and the scope."""
    code = Code("hello world", {})
    self.assertEqual(repr(code), "Code('hello world', {})")

    # Mutating the scope afterwards is reflected in the repr.
    code.scope["foo"] = "bar"
    self.assertEqual(repr(code), "Code('hello world', {'foo': 'bar'})")

    code = Code("hello world", {"blah": 3})
    self.assertEqual(repr(code), "Code('hello world', {'blah': 3})")

    # Non-printable characters are rendered the way ``repr(str)`` renders them.
    raw_text = "\x08\xFF"
    code = Code(raw_text)
    self.assertEqual(repr(code), "Code(%s, None)" % (repr(raw_text),))
def test_code(self):
    """Check that ``Code`` acts like a string and carries a mutable scope.

    Uses ``assertTrue`` rather than the ``assert_`` alias, which is
    deprecated and no longer exists in Python 3.12's ``unittest``.
    """
    a_string = "hello world"
    a_code = Code("hello world")
    self.assertTrue(a_code.startswith("hello"))
    self.assertTrue(a_code.endswith("world"))
    self.assertTrue(isinstance(a_code, Code))
    self.assertFalse(isinstance(a_string, Code))
    # This version of the test expects an empty-dict scope by default.
    self.assertEqual(a_code.scope, {})
    a_code.scope["my_var"] = 5
    self.assertEqual(a_code.scope, {"my_var": 5})
def getErr(queryData, beginTime, endTime):
    """
    Group the ``t_term_vsim_estfail`` error records of the given IMSIs and
    merge the counts back into ``queryData`` via ``assemErrInfo``.

    :param queryData: iterable of records, each read as ``record['imsi']``
    :param beginTime: start of the time window, passed to ``datetime_timestamp``
    :param endTime: end of the time window, passed to ``datetime_timestamp``
    :return: whatever ``assemErrInfo`` returns for the query and error data
    """
    # Problem IMSIs to look up, as strings.
    imsi_values = [str(record['imsi']) for record in queryData]

    # ``errorTime`` is compared against the timestamps multiplied by 1000.
    window_start = (datetime_timestamp(beginTime)) * 1000
    window_end = (datetime_timestamp(endTime)) * 1000
    # Filters on "errType", "mcc" and "errCode" were disabled in the original.
    condition = {
        "vsimImsi": {"$in": imsi_values},
        "errorTime": {"$gte": window_start, "$lte": window_end},
    }

    perflog = msmongo(MongoClient=MongoClient["N_oss_perflog"],
                      Database=Database["N_oss_perflog"],
                      Sheet=Sheet["t_term_vsim_estfail"])

    group_key = {"vsimImsi": 1, "errType": 1, "errCode": 1}
    group_initial = {"count": 0}
    count_reducer = Code("""function(obj, prev){prev.count ++}""")
    output_columns = ["vsimImsi", "errType", "errCode", "count"]

    grouped_errors = pygroup(perflog, "t_term_vsim_estfail", group_key,
                             condition, group_initial, count_reducer,
                             output_columns)
    return assemErrInfo(queryData=queryData, errData=grouped_errors)
def test_cursor(self):
    """Documents dumped from a cursor with ``json_util`` load back intact."""
    db = self.db
    db.drop_collection("test")

    # One document per value kind that needs special JSON handling.
    list_doc = {'foo': [1, 2]}
    nested_doc = {'bar': {'hello': 'world'}}
    code_doc = {'code': Code("function x() { return 1; }")}
    binary_doc = {'bin': Binary(b("\x00\x01\x02\x03\x04"))}
    dbref_doc = {
        'dbref': {
            '_ref': DBRef('simple', ObjectId('509b8db456c02c5ab7e63c34'))
        }
    }
    docs = [list_doc, nested_doc, code_doc, binary_doc, dbref_doc]
    db.test.insert(docs)

    round_tripped = json_util.loads(json_util.dumps(db.test.find()))
    for original in docs:
        self.assertTrue(original in round_tripped)
def where(self, code):
    """Attach a `$where`_ clause to this query.

    `code` is a JavaScript expression, supplied either as a
    :class:`basestring` (:class:`str` in python 3) or as a
    :class:`~bson.code.Code`. The expression is evaluated for every
    document scanned and only documents for which it evaluates to *true*
    are returned. Inside the expression, *this* refers to the document
    currently being scanned. For example::

        # Find all documents where field "a" is less than "b" plus "c".
        for doc in db.test.find().where('this.a < (this.b + this.c)'):
            print(doc)

    Raises :class:`TypeError` if `code` is not an instance of
    :class:`basestring` (:class:`str` in python 3). Raises
    :class:`~pymongo.errors.InvalidOperation` if this :class:`Cursor` has
    already been used. When :meth:`where` is called several times on one
    :class:`Cursor`, only the last call has any effect.

    .. note:: MongoDB 4.4 drops support for :class:`~bson.code.Code`
      with scope variables. Consider using `$expr`_ instead.

    :Parameters:
      - `code`: JavaScript expression to use as a filter

    .. _$expr: https://docs.mongodb.com/manual/reference/operator/query/expr/
    .. _$where: https://docs.mongodb.com/manual/reference/operator/query/where/
    """
    self.__check_okay_to_chain()
    # Plain strings are wrapped; Code instances are stored untouched.
    where_clause = code if isinstance(code, Code) else Code(code)
    self.__spec["$where"] = where_clause
    return self
def test_group_with_scope(self):
    """``group`` honours scope variables passed through ``Code``."""
    db = self.db
    db.drop_collection("test")
    db.test.save({"a": 1})
    db.test.save({"b": 1})

    reduce_function = "function (obj, prev) { prev.count += inc_value; }"
    # (expected count, inc_value placed in the reducer's scope); two
    # documents are saved above, so each count is 2 * inc_value.
    cases = ((2, 1), (4, 2), (1, 0.5))

    for expected, inc_value in cases:
        grouped = db.test.group([], {}, {"count": 0},
                                Code(reduce_function,
                                     {"inc_value": inc_value}))
        self.assertEqual(expected, grouped[0]['count'])

    # The ``command=True`` variant is only exercised when the version check passes.
    if version.at_least(db.connection, (1, 1)):
        for expected, inc_value in cases:
            grouped = db.test.group([], {}, {"count": 0},
                                    Code(reduce_function,
                                         {"inc_value": inc_value}),
                                    command=True)
            self.assertEqual(expected, grouped[0]['count'])
def _get_code(data, position, obj_end, opts):
    """Decode a BSON code to bson.code.Code.

    Returns a ``(Code, position)`` pair, where the position is the one
    reported by ``_get_string`` after reading the code text.
    """
    source_text, next_position = _get_string(data, position, obj_end, opts)
    return Code(source_text), next_position
def speed_query_(begTime, endTime, timeType, names, group, sex, minAge, maxAge):
    """Aggregate speed statistics from ``db.rtls`` with a map/reduce job.

    Documents are filtered inside the map function by name, group, sex, age
    range and time window, then bucketed by day, month or year. For each
    bucket the job produces ``sum``, ``count``, ``minSpeed``, ``maxSpeed``
    and, in the finalize step, ``aveSpeed``. The output is merged into the
    ``result`` collection, printed as JSON and returned.

    String filters are embedded with ``json.dumps`` so that quotes or
    backslashes in caller-supplied values cannot break out of the JavaScript
    string literal.

    :param begTime: lower bound for ``this.time`` (inserted via ``str``)
    :param endTime: upper bound for ``this.time`` (inserted via ``str``)
    :param timeType: one of ``TIME_TYPE_DAY``, ``TIME_TYPE_MONTH``,
        ``TIME_TYPE_YEAR``
    :param names: iterable of accepted names; a falsy value disables the filter
    :param group: substring that ``this.group`` must contain; falsy disables
    :param sex: required ``this.sex`` value; falsy disables
    :param minAge: minimum age; falsy (including 0) disables the filter
    :param maxAge: maximum age; falsy (including 0) disables the filter
    :return: list of documents read back from the map/reduce output
    """
    TIME_OFFSET = 8 * 60 * 60

    map_parts = ["function() {"]
    if names:
        # Loop used as a block: break as soon as one name matches, otherwise
        # fall through to the return and skip the document.
        map_parts.append("while(true){")
        for name in names:
            map_parts.append("if(this.name == " + json.dumps(name) + ")")
            map_parts.append("break;")
        map_parts.append("return;")
        map_parts.append("}")
    if group:
        map_parts.append(
            "if(this.group.indexOf(" + json.dumps(group) + ")==-1)")
        map_parts.append("return;")
    if sex:
        map_parts.append("if(this.sex != " + json.dumps(sex) + ")")
        map_parts.append("return;")
    # NOTE(review): age and time bounds are still inserted verbatim; they
    # look numeric, but validate them upstream if they can come from users.
    if minAge:
        map_parts.append("if(this.age < " + str(minAge) + ")")
        map_parts.append("return;")
    if maxAge:
        map_parts.append("if(this.age > " + str(maxAge) + ")")
        map_parts.append("return;")
    map_parts.append("if(this.time < " + str(begTime) + ")")
    map_parts.append("return;")
    map_parts.append("if(this.time > " + str(endTime) + ")")
    map_parts.append("return;")
    map_parts.append("var date = new Date(this.time*1000);")
    # NOTE(review): any other timeType leaves the JavaScript variable `time`
    # undeclared, so the job fails on the server.
    if timeType == TIME_TYPE_DAY:
        map_parts.append(
            "var time = Date.UTC(date.getFullYear(), date.getMonth(), date.getDate());")
    elif timeType == TIME_TYPE_MONTH:
        map_parts.append(
            "var time = Date.UTC(date.getFullYear(), date.getMonth());")
    elif timeType == TIME_TYPE_YEAR:
        map_parts.append("var time = Date.UTC(date.getFullYear(), 0);")
    map_parts.append("time /= 1000;")
    map_parts.append("time -=" + str(TIME_OFFSET) + ";")
    map_parts.append(
        "emit(time, {sum: this.speed, count: 1, minSpeed: this.speed, maxSpeed: this.speed});")
    map_parts.append("}")
    map_func = Code("".join(map_parts))

    reduce_func = Code(
        "function(key, values) {"
        "var result = {sum: 0, count: 0, minSpeed: 99999, maxSpeed: 0};"
        "values.forEach( function(value) {"
        "result.sum += value.sum;"
        "result.count += value.count;"
        "if(result.maxSpeed < value.maxSpeed ){"
        "result.maxSpeed = value.maxSpeed"
        "}"
        "if(result.minSpeed > value.minSpeed){"
        "result.minSpeed = value.minSpeed"
        "}"
        "});"
        "return result;"
        "}")

    finalize_func = Code(
        "function (key, reducedVal) {"
        "reducedVal.aveSpeed = reducedVal.sum / reducedVal.count;"
        "return reducedVal;"
        "}")

    result = db.rtls.map_reduce(map_func, reduce_func,
                                finalize=finalize_func,
                                out={'merge': 'result'})
    ret = list(result.find())
    # Function-call form works as both a Python 2 statement and a Python 3 call.
    print(json.dumps(ret))
    return ret
def make_leaderboard(condition):
    """
    Returns a list of user and activity counts for activity that matches
    the conditions of 'condition' - an object used to query the history
    collection.

    This function queries and calculates for all currently matching history.
    The list is ordered by descending ``count``.
    """
    score_reducer = Code("""
                    function(obj, prev) {

                        // Total Points
                        switch(obj.rev_type) {
                            case "add text":
                                if (obj.language !== 'he' && obj.version === "Sefaria Community Translation") {
                                    prev.count += Math.max(obj.revert_patch.length / 10, 10);
                                    prev.translateCount += 1
                                } else if(obj.language !== 'he') {
                                    prev.count += Math.max(obj.revert_patch.length / 400, 2);
                                    prev.addCount += 1
                                } else {
                                    prev.count += Math.max(obj.revert_patch.length / 800, 1);
                                    prev.addCount += 1
                                }
                                break;
                            case "edit text":
                                prev.count += Math.max(obj.revert_patch.length / 1200, 1);
                                prev.editCount += 1
                                break;
                            case "revert text":
                                prev.count += 1;
                                break;
                            case "review":
                                prev.count += 15;
                                prev.reviewCount += 1;
                                break;
                            case "add index":
                                prev.count += 5;
                                break;
                            case "edit index":
                                prev.count += 1;
                                prev.editCount += 1
                                break;
                            case "add link":
                                prev.count += 2;
                                prev.linkCount += 1;
                                break;
                            case "edit link":
                                prev.editCount += 1
                                prev.count += 1;
                                break;
                            case "delete link":
                                prev.count += 1;
                                break;
                            case "add note":
                                prev.count += 1;
                                prev.noteCount += 1;
                                break;
                            case "edit note":
                                prev.count += 1;
                                break;
                            case "delete note":
                                prev.count += 1;
                                break;
                        }

                        // Texts worked on
                        var refs = []
                        if ("ref" in obj && obj.ref) {
                            refs.push(obj.ref);
                        } else if ("refs" in obj && obj.refs[0] && obj.refs[1]) {
                            refs.push(obj.refs[0]);
                            refs.push(obj.refs[1]);
                        }
                        refs.forEach(function(ref) {
                            var text = ref;
                            var i = text.search(/\d/);
                            var text = text.slice(0,i).trim()
                            if (prev.texts[text]) {
                                prev.texts[text] += 1;
                            } else {
                                prev.texts[text] = 1;
                            }
                        });
                    }
                """)

    # Starting accumulator handed to the reducer for every user.
    initial_totals = {
        'count': 0,
        'translateCount': 0,
        'addCount': 0,
        'editCount': 0,
        'linkCount': 0,
        'noteCount': 0,
        'reviewCount': 0,
        'texts': {}
    }
    per_user = db.history.group(['user'], condition, initial_totals,
                                score_reducer)

    # Highest score first; a stable sort keeps tied users in group() order.
    return sorted(per_user, key=lambda entry: entry["count"], reverse=True)
import pymongo
from bson.code import Code
import datetime

# Count how often each cast member name appears across ``bigdata.movies``
# and print the 20 most frequent; the timestamps bracket the run.
print(datetime.datetime.now())

client = pymongo.MongoClient("mongodb://localhost:27017/")
movies = client["bigdata"]["movies"]

# Emit one {name: ...} key with value 1 for each entry of ``cast``.
cast_mapper = Code("function () {"
                   " for(var i in this.cast) {"
                   " emit({name:this.cast[i].name}, 1);"
                   " }"
                   "}")

# Add up the emitted values for each key.
count_reducer = Code("function (key, value) {"
                     " return Array.sum(value);"
                     "}")

output = movies.map_reduce(cast_mapper, count_reducer, "myresults")
top_twenty = output.find().sort("value", -1).limit(20)
for entry in top_twenty:
    print(entry)

print(datetime.datetime.now())
def _get_code(data, position, as_class, tz_aware, uuid_subtype, compile_re):
    """Decode a BSON code value into a ``Code``.

    All arguments are forwarded unchanged to ``_get_string``; the result is
    a ``(Code, position)`` pair using the position ``_get_string`` reports.
    """
    source_text, next_position = _get_string(
        data, position, as_class, tz_aware, uuid_subtype, compile_re)
    return Code(source_text), next_position
print('{}\t{}'.format(x, counter)) print('unique_tweet_count\t{}'.format(unique_tweet_count)) """ mapper = Code(""" function () { var pronouns = ["han","hon","den","det","denna","denne","hen"]; pronouns.sort(); var tweet = this.tweet; for (var i = 0; i < pronouns.length; i++){ var replace = "[\\W \\w]?\\b" + pronouns[i] + "\\b[\\W \\w]?"; var regrex = new RegExp(replace, "ig") var num_a = []; var num_n = 0; if (tweet.match(regrex) == null) { num_n = 0; } else { num_a.push(tweet.match(regrex)); emit(pronouns[i], num_a.length); } } } """) reducer = Code(""" function (key, values) { var total = 0; for (var i = 0; i < values.length; i++) {
def _get_news_publishable(news_items):
    """Drop ``_id``/``text`` and ISO-format ``publishedAt`` on each item, in place."""
    for news in news_items:
        del news['_id']
        del news['text']
        news['publishedAt'] = news['publishedAt'].isoformat()
    return news_items


def _get_news_ranked_digests(news_list, preference):
    """Return digests ordered by recency weight times the user's class preference."""
    newest = news_list[0]['publishedAt'].timestamp()
    oldest = news_list[-1]['publishedAt'].timestamp()
    time_diff = newest - oldest
    smooth_portion = SMOOTH_FACTOR * time_diff
    denominator = time_diff + smooth_portion

    scored = []
    for news in news_list:
        if denominator:
            recency = (news['publishedAt'].timestamp() - oldest +
                       smooth_portion) / denominator
        else:
            # Every item carries the same timestamp (or there is only one):
            # recency cannot discriminate, so weigh them all equally instead
            # of dividing by zero.
            recency = 1.0
        scored.append(
            (news['digest'], recency * preference[news['class'] - 1]))

    scored.sort(key=itemgetter(1), reverse=True)
    return [digest for digest, _score in scored]


def getNews(**kwargs):
    """Return one page of news, personalised when ``user_id`` is given.

    Keyword arguments:
      - ``page_num``: 1-based page index (default 1).
      - ``user_id``: optional user id; also used as the Redis cache key.

    Without ``user_id`` the newest ``MAX_NEWS`` items are paged directly.
    With ``user_id`` the ordered digest list is read from Redis, or computed
    from the user's ``preference`` and cached for
    ``TIMEOUT_USER_IN_SECONDS``. The requested slice is then fetched with a
    map/reduce job whose key is the position in that slice.

    :return: list of news dicts without ``_id`` and ``text`` and with
        ``publishedAt`` as an ISO string, or ``None`` when the user does not
        exist.
    """
    page_num = kwargs.get('page_num', 1)
    start = (page_num - 1) * NEWS_PER_PAGE
    end = page_num * NEWS_PER_PAGE

    if 'user_id' not in kwargs:
        news_list = list(
            NEWS_COLLECTION.find({}, limit=MAX_NEWS,
                                 sort=[('publishedAt', -1)]))
        return _get_news_publishable(news_list[start:end])

    user_id = kwargs['user_id']
    # Read the cache once: asking Redis a second time can return None if the
    # key expires between the two calls.
    cached = redis_client.get(user_id)
    if cached is None:
        user = USER_COLLECTION.find_one({'_id': ObjectId(user_id)})
        if user is None:
            return None
        news_list = list(
            NEWS_COLLECTION.find({}, limit=MAX_NEWS,
                                 sort=[('publishedAt', -1)]))
        preference = user.get('preference')
        if preference and news_list:
            digest_list = _get_news_ranked_digests(news_list, preference)
        else:
            digest_list = [news['digest'] for news in news_list]
        redis_client.set(user_id, pickle.dumps(digest_list),
                         ex=TIMEOUT_USER_IN_SECONDS)
    else:
        # NOTE(review): pickle.loads executes whatever the payload encodes;
        # this is only safe while nothing untrusted can write to this Redis.
        digest_list = pickle.loads(cached)

    slice_index = digest_list[start:end]

    # USE MapReduce to preserve order since by default it returns result in
    # the ascending order of key. Each digest maps to a distinct key here.
    map_reduce_res = NEWS_COLLECTION.map_reduce(
        map=Code("""
            function (){
                var order = inputs.indexOf(this.digest);
                emit(order, this);
            }
        """),
        reduce=Code("""
            function (){}
        """),
        out={"inline": 1},
        query={"digest": {"$in": slice_index}},
        scope={"inputs": slice_index})

    news_slice_list = [news['value'] for news in map_reduce_res['results']]
    return _get_news_publishable(news_slice_list)
def mapReduce(tskillList, toolskills, yearofexp, edulevel):
    """Score matching resumes by keywords in their work experience.

    Resumes whose ``TechSkills`` and ``ToolSkills`` contain all requested
    skills and whose ``EducationLevel`` and ``yearsOfExperience`` match are
    map/reduced into the ``mroutput`` collection, keyed by ``Email``. Every
    document of ``mroutput`` is then returned as a list.
    """
    resume_collection = DBconnection.getcollection("resume")
    output_collection = DBconnection.getcollection("mroutput")

    # One emit per work-experience entry, keyed by the resume's email.
    mapper = Code("function () {"
                  "for (index in this.WorkExperience) {"
                  "emit(this.Email,this.WorkExperience[index]);"
                  "}"
                  "}")

    # NOTE(review): the `if` statements below have no braces, so only the
    # first statement after each one is conditional; the second increment of
    # each pair always runs. Confirm whether that is intended.
    reducer = Code("function(email, workex) {"
                   "var regex1 = new RegExp('collaborated', 'i');"
                   "var regex2 = new RegExp('managed', 'i');"
                   "var regex3 = new RegExp('developed', 'i');"
                   "var regex4 = new RegExp('designed', 'i');"
                   "var regex5 = new RegExp('initiated', 'i');"
                   "total = 0;"
                   "lscore = 0;"
                   "twscore = 0;"
                   "ascore = 0;"
                   "if(regex1.test(workex))"
                   "lscore += 1;"
                   "twscore += 1;"
                   "if(regex2.test(workex))"
                   "lscore += 1;"
                   "twscore += 1;"
                   "if(regex3.test(workex))"
                   "ascore += 1;"
                   "if(regex4.test(workex))"
                   "ascore += 1;"
                   "if(regex5.test(workex))"
                   "lscore += 1;"
                   "ascore += 1;"
                   "total = lscore + twscore + ascore-1;"
                   "return total"
                   "}")

    resume_filter = {
        "TechSkills": {"$all": tskillList},
        "ToolSkills": {"$all": toolskills},
        "EducationLevel": edulevel,
        "yearsOfExperience": yearofexp
    }
    resume_collection.map_reduce(mapper, reducer, "mroutput",
                                 query=resume_filter)

    # Getting the values of the mapreduce result
    return list(output_collection.find())
from pymongo import MongoClient
from bson.code import Code
import pprint

# For every port/country of origin, find the most valuable imported good
# and write it to the ``import_port`` collection.
client = MongoClient('mongodb://localhost:27017')
db = client['data']

# Emit {val, desc} per import record, keyed by port or country of origin.
mapper = Code("""
    function() {
        var value = {};
        value.val = this.value_of_goods_in_rupees;
        value.desc = this.description_of_goods;
        emit(this.port_or_country_of_origin, value);
    }
""")

# Keep the entry with the largest `val`. The original compared the whole
# object to a number (never true) and read from an undefined `descs` array.
# The result has the same {val, desc} shape the mapper emits, because
# MongoDB may feed a reduce result back into reduce and skips reduce
# entirely for keys that have a single value.
reducer = Code("""
    function (key, vals) {
        var best = {val: 0.0, desc: ""};
        for (var i = 0; i < vals.length; i++) {
            if (vals[i].val > best.val) {
                best = {val: vals[i].val, desc: vals[i].desc};
            }
        }
        return best;
    }
""")

result = db.imports.map_reduce(mapper, reducer, "import_port")
from pymongo import MongoClient from bson.code import Code import sys if __name__ == '__main__': client = MongoClient("mongodb://localhost:27017/") db = client.project_542 mapper = Code(""" function(){ if (this.Mods){ for (key in this.Mods){ emit(key, null); } } } """) reducer = Code(""" function(key, values){ return null; } """) result = db.posts.map_reduce(mapper, reducer, "myresults") Mods_list = [] f = open('mods.txt', 'w') for n in result.find(): f.write("'") f.write(n['_id'])
def _parse_canonical_code(doc): """Decode a JSON code to bson.code.Code.""" for key in doc: if key not in ('$code', '$scope'): raise TypeError('Bad $code, extra field(s): %s' % (doc, )) return Code(doc['$code'], scope=doc.get('$scope'))
def _get_code_w_scope(data, position, obj_end, opts, element_name):
    """Decode a BSON code_w_scope to bson.code.Code.

    Returns a ``(Code, position)`` pair; the position is the one reported by
    ``_get_object`` after the scope document has been read.
    """
    # The code string is read 4 bytes past `position` (presumably skipping
    # the element's length prefix; confirm against the BSON spec).
    string_start = position + 4
    source_text, scope_start = _get_string(
        data, string_start, obj_end, opts, element_name)
    scope_doc, next_position = _get_object(
        data, scope_start, obj_end, opts, element_name)
    return Code(source_text, scope_doc), next_position
client = MongoClient(args.host) # SQL database database = client[args.database] # SQL table collection = database[args.collection] # http://docs.mongodb.org/manual/reference/method/db.collection.mapReduce/ from bson.code import Code mapper = None if args.value_array_type: mapper = Code(""" function() { this.""" + str(args.key) + """.forEach(function(z) { (""+z).split(/[\s\[\],\(\)"\.]+/).forEach(function(v) { if(v && v.length ) emit(v, {'count':1}); }); }); } """) else: #mapper = Code ( mapper = """ function() { var func = { 'author':function() { return 'changyy'; } }; (""+this.""" + str( args.key) + """).split(/[\s\[\],\(\)"\.]+/).forEach(function(v){
def update_stats(verbose=True):
    """Rebuild the summary documents stored in ``ec.nfcurves.stats``.

    Each summary is a single document identified by its ``_id``
    (``signatures_by_degree``, ``fields_by_signature``, ``fields_by_degree``,
    ``conductor_norm``, ``conductor_norm_by_degree``,
    ``conductor_norm_by_signature``, ``conductor_norm_by_field``). The old
    document is deleted and a freshly computed one is inserted. Attribute
    counts are delegated to ``update_attribute_stats``.

    :param verbose: when true, print a progress line before each step.

    NOTE(review): ``nfcurves`` is not assigned in this function; it is
    presumably a module-level handle to the same collection as
    ``ec.nfcurves`` -- confirm.
    """
    from data_mgt.utilities.rewrite import update_attribute_stats
    from bson.code import Code
    ec = C.elliptic_curves
    ecdbstats = ec.nfcurves.stats

    # get list of degrees
    degrees = nfcurves.distinct('degree')
    if verbose:
        print("degrees: {}".format(degrees))

    # get list of signatures for each degree. Note that it would not
    # work to use nfcurves.find({'degree':d}).distinct('signature')
    # since 'signature' is currently a list of integers and mongo would
    # return a list of integers, not a list of lists. With hindsight
    # it would have been better to store the signature as a string.
    if verbose:
        print("Adding signatures_by_degree")
    reducer = Code("""function(key,values){return Array.sum(values);}""")
    attr = 'signature'
    # ""+this.signature turns the list into a comma-separated string key.
    mapper = Code("""function(){emit(""+this.""" + attr + """,1);}""")
    sigs_by_deg = {}
    for d in degrees:
        sigs_by_deg[str(d)] = [
            r['_id'] for r in nfcurves.inline_map_reduce(
                mapper, reducer, query={'degree': d})
        ]
        if verbose:
            print("degree {} has signatures {}".format(d, sigs_by_deg[str(d)]))

    # Replace the stored document: delete by _id, then insert the new data.
    entry = {'_id': 'signatures_by_degree'}
    ecdbstats.delete_one(entry)
    entry.update(sigs_by_deg)
    ecdbstats.insert_one(entry)

    # get list of fields for each signature. Simple code here faster than
    # map/reduce
    if verbose:
        print("Adding fields_by_signature")
    from sage.misc.flatten import flatten
    sigs = flatten(sigs_by_deg.values())
    # The string key is split back into the list of integers stored in the DB.
    fields_by_sig = dict([
        sig,
        nfcurves.find({
            'signature': [int(x) for x in sig.split(",")]
        }).distinct('field_label')
    ] for sig in sigs)
    entry = {'_id': 'fields_by_signature'}
    ecdbstats.delete_one(entry)
    entry.update(fields_by_sig)
    ecdbstats.insert_one(entry)

    # get list of fields for each degree
    if verbose:
        print("Adding fields_by_degree")
    fields_by_deg = dict(
        [str(d),
         sorted(nfcurves.find({
             'degree': d
         }).distinct('field_label'))] for d in degrees)
    entry = {'_id': 'fields_by_degree'}
    ecdbstats.delete_one(entry)
    entry.update(fields_by_deg)
    ecdbstats.insert_one(entry)

    fields = flatten(fields_by_deg.values())
    if verbose:
        print("{} fields, {} signatures, {} degrees".format(
            len(fields), len(sigs), len(degrees)))

    if verbose:
        print("Adding curve counts for torsion order, torsion structure")
    update_attribute_stats(ec, 'nfcurves',
                           ['torsion_order', 'torsion_structure'])

    if verbose:
        print("Adding curve counts by degree, signature and field")
    update_attribute_stats(ec, 'nfcurves',
                           ['degree', 'signature', 'field_label'])

    if verbose:
        print("Adding class counts by degree, signature and field")
    # Same attributes again, restricted to documents with number == 1.
    update_attribute_stats(ec,
                           'nfcurves', ['degree', 'signature', 'field_label'],
                           prefix="classes",
                           filter={'number': int(1)})

    # conductor norm ranges:
    # total:
    if verbose:
        print("Adding curve and class counts and conductor range")
    norms = ec.nfcurves.distinct('conductor_norm')
    data = {
        'ncurves': ec.nfcurves.count(),
        'nclasses': ec.nfcurves.find({
            'number': 1
        }).count(),
        'min_norm': min(norms),
        'max_norm': max(norms),
    }
    entry = {'_id': 'conductor_norm'}
    ecdbstats.delete_one(entry)
    entry.update(data)
    ecdbstats.insert_one(entry)

    # by degree:
    if verbose:
        print("Adding curve and class counts and conductor range, by degree")
    degree_data = {}
    for d in degrees:
        query = {'degree': d}
        res = nfcurves.find(query)
        ncurves = res.count()
        Ns = res.distinct('conductor_norm')
        min_norm = min(Ns)
        max_norm = max(Ns)
        # Narrow the same query to number == 1 for the class count.
        query['number'] = 1
        nclasses = nfcurves.count(query)
        degree_data[str(d)] = {
            'ncurves': ncurves,
            'nclasses': nclasses,
            'min_norm': min_norm,
            'max_norm': max_norm,
        }
    entry = {'_id': 'conductor_norm_by_degree'}
    ecdbstats.delete_one(entry)
    entry.update(degree_data)
    ecdbstats.insert_one(entry)

    # by signature:
    if verbose:
        print(
            "Adding curve and class counts and conductor range, by signature")
    sig_data = {}
    for sig in sigs:
        query = {'signature': [int(c) for c in sig.split(",")]}
        res = nfcurves.find(query)
        ncurves = res.count()
        Ns = res.distinct('conductor_norm')
        min_norm = min(Ns)
        max_norm = max(Ns)
        query['number'] = 1
        nclasses = nfcurves.count(query)
        sig_data[sig] = {
            'ncurves': ncurves,
            'nclasses': nclasses,
            'min_norm': min_norm,
            'max_norm': max_norm,
        }
    entry = {'_id': 'conductor_norm_by_signature'}
    ecdbstats.delete_one(entry)
    entry.update(sig_data)
    ecdbstats.insert_one(entry)

    # by field:
    if verbose:
        print("Adding curve and class counts and conductor range, by field")
    # The old document is deleted here and again after the loop below.
    entry = {'_id': 'conductor_norm_by_field'}
    ecdbstats.delete_one(entry)
    field_data = {}
    for f in fields:
        ff = f.replace(".", ":")  # mongo does not allow "." in key strings
        query = {'field_label': f}
        res = nfcurves.find(query)
        ncurves = res.count()
        Ns = res.distinct('conductor_norm')
        min_norm = min(Ns)
        max_norm = max(Ns)
        query['number'] = 1
        nclasses = nfcurves.count(query)
        field_data[ff] = {
            'ncurves': ncurves,
            'nclasses': nclasses,
            'min_norm': min_norm,
            'max_norm': max_norm,
        }
    entry = {'_id': 'conductor_norm_by_field'}
    ecdbstats.delete_one(entry)
    entry.update(field_data)
    ecdbstats.insert_one(entry)
def mapReduce(collection):
    """Count ``"<word> : <sentiment>"`` pairs into ``words_frequency``.

    Runs a map/reduce job on `collection`: every document emits the key
    ``word + ' : ' + sentiment`` with value 1, and the values are summed per
    key. Returns ``None``.
    """
    emit_pair = Code("function() {emit(this.word+' : '+this.sentiment,1);};")
    sum_counts = Code("function(word,count) {return Array.sum(count);};")
    collection.map_reduce(emit_pair, sum_counts, "words_frequency")
def __setattr__(self, name, code):
    """Store `code` in ``system.js`` under the id `name`.

    The document ``{"_id": name}`` is replaced by one holding `code`
    wrapped in a ``Code``; the trailing ``True`` is passed positionally to
    ``replace_one`` exactly as before.
    """
    selector = {"_id": name}
    stored_function = {"_id": name, "value": Code(code)}
    self._db.system.js.replace_one(selector, stored_function, True)
def prueba(request): #Código de mongodb client = MongoClient(ip, port) db = client['twitterdata'] tweetCollection = db['tweets'] for i in range(20): result2 = tweetCollection.map_reduce(mapCuentaDias, reduce,"myresult2", full_response = True) print result2 client.close() #return the template return render_to_response('userStatistics.html', locals()) mapRT = Code('function(){' 'if (this.retweeted_status){' 'emit(this.text, this.retweeted_status.retweet_count);' '}' '}') reduceRT = Code("function (key, values) {" " var total = Math.max(values);" " return total;" "}") mapUser = Code('function(){' ' emit(this.user.screen_name, 1);' '}') mapPopular = Code('function(){' 'var palabras = new Array;' 'palabras = this.text.split(" ");'
def __setattr__(self, name, code):
    """Save `code` in ``system.js`` under the id `name`.

    `code` is wrapped in a ``Code`` and written with ``save(..., safe=True)``.
    """
    stored_function = {"_id": name, "value": Code(code)}
    self._db.system.js.save(stored_function, safe=True)
init_kafka() if __name__ == '__main__': # user_id = int(sys.argv[1]) # type = sys.argv[2] # send_message(user_id, type) mongo = db.connect_mongo() # items = mongo.message.user_message.find({"topicMessageId":39658}).sort("_id", 1) # for item in items: # logger.info("%s - %s", item["_id"].generation_time.strftime("%Y/%m/%d %H:%M:%S"), item["userId"]) mapper_topic_message = Code(""" function () { if(!!this.topicMessageId){ emit(this.userId + "-" + this.topicMessageId, 1); } } """) mapper_company_message = Code(""" function () { if(!!this.companyMessageId){ emit(this.userId + "-" + this.companyMessageId, 1); } } """) reducer = Code(""" function (key, values) { var total = 0;
def execute(trial=False):
    """Compute average earnings per postal key with a map/reduce job.

    Reads ``aydenbu_huangyh.earningsReport``, writes
    ``aydenbu_huangyh.zip_avg_earnings`` and then strips the helper fields
    so only ``value.avg`` remains. `trial` is accepted but not used.

    :return: dict with the ``start`` and ``end`` datetimes of the run
    """
    started_at = datetime.datetime.now()

    # Connect to the Database
    repo = openDb(getAuth("db_username"), getAuth("db_password"))
    earnings = repo['aydenbu_huangyh.earningsReport']

    # Map: key is `postal` without its first character; documents whose
    # postal is null are skipped.
    map_js = Code("""
        function() {
            var k = this.postal;
            if (k != null) {
                k = k.substring(1);
                var v = {count:1, totalEarnings:parseFloat(this.total_earnings), avg:parseFloat(this.total_earnings)};
                emit(k, v)
            }
        }
    """)

    # Reduce: add up counts and earnings, then derive a 2-decimal average.
    reduce_js = Code("""
        function(k, vs) {
            reduceVal = {count:0, totalEarnings:0, avg:0};
            for (var i = 0; i < vs.length; i++) {
                reduceVal.count += vs[i].count;
                reduceVal.totalEarnings += parseFloat(vs[i].totalEarnings);
            }
            reduceVal.avg = (reduceVal.totalEarnings/reduceVal.count).toFixed(2);
            return reduceVal;
        }
    """)

    repo.dropPermanent("zip_avg_earnings")
    earnings.map_reduce(map_js, reduce_js, "aydenbu_huangyh.zip_avg_earnings")

    # Remove the unrelated fields, only keep the avg field
    averages = repo['aydenbu_huangyh.zip_avg_earnings']
    unset_helpers = {'$unset': {'value.count': '', 'value.totalEarnings': ''}}
    averages.update({}, unset_helpers, multi=True, upsert=False)

    # A disabled alternative in the original saved the result with
    # createPermanent("zip_hospitals_count") and insert_many.

    repo.logout()
    finished_at = datetime.datetime.now()
    return {"start": started_at, "end": finished_at}
def __getattr__(self, name):
    """Return a callable that invokes the server-side function `name`.

    Calling the result evaluates, through ``self._db.eval``, a JavaScript
    wrapper that looks up ``this[name]`` and applies it to the call's
    arguments. `name` reaches the wrapper through the ``Code`` scope.
    """
    wrapper_source = "function() { return this[name].apply(this, arguments); }"
    # A fresh Code is built on every call, as before.
    return lambda *args: self._db.eval(
        Code(wrapper_source, scope={'name': name}), *args)
def _get_code(data, view, position, obj_end, opts, element_name):
    """Decode a BSON code to bson.code.Code.

    All arguments are forwarded unchanged to ``_get_string``; the result is
    a ``(Code, position)`` pair using the position ``_get_string`` reports.
    """
    source_text, next_position = _get_string(
        data, view, position, obj_end, opts, element_name)
    return Code(source_text), next_position
def object_hook(dct):
    """Convert one decoded JSON object into the BSON type it represents.

    The first recognised ``$``-prefixed key decides the type: ``$oid``,
    ``$ref``, ``$date``, ``$regex``, ``$minKey``, ``$maxKey``, ``$binary``,
    ``$code``, ``$uuid``, ``$undefined``, ``$numberLong``, ``$timestamp``.
    A dict with none of these keys is returned unchanged.

    :param dct: mapping produced by the JSON decoder; it is not modified.
    :raises ValueError: if a ``$date`` string has an unrecognised offset.
    """
    # ``collections.Mapping`` was removed in Python 3.10; fall back to the
    # old location only where ``collections.abc`` does not exist.
    try:
        from collections.abc import Mapping
    except ImportError:
        from collections import Mapping

    if "$oid" in dct:
        return ObjectId(str(dct["$oid"]))
    if "$ref" in dct:
        return DBRef(dct["$ref"], dct["$id"], dct.get("$db", None))
    if "$date" in dct:
        dtm = dct["$date"]
        # mongoexport 2.6 and newer
        if isinstance(dtm, string_type):
            aware = datetime.datetime.strptime(
                dtm[:23], "%Y-%m-%dT%H:%M:%S.%f").replace(tzinfo=utc)
            offset = dtm[23:]
            if not offset or offset == 'Z':
                # UTC
                return aware
            if len(offset) == 5:
                # Offset from mongoexport is in format (+|-)HHMM
                secs = (int(offset[1:3]) * 3600 + int(offset[3:]) * 60)
            elif ':' in offset and len(offset) == 6:
                # RFC-3339 format (+|-)HH:MM
                hours, minutes = offset[1:].split(':')
                secs = (int(hours) * 3600 + int(minutes) * 60)
            else:
                # Not RFC-3339 compliant or mongoexport output.
                raise ValueError("invalid format for offset")
            if offset[0] == "-":
                secs *= -1
            return aware - datetime.timedelta(seconds=secs)
        # mongoexport 2.6 and newer, time before the epoch (SERVER-15275)
        elif isinstance(dtm, Mapping):
            secs = float(dtm["$numberLong"]) / 1000.0
        # mongoexport before 2.6
        else:
            secs = float(dtm) / 1000.0
        return EPOCH_AWARE + datetime.timedelta(seconds=secs)
    if "$regex" in dct:
        flags = 0
        # PyMongo always adds $options but some other tools may not.
        for opt in dct.get("$options", ""):
            flags |= _RE_OPT_TABLE.get(opt, 0)
        return Regex(dct["$regex"], flags)
    if "$minKey" in dct:
        return MinKey()
    if "$maxKey" in dct:
        return MaxKey()
    if "$binary" in dct:
        # Normalise into a local so the caller's dict is left untouched.
        type_hex = dct["$type"]
        if isinstance(type_hex, int):
            type_hex = "%02x" % type_hex
        subtype = int(type_hex, 16)
        if subtype >= 0xffffff80:  # Handle mongoexport values
            subtype = int(type_hex[6:], 16)
        return Binary(base64.b64decode(dct["$binary"].encode()), subtype)
    if "$code" in dct:
        return Code(dct["$code"], dct.get("$scope"))
    if "$uuid" in dct:
        return uuid.UUID(dct["$uuid"])
    if "$undefined" in dct:
        return None
    if "$numberLong" in dct:
        return Int64(dct["$numberLong"])
    if "$timestamp" in dct:
        tsp = dct["$timestamp"]
        return Timestamp(tsp["t"], tsp["i"])
    return dct
def object_hook(dct, compile_re=True):
    """Convert one decoded JSON object into the BSON type it represents.

    The first recognised ``$``-prefixed key decides the type; a dict with
    none of them is returned unchanged.

    :param dct: mapping produced by the JSON decoder. In the ``$binary``
        branch an integer ``$type`` is rewritten in place as a hex string.
    :param compile_re: when true ``$regex`` becomes a compiled ``re``
        pattern, otherwise a ``Regex`` instance.
    :raises ValueError: if a ``$date`` string has an unrecognised offset.
    """
    if "$oid" in dct:
        return ObjectId(str(dct["$oid"]))

    if "$ref" in dct:
        return DBRef(dct["$ref"], dct["$id"], dct.get("$db", None))

    if "$date" in dct:
        raw_date = dct["$date"]
        if isinstance(raw_date, str):
            # mongoexport 2.6 and newer
            # datetime.datetime.strptime is new in python 2.5
            date_fields = time.strptime(raw_date[:19],
                                        "%Y-%m-%dT%H:%M:%S")[0:6]
            naive = datetime.datetime(*date_fields)
            # The %f format is new in python 2.6
            microseconds = int(raw_date[20:23]) * 1000
            aware = naive.replace(microsecond=microseconds, tzinfo=utc)
            tz_suffix = raw_date[23:]
            if not tz_suffix or tz_suffix == 'Z':
                # UTC
                return aware
            if len(tz_suffix) == 5:
                # Offset from mongoexport is in format (+|-)HHMM
                seconds = (int(tz_suffix[1:3]) * 3600 +
                           int(tz_suffix[3:]) * 60)
            elif ':' in tz_suffix and len(tz_suffix) == 6:
                # RFC-3339 format (+|-)HH:MM
                hours, minutes = tz_suffix[1:].split(':')
                seconds = (int(hours) * 3600 + int(minutes) * 60)
            else:
                # Not RFC-3339 compliant or mongoexport output.
                raise ValueError("invalid format for offset")
            if tz_suffix[0] == "-":
                seconds *= -1
            return aware - datetime.timedelta(seconds=seconds)
        if isinstance(raw_date, dict):
            # mongoexport 2.6 and newer, time before the epoch (SERVER-15275)
            seconds = float(raw_date["$numberLong"]) / 1000.0
        else:
            # mongoexport before 2.6
            seconds = float(raw_date) / 1000.0
        return EPOCH_AWARE + datetime.timedelta(seconds=seconds)

    if "$regex" in dct:
        regex_flags = 0
        # PyMongo always adds $options but some other tools may not.
        for option in dct.get("$options", ""):
            regex_flags |= _RE_OPT_TABLE.get(option, 0)
        if compile_re:
            return re.compile(dct["$regex"], regex_flags)
        return Regex(dct["$regex"], regex_flags)

    if "$minKey" in dct:
        return MinKey()

    if "$maxKey" in dct:
        return MaxKey()

    if "$binary" in dct:
        if isinstance(dct["$type"], int):
            dct["$type"] = "%02x" % dct["$type"]
        subtype = int(dct["$type"], 16)
        if subtype >= 0xffffff80:
            # Handle mongoexport values
            subtype = int(dct["$type"][6:], 16)
        payload = base64.b64decode(dct["$binary"].encode())
        return Binary(payload, subtype)

    if "$code" in dct:
        return Code(dct["$code"], dct.get("$scope"))

    if bson.has_uuid() and "$uuid" in dct:
        return bson.uuid.UUID(dct["$uuid"])

    if "$undefined" in dct:
        return None

    if "$numberLong" in dct:
        # 2to3 will change this to int. PyMongo 3.0 supports
        # a new type, Int64, to avoid round trip issues.
        return int(dct["$numberLong"])

    if "$timestamp" in dct:
        stamp = dct["$timestamp"]
        return Timestamp(stamp["t"], stamp["i"])

    return dct
def get(self, topic):
    """Count hashtags in tweets matching `topic` and render a word cloud.

    Tweets whose ``text`` matches `topic` (case-insensitive regex) are
    map/reduced into ``myresults``: the map step lower-cases and splits the
    text, drops stop words and emits every word starting with ``#``; the
    reduce step sums the counts. The counts are written to the response as
    JSON and a word cloud is saved to ``images/foo.png``.

    Changes from the earlier version: the figure and the Mongo client are
    released after use, the unused mask image is no longer loaded, and no
    cloud is drawn when there are no words to plot.

    NOTE(review): `topic` is compiled as a regular expression without
    escaping; confirm that callers may supply arbitrary patterns.

    :param topic: regular expression matched against the tweet text
    """
    client = MongoClient('bigdata-mongodb-01', 27017)
    try:
        tweets = client['Grupo10']['tweets']

        map_js = Code("""function() {
            var text = this.text;
            text = text.replace('.',' ');
            text = text.replace(',',' ');
            text = text.replace('(',' ');
            text = text.replace(')',' ');
            text = text.replace(':',' ');
            var wordArr = text.toLowerCase().split(' ');
            var stoppedwords = 'el, la, de, es, a, un, una, que, de, por, para, como, al, ?, !, +, y, no, los, las, en, se, lo, con, o, del, q, su, //t, https, si, mas, le, cuando, ellos, este, son, tan, esa, eso, ha, sus, e, pero, porque, tienen, d';
            var stoppedwordsobj = [];
            var uncommonArr = [];
            stoppedwords = stoppedwords.split(',');
            for (i = 0; i < stoppedwords.length; i++ ) {stoppedwordsobj[stoppedwords[i].trim()] = true;}
            for ( i = 0; i < wordArr.length; i++ ) {word = wordArr[i].trim().toLowerCase(); if ( !stoppedwordsobj[word] ) {uncommonArr.push(word);}}
            for (var i = uncommonArr.length - 1; i >= 0; i--) {if (uncommonArr[i]) {if (uncommonArr[i].startsWith("#")) {emit(uncommonArr[i], 1);}}}}""")
        reduce_js = Code("""function( key, values ) {
            var count = 0;
            values.forEach(function(v) { count +=v; });
            return count; }""")

        topic_regex = re.compile(topic, re.IGNORECASE)
        result = tweets.map_reduce(map_js, reduce_js, "myresults",
                                   query={"text": topic_regex})
        json_result = list(result.find())
    finally:
        client.close()

    # Repeat each hashtag (2 * count - 1) times so frequent ones dominate.
    # Multiplying by a non-positive number yields "", matching the empty
    # range of the earlier loop.
    cloud_items = json.loads(dumps(json_result))
    text = "".join((" " + item['_id']) * (int(item['value'] * 2) - 1)
                   for item in cloud_items)

    # WordCloud cannot build a cloud from zero words, so skip the image then.
    if text.strip():
        wordcloud = WordCloud(width=1000, height=800,
                              max_font_size=1000).generate(text)
        fig = plt.figure(figsize=(20, 10))
        try:
            plt.imshow(wordcloud, interpolation='bilinear')
            plt.axis("off")
            fig.savefig('images/foo.png', facecolor='k', bbox_inches='tight')
        finally:
            # pyplot keeps every figure alive until it is closed explicitly.
            plt.close(fig)

    self.write(dumps(json_result))