예제 #1
0
def _get_code_w_scope(data, view, position, obj_end, opts, element_name):
    """Decode a BSON code_w_scope element into a ``Code`` carrying its scope.

    The element starts with an int giving its total length, followed by the
    JavaScript source string and then the scope document.  Both parts must
    end exactly at the boundary announced by that length.

    Returns a ``(Code, new_position)`` tuple.

    Raises ``InvalidBSON`` when the scope document does not end on the
    announced boundary.
    """
    declared_length = _UNPACK_INT_FROM(data, position)[0]
    boundary = position + declared_length
    # Skip the 4-byte length prefix before reading the source string.
    source, cursor = _get_string(
        data, view, position + 4, boundary, opts, element_name)
    scope_doc, cursor = _get_object(
        data, view, cursor, boundary, opts, element_name)
    if cursor == boundary:
        return Code(source, scope_doc), cursor
    raise InvalidBSON('scope outside of javascript code boundaries')
예제 #2
0
 def test_code(self):
     """Check that ``Code`` offers string methods and a mutable scope."""
     plain = "hello world"
     wrapped = Code("hello world")

     # String methods are available on the wrapped value.
     self.assertTrue(wrapped.startswith("hello"))
     self.assertTrue(wrapped.endswith("world"))

     # Only the wrapped value is a Code instance.
     self.assertTrue(isinstance(wrapped, Code))
     self.assertFalse(isinstance(plain, Code))

     # The scope starts out empty and can be updated in place.
     self.assertEqual(wrapped.scope, {})
     wrapped.scope["my_var"] = 5
     self.assertEqual(wrapped.scope, {"my_var": 5})
예제 #3
0
def analyze_tweets():
    """Run the tweet map-reduce if needed and dump the top 50 results as JSON.

    When ``mapcollection`` is not yet among ``db``'s collections, the map and
    reduce JavaScript sources are read from the ``mapReduce/`` directory and
    run against ``collection`` with ``mapcollection`` as output.  The 50
    documents with the highest ``value`` are then written to the file named
    by ``output`` as a single JSON array.
    """
    if mapcollection not in db.collection_names():
        # Context managers close the script files even when reading fails.
        with open('mapReduce/mapFunctionTweets.js', 'r') as map_file:
            map_function = Code(map_file.read())
        with open('mapReduce/reduceFunctionTweets.js', 'r') as reduce_file:
            reduce_function = Code(reduce_file.read())
        collection.map_reduce(map_function, reduce_function, out=mapcollection)

    top_docs = db[mapcollection].find().sort([('value', -1)]).limit(50)

    # NOTE(review): the file is opened in binary mode but receives str values;
    # that only works on Python 2 -- confirm the target interpreter before
    # changing the mode.
    with open(output, 'wb') as json_file:
        json_file.write('[')
        json_file.write(','.join(
            json.dumps(doc, indent=2, default=json_util.default)
            for doc in top_docs))
        json_file.write(']')
예제 #4
0
    def get_statistics( self, request, **kwargs ):
        '''
            Summarise the documents stored for one repository.

            The repository is selected through kwargs[ 'mongo_id' ]. A
            MapReduce job groups its documents by the UTC day of their
            timestamp; the response combines those per-day counts with the
            total document count and the first and last documents ordered
            by _id.
        '''
        # Every query below is restricted to the requested repository
        repo_filter = { 'repo': ObjectId( kwargs.get( 'mongo_id' ) ) }

        # Map: emit a count of one, keyed by the UTC day of the timestamp
        day_map = Code( '''
            function() {
                day = Date.UTC( this.timestamp.getFullYear(), this.timestamp.getMonth(), this.timestamp.getDate() );
                emit( { day: day }, { count: 1 } )
            }''')

        # Reduce: add up the counts emitted for one day
        day_reduce = Code( '''
            function( key, values ) {
                var count = 0;
                values.forEach( function( v ) {
                    count += v[ 'count' ];
                });
                return {count: count};
            }''')

        matching = db.data.find( repo_filter )
        oldest = db.data.find_one( repo_filter, sort=[( '_id', pymongo.ASCENDING )] )
        newest = db.data.find_one( repo_filter, sort=[( '_id', pymongo.DESCENDING )] )

        per_day = db.data.map_reduce( day_map, day_reduce, "myresults", query=repo_filter )
        count_by_day = [
            { 'day': row[ '_id' ][ 'day' ], 'value': row[ 'value' ][ 'count' ] }
            for row in per_day.find()
        ]

        return self.create_response( request, {
            'total_count': matching.count(),
            'count_by_day': count_by_day,
            'first_submission': oldest,
            'last_submission': newest,
        } )
예제 #5
0
def get_phrase_heading_counts(phrase,
                              speakername=None,
                              how_many=25,
                              from_date=None,
                              to_date=None):
    ''' Yield the headings in which a phrase occurs, most frequent first.

    The date range is applied only when both ``from_date`` and ``to_date``
    are given; ``speakername`` narrows the query when it is truthy.  At most
    ``how_many`` map-reduce result documents are yielded.
    '''
    criteria = {"phrase": phrase}
    if from_date and to_date:
        criteria["date"] = {"$gte": from_date, "$lte": to_date}
    if speakername:
        criteria["speakername"] = speakername

    # Key: first 64 characters of the heading title plus the date.
    heading_mapper = Code(
        "function () {"
        "   emit(this.headingtitle.substring(0,64) + ' ('+ this.date + ')',1);"
        "}")
    count_reducer = Code(
        "function (key, values) {" "   return Array.sum(values)" "}")

    ranked = (db.phrases
              .map_reduce(heading_mapper, count_reducer, "results",
                          query=criteria)
              .find()
              .sort("value", -1)
              .limit(how_many))
    for heading in ranked:
        yield heading
예제 #6
0
def mapReduce():
    """Count tweets with sentiment 2 per ``via`` value and print the outcome.

    The job runs on ``client.Corpus.tweets`` with ``limit=10`` and writes to
    the ``via_count`` collection, whose documents are printed afterwards.
    """
    db = client.Corpus
    tweets = db.tweets
    # Map and reduce use the same value shape ({count: n}).  A reduce result
    # can be fed back into reduce, and keys with a single emitted value are
    # stored without reduce being called, so the shapes have to agree.
    via_map = Code("function() {emit(this.via, {count: 1});}")
    via_reduce = Code("""function(key, values) {
                        var res = 0;
                        values.forEach(function(v){res += v.count;});
                        return {count: res};
                        }""")

    result = tweets.map_reduce(via_map,
                               via_reduce,
                               "via_count",
                               query={"sentiment": 2},
                               limit=10)
    # print() with one argument behaves the same on Python 2 and Python 3.
    print(result)

    for doc in db.via_count.find():
        print(doc)
예제 #7
0
def get_phrases_containing(fragment,
                           how_many=25,
                           from_date=None,
                           to_date=None,
                           speakername=None):
    ''' Yield the phrases containing some text, most frequent first.

    ``fragment`` must appear as a whole unit: preceded by the start of the
    phrase or whitespace and followed by the end of the phrase or whitespace.
    Matching is case-insensitive.  The date range is applied only when both
    ``from_date`` and ``to_date`` are given; ``speakername`` narrows the query
    when it is truthy.  At most ``how_many`` documents are yielded.
    '''
    # Raw strings keep ``\s`` a regex escape instead of an invalid string
    # escape sequence.
    # NOTE(review): ``fragment`` is inserted into the pattern unescaped, so
    # regex metacharacters in it are interpreted -- confirm whether callers
    # rely on that before wrapping it in re.escape().
    query = {
        "phrase": re.compile(r"(^|\s)(" + fragment + r")($|\s)", re.IGNORECASE)
    }
    if from_date and to_date:
        query["date"] = {"$gte": from_date, "$lte": to_date}
    if speakername:
        query["speakername"] = speakername
    phrase_mapper = Code("function () {" "   emit(this.phrase,1);" "}")
    count_reducer = Code(
        "function (key, values) {" "   return Array.sum(values)" "}")
    results = db.phrases.map_reduce(
        phrase_mapper, count_reducer, "results", query=query)
    for doc in results.find().sort("value", -1).limit(how_many):
        yield doc
예제 #8
0
    def group(self, keys, initial, reduce, condition=None, finalize=None, **kwargs):
        """Issue a ``group`` command for this collection.

        ``reduce`` (and ``finalize``, when given) are wrapped in ``Code``.
        ``keys`` is sent as a ``$keyf`` key function when it is a string,
        otherwise it is normalised into a field projection and sent as
        ``key``.  ``condition`` is sent as ``cond`` when truthy.  Extra
        keyword arguments are forwarded to the database ``command`` call,
        whose result is handed back through ``defer.returnValue``.
        """
        body = {"ns": self._collection_name, "initial": initial}
        body["$reduce"] = Code(reduce)

        keys_are_javascript = isinstance(keys, (bytes, unicode))
        if not keys_are_javascript:
            body["key"] = self._normalize_fields_projection(keys)
        else:
            body["$keyf"] = Code(keys)

        if condition:
            body["cond"] = condition
        if finalize:
            body["finalize"] = Code(finalize)

        outcome = yield self._database.command("group", body, **kwargs)
        defer.returnValue(outcome)
    def snippet(self, _input):
        """Wrap a JavaScript source string in a ``Code`` object.

        Args:
            _input(:obj:`str`): JavaScript source.

        Return:
            (:obj:`bson.code.Code`)
        """
        wrapped = Code(_input)
        return wrapped
예제 #10
0
def object_hook(dct, compile_re=True):
    """Convert an extended-JSON style dict into the matching Python object.

    The special keys are checked in a fixed order (``$oid``, ``$numberLong``,
    ``$decimal``, ``$ref``, ``$date``, ``$timestamp``, ``$regex``,
    ``$minKey``, ``$maxKey``, ``$binary``, ``$code``, ``$uuid``) and the first
    one present decides the result.  A dict with none of them is returned
    unchanged.

    :param dct: the decoded JSON object.
    :param compile_re: when true a ``$regex`` entry becomes a compiled
        ``re`` pattern, otherwise a ``Regex`` instance.
    """
    if "$oid" in dct:
        return ObjectId(str(dct["$oid"]))
    if "$numberLong" in dct:
        return int(dct["$numberLong"])
    if "$decimal" in dct:
        v = str(dct["$decimal"])
        if "$precision" in dct:
            # "$precision" is read as a (precision, scale) pair.
            precision = dct["$precision"][0]
            scale = dct["$precision"][1]
            d = Decimal(v, precision, scale)
        else:
            d = Decimal(v)
        return d
    if "$ref" in dct:
        return DBRef(dct["$ref"], dct["$id"], dct.get("$db", None))
    if "$date" in dct:
        try:
            # Numeric form: milliseconds added to EPOCH_AWARE.
            secs = float(dct["$date"]) / 1000.0
            return EPOCH_AWARE + datetime.timedelta(seconds=secs)
        except ValueError:
            # Fallback: a plain "YYYY-MM-DD" string.
            return datetime.datetime.strptime(dct["$date"], "%Y-%m-%d")
    if "$timestamp" in dct:
        try:
            ms = long_type(dct["$timestamp"])
            # Floor division keeps the seconds an integer even when "/" is
            # true division (Python 3 or ``from __future__ import division``).
            return Timestamp(ms // 1000, ms % 1000 * 1000)
        except ValueError:
            dt = datetime.datetime.strptime(dct["$timestamp"],
                                            "%Y-%m-%d-%H.%M.%S.%f")
            secs = long_type(time.mktime(dt.timetuple()))
            return Timestamp(secs, dt.microsecond)
    if "$regex" in dct:
        flags = 0
        # PyMongo always adds $options but some other tools may not.
        for opt in dct.get("$options", ""):
            flags |= _RE_OPT_TABLE.get(opt, 0)

        if compile_re:
            return re.compile(dct["$regex"], flags)
        else:
            return Regex(dct["$regex"], flags)
    if "$minKey" in dct:
        return MinKey()
    if "$maxKey" in dct:
        return MaxKey()
    if "$binary" in dct:
        # int() accepts both the integer and the decimal-string form, so the
        # caller's dict does not have to be rewritten in place first.
        subtype = int(dct["$type"])
        return Binary(base64.b64decode(dct["$binary"].encode()), subtype)
    if "$code" in dct:
        return Code(dct["$code"], dct.get("$scope"))
    if bson.has_uuid() and "$uuid" in dct:
        return bson.uuid.UUID(dct["$uuid"])
    return dct
예제 #11
0
def getEndangeredSpecies():
    """Show the endangered-species total for the continent in the entry box.

    With an empty entry, a map-reduce sums ``animals_endangered`` over the
    whole ``endangered_species`` collection and only the total is shown.
    Otherwise the values of the last document matching the continent are
    shown together with its ``examples`` list.  Both labels are placed in
    column 3 of ``window`` (rows 7 and 8).
    """
    continent = endangeredEntry.get()
    species_collection = db.get_collection("endangered_species")

    if continent == "":
        # Handle the "all continents" case first so that no per-continent
        # query is issued for an empty name.
        total_map = Code("function () {"
                         " emit('endangered', this.animals_endangered); "
                         "}")
        total_reduce = Code("function(key, values) {"
                            " return Array.sum(values);"
                            "}")
        summary = species_collection.map_reduce(map=total_map,
                                                reduce=total_reduce,
                                                out=SON([('inline', 1)]))
        rows = summary["results"]
        # An empty collection produces no rows; report zero instead of
        # failing on rows[0].
        total = rows[0]["value"] if rows else 0

        ans = "Total endangered species: " + str(int(total))
        Label(window, text=ans).grid(column=3, row=7)
        return

    total = 0
    example_names = []
    for post in species_collection.find({"continent": continent}, {
            "_id": 0,
            "animals_endangered": 1,
            "examples": 2
    }):
        total = post["animals_endangered"]
        example_names = post["examples"]

    examples = "counting: " + ", ".join(example_names)
    ans = "Total endangered species: " + str(int(total))

    Label(window, text=ans).grid(column=3, row=7)
    Label(window, text=examples).grid(column=3, row=8)
예제 #12
0
파일: handler.py 프로젝트: adrymyry/bdge
def rq1_mapred_handler(event, context):
    """Run the two RQ1 map-reduce jobs and return the ``rq1`` collection.

    The first job emits 1 per post whose ``PostTypeId`` is 1, keyed by
    ``OwnerUserId``, and writes the sums to ``rq1``.  The second job emits 0
    for every user ``Id`` and reduces into the same collection.  The response
    is built by ``make_response`` from the resulting documents and the
    elapsed time.
    """
    start_time = timeit.default_timer()

    rq1_map = Code("""
    function () {
        if (this.PostTypeId == 1) {
            emit(this.OwnerUserId, 1);
        }
    }
    """)

    rq1_reduce = Code('''
    function (key, values)
    {
        return Array.sum(values);
    }
    ''')

    rq1_map2 = Code("""
    function () {
        emit(this.Id, 0);
    }
    """)

    rq1_reduce2 = Code("""
    function (key, values) {
        return Array.sum(values);
    }
    """)

    client = MongoClient(MONGO_SERVER, MONGO_PORT)
    try:
        db = client.stackoverflow
        db.posts.map_reduce(rq1_map, rq1_reduce, out='rq1')
        db.users.map_reduce(rq1_map2, rq1_reduce2, out={'reduce': 'rq1'})

        # Materialise the cursor while the connection is still open.
        rq1MR = list(db.rq1.find())
        elapsed = timeit.default_timer() - start_time
    finally:
        # Release the connection on every path, including failures.
        client.close()

    return make_response(rq1MR, elapsed)
예제 #13
0
    def execute(trial=False):
        """Compute average earnings per postal code for police officers.

        The map function keeps documents titled "Police Officer" whose postal
        code, read as a number, lies strictly between 2100 and 2300.  The
        reduce function totals the earnings and the counts per postal code
        and derives the average.  Results are written to the
        ``jzhou94_katerin.avg_earnings`` collection.

        When ``trial`` is truthy only the first 100 employee records are
        copied into the output collection and the job runs on that copy.

        Returns a dict with the ``start`` and ``end`` timestamps of the run.
        """
        print("starting data retrieval")
        startTime = datetime.datetime.now()
        client = dml.pymongo.MongoClient()
        repo = client.repo
        print("repo: ", repo)
        repo.authenticate('jzhou94_katerin', 'jzhou94_katerin')

        map_function_avg_earnings = Code('''function() {
            if (this.postal*1 > 2100 && this.postal*1 < 2300 && this.title == "Police Officer")
            emit(this.postal, {tot:this.total_earnings, n: 1, avg: this.total_earnings});
            }''')

        reduce_function_avg_earnings = Code('''function(k, vs) {            
            var total = 0;
            var counts = 0;
            for (var i = 0; i < vs.length; i++)
            total += (vs[i].tot*1);
            for (var i = 0; i < vs.length; i++)
            counts += vs[i].n;
            
            return {tot:total.toFixed(2), n: counts, avg: (total/counts).toFixed(2)};
            }''')

        try:
            repo.dropPermanent('jzhou94_katerin.avg_earnings')
            repo.createPermanent('jzhou94_katerin.avg_earnings')

            if trial:
                repo['jzhou94_katerin.avg_earnings'].insert(
                    repo.jzhou94_katerin.employee_earnings.find().limit(100))
                source = repo.jzhou94_katerin.avg_earnings
            else:
                source = repo.jzhou94_katerin.employee_earnings

            source.map_reduce(
                map_function_avg_earnings, reduce_function_avg_earnings,
                'jzhou94_katerin.avg_earnings')
        finally:
            # Log out even when the job fails.
            repo.logout()

        endTime = datetime.datetime.now()
        return {"start": startTime, "end": endTime}
예제 #14
0
def get_online_report():
    """Build the online report for the current calendar month.

    A map-reduce over ``gps_info`` counts the records per ``user_id.$id``
    whose ``time`` is on or after the first day of the current month.  The
    matching ``user_info`` documents are flattened with ``to_flat_dict``,
    annotated with their ``count`` and returned as a list sorted by ``count``
    in descending order.
    """
    gps_conn = get_conn("gps_info")
    today = datetime.datetime.now()
    month_start = get_datetime_from_str(
        "{}-{}-1 0:0:0".format(today.year, today.month))
    since_month_start = {"time": {"$gte": month_start}}
    newest_first = {"time": -1}
    # Collection that stores the output; every map_reduce run clears it first.
    out = "online_report_result"
    map_func = Code("""
    function(){
        emit(this.user_id.$id, 1);
    }
    """)
    reduce_func = Code("""
    function(key, values){
        return Array.sum(values);
    }
    """)
    counted = gps_conn.map_reduce(map=map_func,
                                  reduce=reduce_func,
                                  query=since_month_start,
                                  sort=newest_first,
                                  out=out,
                                  full_response=False)

    count_dict = {}
    for row in counted.find(filter=dict()):
        count_dict[row['_id']] = int(row['value'])

    user_conn = get_conn("user_info")
    users = user_conn.find(filter={"_id": {"$in": list(count_dict)}},
                           sort=[("last_update", -1)])

    report = []
    for user in users:
        user_id = user['_id']
        entry = to_flat_dict(user)
        entry['count'] = count_dict[user_id]
        report.append(entry)
    return sorted(report, key=lambda entry: entry['count'], reverse=True)
예제 #15
0
    def get(self):
        """Return product counts grouped by creation month and nutrient levels.

        A map-reduce over ``mongo.db.products`` (output collection
        ``stats_products``) keys every product by the year and month derived
        from ``created_t`` plus its salt, fat, saturated-fat and sugars
        levels.  Each result row is returned as a flat dict.
        """
        # The guard skips documents without a usable timestamp.  It must not
        # test the month for truthiness: getMonth() returns 0 for January,
        # which would silently drop every January product.
        product_mapper = Code("function () {"
                        "   var date = new Date(this.created_t*1000);"
                        "   var parsedDateMonth = date.getMonth();"
                        "   var parsedDateYear = date.getFullYear();"
                        "   var saltLevelsValue = null;"
                        "   var fatLevelsValue = null;"
                        "   var saturatedfatLevelsValue = null;"
                        "   var sugarsLevelsValue = null;"
                        "   if(!this.created_t || isNaN(parsedDateYear) || isNaN(parsedDateMonth)) return;"
                        "   if(this.hasOwnProperty('nutrient_levels')) {"
                        "       saltLevelsValue = this.nutrient_levels.salt;"
                        "       fatLevelsValue = this.nutrient_levels.fat;"
                        "       saturatedfatLevelsValue = this['nutrient_levels']['saturated-fat'];"
                        "       sugarsLevelsValue = this.nutrient_levels.sugars;"
                        "   } else { saltLevelsValue = null; fatLevelsValue = null; saturatedfatLevelsValue = null, sugarsLevelsValue = null}"
                        "   emit({year : parsedDateYear, month : parsedDateMonth, saltlevels : saltLevelsValue, fatlevels : fatLevelsValue, saturatedfatlevels : saturatedfatLevelsValue, sugarslevels : sugarsLevelsValue}, {count:1});"
                        "};")

        count_reducer = Code("function (key, values) {"
                    "   var count = 0;"
                    "   var ret = {count : 0};"
                    "       for (index in values) {"
                    "           ret.count += values[index].count;"
                    "       }"
                    "       return ret;"
                    "   };")

        result = mongo.db.products.map_reduce(
            product_mapper, count_reducer, "stats_products")
        return [
            {
                'dateyear': doc['_id']['year'],
                'datemonth': doc['_id']['month'],
                'count': doc['value']['count'],
                'saltlevels': doc['_id']['saltlevels'],
                'saturatedfatlevels': doc['_id']['saturatedfatlevels'],
                'sugarslevels': doc['_id']['sugarslevels'],
                'fatlevels': doc['_id']['fatlevels'],
            }
            for doc in result.find()
        ]
예제 #16
0
def word_count(coll):
    """Count words in the ``text`` of one sender's documents and print them.

    The map function splits ``text`` on single spaces and emits 1 for every
    lower-cased word longer than one character.  The reduce function sums
    those values.  The output is written to the ``word_cnt`` collection and
    every result document is printed.
    """
    # NOTE(review): the replace() calls below receive plain strings, not
    # regular expressions, so they only replace those exact character
    # sequences.  They look like mangled regex literals -- confirm the
    # intended cleaning rules before changing them.
    mapper = Code(""" 
                function () { 
                    txt=this.text ;

                    wrds =  txt.split(" ");
                    for ( i=0 ; i < wrds.length ; i++) {
                        if (  wrds[i].length >1  ) { 
                            wrd=wrds[i].toLowerCase().replace('"|,',' ');
                            wrd=wrd.replace('(\\n)+','');
                            wrd=wrd.replace('.|;|,)/gi','');
                            emit(wrd,  1  );
                            }
                    }

                }                
            """)

    # The reducer returns a plain number, the same shape the mapper emits.
    # A reduce result may be passed to reduce again, so it has to sum the
    # values rather than count them.
    reducer = Code("""
                function (key, values) {
                    return Array.sum(values);
                }
    """)

    print("\n\n")
    pprint.pprint(coll)
    result = coll.map_reduce(mapper,
                             reducer,
                             "word_cnt",
                             query={
                                 "sender": "*****@*****.**"
                             }).find()
    for doc in result:
        print(doc)
예제 #17
0
 def _map_reduce(self, coll, mapreduce, spec=None):
     """
     Run the stored map/reduce pair called ``mapreduce`` over ``coll``.

     The pair is looked up by name in ``self.mrcol``; ``spec``, when
     truthy, is passed on as the query. Returns the map/reduce result
     and raises Exception when no pair with that name is stored.
     """
     self.logger.debug("(%s, %s)" % (mapreduce, spec))
     record = find_one(self.mrcol, {'name': mapreduce})
     if not record:
         raise Exception("Map/reduce function '%s' not found" % mapreduce)
     fmap, freduce = record['map'], record['reduce']
     # Only pass a query when a condition was supplied.
     extra = {'query': spec} if spec else {}
     result = coll.map_reduce(Code(fmap), Code(freduce), **extra)
     self.logger.info(
         "found %s records in %s" % (result.count(), result.name))
     for source in (fmap, freduce):
         self.logger.debug(source)
     return result
예제 #18
0
 def photographer_shots_count():
     """Return one ``{'name', 'count'}`` dict per photographer.

     A map-reduce over ``mongo_client.photo.photo`` (output collection
     ``photographer_phs``) sums 1 per document keyed by ``photographer``;
     each key is then resolved to a name via ``Photographer.find_by_id``.
     """
     mapper = Code("""
   function() {
     emit(this.photographer, 1);
   }
 """)
     reducer = Code("""
   function(k, vs) {
     return Array.sum(vs);
   }
 """)
     photos = mongo_client.photo.photo
     rows = photos.map_reduce(mapper, reducer, "photographer_phs").find()
     return [
         {
             'name': Photographer.find_by_id(row['_id'])['name'],
             'count': row['value']
         }
         for row in rows
     ]
예제 #19
0
def updateDocFreq(patDB, outColName='corpusDict'):
    """Run the document-frequency map-reduce over ``patDB.patns``.

    The map, reduce and finalize JavaScript sources are read from files in
    the current working directory.  Results are reduced into the collection
    named by ``outColName``.
    """
    # Context managers close the script files even when reading fails.
    with open('fresh_docFreqMap.js') as map_file:
        map_js = Code(map_file.read())
    with open('fresh_docFreqReduce.js') as reduce_file:
        reduce_js = Code(reduce_file.read())

    # The finalize step needs the total number of documents; it is injected
    # by replacing every TOTALDOCS placeholder in docFreqFinalize.js.
    size = patDB.patns.count()
    with open('docFreqFinalize.js') as finalize_file:
        finIDF = finalize_file.read().replace('TOTALDOCS', str(size))

    # 'out' uses 'reduce' so new results are combined with existing ones.
    # NOTE(review): the original author was unsure whether combining works
    # with the way docFreqFinalize.js computes its scores -- verify.
    patDB.patns.map_reduce(map_js,
                           reduce_js,
                           out={'reduce': outColName},
                           finalize=finIDF)
예제 #20
0
 def setUp(self):
     """Create the encoder plus a ``Code`` fixture and its expected fields."""
     from bson.code import Code
     expected = {
         "code": "console.log('HAAAAAAAAHHHH!!!')",
         "scope": {
             "data": "test"
         }
     }
     self.encoder = GoodJSONEncoder()
     self.expected = expected
     # The fixture is built from the same mapping it is compared against.
     self.data = Code(**expected)
예제 #21
0
 def test__group3(self):
     """Compare ``group`` on key ``foo`` restricted by an ``$in`` condition."""
     count_reducer = Code("""
         function(obj, result) {result.count+=1 }
         """)
     only_first_id = {'foo': {'$in': [self._id1]}}
     self.cmp.compare.group(key=['foo'],
                            condition=only_first_id,
                            initial={"count": 0},
                            reduce=count_reducer)
예제 #22
0
def cutoffSL(doc, cutoff):
    """Return per-length counts for ``doc`` as strings, for a sparkline.

    A map-reduce over ``Pippies`` counts the entries whose ``docs`` matches
    ``doc._id``, grouped by ``len``.  The counts are laid out by length from
    0 up to the largest length seen ('0' where a length is absent) and the
    slice ``[4:cutoff]`` of that list is returned.  An empty list is returned
    when there is nothing to count.
    """
    mapper = Code("function(){ emit( this.len , { count : 1 } );}")
    reducer = Code(
        "function (key, values) { var count = 0; values.forEach(function (v) {count += v.count;}); return {count: count}; }"
    )
    if not (Pippies.count() > 0):
        return []

    rows = Pippies.map_reduce(
        mapper, reducer, 'cutoff sparkline', query={'docs': doc._id}).find()
    counts = dict((row['_id'], int(row['value']['count'])) for row in rows)
    if not counts:
        return []

    upper = int(max(counts) + 1)
    series = [str(counts[n]) if n in counts else '0' for n in xrange(upper)]
    return series[4:cutoff]
예제 #23
0
    def sum(self, field):
        """Add up the values of ``field`` across the documents.

        An inline map/reduce emits every value found at ``field`` (each item
        separately when the value is an array) under a single key and the
        reduce step adds them together.

        :param field: the field to sum over; use dot-notation to refer to
            embedded document fields
        :returns: a future that resolves to the ``value`` of the first
            map/reduce result, or to 0 when there are no results
        """
        map_func = """
            function() {
                var path = '{{~%(field)s}}'.split('.'),
                field = this;

                for (p in path) {
                    if (typeof field != 'undefined')
                       field = field[path[p]];
                    else
                       break;
                }

                if (field && field.constructor == Array) {
                    field.forEach(function(item) {
                        emit(1, item||0);
                    });
                } else if (typeof field != 'undefined') {
                    emit(1, field||0);
                }
            }
        """ % {'field': field}

        reduce_func = Code("""
            function(key, values) {
                var sum = 0;
                for (var i in values) {
                    sum += values[i];
                }
                return sum;
            }
        """)

        pending = self.inline_map_reduce(map_func, reduce_func)
        future = get_future(self)

        def sum_cb(mr_future):
            # Only the first result matters; a private sentinel marks the
            # "no results" case.
            missing = object()
            first = next(iter(mr_future.result()), missing)
            future.set_result(0 if first is missing else first.value)

        pending.add_done_callback(sum_cb)
        return future
예제 #24
0
파일: comber.py 프로젝트: s977043/ALife-Lab
def top_n_map(n):
    """Build a map function emitting the first ``n`` tf-idf values per document.

    The generated JavaScript reads up to ``n`` entries of ``sorted_text``
    (fewer when the array is shorter), collects their ``'tf-idf'`` values and
    emits them as ``{'vals': [...]}`` under the single key ``"tf-idf"``.

    ``n`` is formatted into the source with ``%d``.
    """
    return Code('''
	function() {
	  var to_return = Math.min(%d, this.sorted_text.length);
	  var out_arr = [];
	  for (var i = 0; i < to_return; i++) {
	    out_arr[i] = this.sorted_text[i]['tf-idf']
	  };
	  emit("tf-idf", {'vals':out_arr})
	};''' % n)
예제 #25
0
 def mapFunction(self):
     """Build the map function, store it on ``self.mapcode`` and return it.

     The JavaScript emits, keyed by ``nppes_provider_state``, a value with a
     count of 1 plus the document's submitted charge and Medicare payment
     amounts.
     """
     js_source = ("function() {"
                  "  var key = this.nppes_provider_state;"
                  "  var value = {"
                  "  count: 1,"
                  "  claim: this.average_submitted_chrg_amt,"
                  "  payment: this.average_Medicare_payment_amt"
                  " };"
                  " emit(key, value);"
                  "};")
     self.mapcode = Code(js_source)
     return self.mapcode
예제 #26
0
 def mapTopDriver(self):
     """Count orders per driver name and print the result documents.

     A map-reduce over ``self.db.order`` emits ``{count: 1}`` keyed by
     ``driver.name`` and sums the counts into the ``result`` collection.
     """
     mapper = Code("""
                         function() {
                                var key = this.driver.name;
                                var value = {count : 1};
                                emit(key, value);
                         };
                         """)
     reducer = Code("""
                             function (key, values) {
                                 var count = 0;
                                 for(var i in values){
                                     count += values[i].count;
                                 }
                                 return {count: count};
                             };
                             """)
     result = self.db.order.map_reduce(mapper, reducer, "result")
     res = list(result.find())
     # print() with one argument behaves the same on Python 2 and Python 3.
     print(res)
예제 #27
0
 def mapTopCompany(self):
     """Count flights per company name and print the result documents.

     A map-reduce over ``self.db.Flights`` emits ``{count: 1}`` keyed by
     ``Companies.Name`` and sums the counts into the ``result`` collection.
     """
     mapper = Code("""
                     function() {
                         var key = this.Companies.Name;
                         var value = {count : 1};
                         emit(key, value);
                     };
                     """)
     reducer = Code("""
                     function(key, values) {
                         var count = 0;
                         for(var i in values) {
                             count += values[i].count;
                         }
                         return {count: count};
                     };
                     """)
     result = self.db.Flights.map_reduce(mapper, reducer, 'result')
     res = list(result.find())
     # print() with one argument behaves the same on Python 2 and Python 3.
     print(res)
예제 #28
0
 def __init__(self):
     """Open the database and prepare the tag-counting map/reduce pair.

     The map function emits 1 for every entry of a document's ``tags``;
     the reduce function adds the emitted values up per tag.
     """
     tag_mapper = (
         "function() {"
         "  this.tags.forEach("
         "    function(tag) {"
         "      emit(tag, 1);"
         "    }"
         "  )"
         "}"
     )
     total_reducer = (
         "function(key, values) {"
         "  var total = 0;"
         "  for (var i=0; i<values.length; i++) {"
         "    total += values[i];"
         "  }"
         "  return total;"
         "}"
     )
     self.mongo = Database()
     self.map = Code(tag_mapper)
     self.reduce = Code(total_reducer)
예제 #29
0
 def reduceFunction(self):
     """Build the reduce function, store it on ``self.reducecode`` and return it.

     The JavaScript adds up the ``count``, ``claim`` and ``payment`` fields
     of all values received for one key.
     """
     js_source = ("function(keyState, countStVals) {"
                  "  reduceVal = {count: 0, claim: 0, payment: 0};"
                  "  for (var provider = 0; provider < countStVals.length; provider++) {"
                  "    reduceVal.count += countStVals[provider].count;"
                  "    reduceVal.claim += countStVals[provider].claim;"
                  "    reduceVal.payment += countStVals[provider].payment;"
                  "  };"
                  "  return reduceVal;"
                  "};")
     self.reducecode = Code(js_source)
     return self.reducecode
예제 #30
0
def test_qop_ne_6(monty_find, mongo_find):
    """``$ne`` with a plain string must match a nested ``Code`` value.

    MongoDB is expected to return the single document, and the Monty
    implementation must return the same number of documents.
    """
    nested_code_doc = {"a": [{"b": Code("a")}]}
    docs = [nested_code_doc]
    spec = {"a.b": {"$ne": "a"}}

    monty_c, mongo_c = [find(docs, spec) for find in (monty_find, mongo_find)]

    assert mongo_c.count() == 1
    assert monty_c.count() == mongo_c.count()