def test_repr(self):
     # repr() of a Code must show both the source text and the scope.

     # Scope given as an empty dict, then mutated in place.
     mutable = Code("hello world", {})
     self.assertEqual(repr(mutable), "Code('hello world', {})")
     mutable.scope["foo"] = "bar"
     self.assertEqual(repr(mutable), "Code('hello world', {'foo': 'bar'})")

     # Scope supplied up front.
     prefilled = Code("hello world", {"blah": 3})
     self.assertEqual(repr(prefilled), "Code('hello world', {'blah': 3})")

     # No scope at all, with non-printable characters in the source text.
     raw_text = "\x08\xFF"
     unscoped = Code(raw_text)
     self.assertEqual(repr(unscoped), "Code(%s, None)" % (repr(raw_text),))
 def test_code(self):
     # String methods work on a Code, a plain string is not a Code, and a
     # Code built without a scope exposes an empty, mutable scope dict.
     #
     # NOTE(review): another ``test_code`` is defined directly after this
     # one in the file; if both live in the same class, the later
     # definition replaces this one and this test never runs.
     a_string = "hello world"
     a_code = Code("hello world")
     # assertTrue instead of the deprecated ``assert_`` alias, which newer
     # unittest versions no longer provide.
     self.assertTrue(a_code.startswith("hello"))
     self.assertTrue(a_code.endswith("world"))
     self.assertTrue(isinstance(a_code, Code))
     self.assertFalse(isinstance(a_string, Code))
     self.assertEqual(a_code.scope, {})
     a_code.scope["my_var"] = 5
     self.assertEqual(a_code.scope, {"my_var": 5})
 def test_code(self):
     # A Code built from plain text supports string methods and has no
     # scope; a plain string is not a Code.
     plain_text = "hello world"
     scopeless = Code("hello world")
     self.assertTrue(scopeless.startswith("hello"))
     self.assertTrue(scopeless.endswith("world"))
     self.assertTrue(isinstance(scopeless, Code))
     self.assertFalse(isinstance(plain_text, Code))
     self.assertIsNone(scopeless.scope)

     # An explicit scope -- populated or empty -- is kept as given.
     scoped = Code('hello world', {'my_var': 5})
     self.assertEqual({'my_var': 5}, scoped.scope)
     unpopulated = Code('hello world', {})
     self.assertEqual({}, unpopulated.scope)

     # Wrapping an existing Code keeps its text and combines both scopes.
     rescoped = Code(scoped, {'new_var': 42})
     self.assertEqual(str(scoped), str(rescoped))
     self.assertEqual({'new_var': 42, 'my_var': 5}, rescoped.scope)

     # Constructing from non-ASCII text must not raise.
     Code(u'héllø world¡')
Beispiel #4
0
    def where(self, code):
        """Attach a `$where`_ clause to this query.

        `code` is a JavaScript expression, given either as a
        :class:`basestring` (:class:`str` in python 3) or as a
        :class:`~bson.code.Code`. The expression is evaluated for each
        document scanned, and only documents for which it evaluates to
        *true* are returned as results. Inside the expression the keyword
        *this* refers to the object currently being scanned.
        For example::

            # Find all documents where field "a" is less than "b" plus "c".
            for doc in db.test.find().where('this.a < (this.b + this.c)'):
                print(doc)

        A value that is not already a :class:`~bson.code.Code` is wrapped
        in one before being stored.

        Raises :class:`TypeError` if `code` is not an instance of
        :class:`basestring` (:class:`str` in python 3). Raises
        :class:`~pymongo.errors.InvalidOperation` if this
        :class:`Cursor` has already been used. Only the last call to
        :meth:`where` applied to a :class:`Cursor` has any effect.

        .. note:: MongoDB 4.4 drops support for :class:`~bson.code.Code`
          with scope variables. Consider using `$expr`_ instead.

        :Parameters:
          - `code`: JavaScript expression to use as a filter

        :Returns:
          This cursor, so that calls can be chained.

        .. _$expr: https://docs.mongodb.com/manual/reference/operator/query/expr/
        .. _$where: https://docs.mongodb.com/manual/reference/operator/query/where/
        """
        self.__check_okay_to_chain()
        where_clause = code if isinstance(code, Code) else Code(code)
        self.__spec["$where"] = where_clause
        return self
Beispiel #5
0
    def test_cursor(self):
        # Insert documents holding several BSON value kinds, dump the
        # resulting cursor with json_util and check that every inserted
        # document is present after loading the dump back.
        db = self.db

        db.drop_collection("test")
        docs = [
            {'foo': [1, 2]},
            {'bar': {'hello': 'world'}},
            {'code': Code("function x() { return 1; }")},
            {'bin': Binary(b("\x00\x01\x02\x03\x04"))},
            {'dbref': {'_ref': DBRef('simple',
                                     ObjectId('509b8db456c02c5ab7e63c34'))}},
        ]

        db.test.insert(docs)
        round_tripped = json_util.loads(json_util.dumps(db.test.find()))
        for inserted in docs:
            self.assertTrue(inserted in round_tripped)
Beispiel #6
0
    def where(self, code: Union[str, Code]) -> 'Cursor':
        """Attach a $where clause to this query.

        `code` is a JavaScript expression, given either as a :class:`str`
        or as a :class:`~bson.code.Code`. The expression is evaluated for
        each document scanned, and only documents for which it evaluates
        to *true* are returned as results. Inside the expression the
        keyword *this* refers to the object currently being scanned.

        A value that is not already a :class:`~bson.code.Code` is wrapped
        in one before being stored.

        Raises :class:`TypeError` if `code` is not an instance of
        :class:`str`. Raises :class:`~pymongo.errors.InvalidOperation` if this
        :class:`Cursor` has already been used. Only the last call to
        :meth:`where` applied to a :class:`Cursor` has any effect.

        :Parameters:
          - `code`: JavaScript expression to use as a filter

        :Returns:
          This cursor, so that calls can be chained.
        """
        self.__check_okay_to_chain()
        where_clause = code if isinstance(code, Code) else Code(code)
        self.__spec['$where'] = where_clause
        return self
Beispiel #7
0
def getErr(queryData, beginTime, endTime):
    """Count recorded failures for the IMSIs in `queryData` inside a time window.

    A filter on ``vsimImsi`` and ``errorTime`` is built from the arguments,
    a grouped count keyed by ``vsimImsi``/``errType``/``errCode`` is run on
    ``t_term_vsim_estfail`` through `pygroup`, and the grouped rows are
    combined with the input rows by `assemErrInfo`.

    :param queryData: iterable of mappings, each providing an ``'imsi'`` entry
    :param beginTime: start of the window, converted with `datetime_timestamp`
    :param endTime: end of the window, converted with `datetime_timestamp`
    :return: the value produced by `assemErrInfo`
    """
    # IMSIs under investigation, normalised to strings.
    imsi_values = [str(row['imsi']) for row in queryData]

    # Window bounds are scaled by 1000 -- presumably seconds to
    # milliseconds; confirm against what datetime_timestamp returns.
    window_start = datetime_timestamp(beginTime) * 1000
    window_end = datetime_timestamp(endTime) * 1000
    condition = {
        "vsimImsi": {"$in": imsi_values},
        "errorTime": {"$gte": window_start, "$lte": window_end},
    }

    msMog = msmongo(MongoClient=MongoClient["N_oss_perflog"],
                    Database=Database["N_oss_perflog"],
                    Sheet=Sheet["t_term_vsim_estfail"])

    # Group specification: one counter per (vsimImsi, errType, errCode).
    group_key = {"vsimImsi": 1, "errType": 1, "errCode": 1}
    start_value = {"count": 0}
    count_reducer = Code("""function(obj, prev){prev.count ++}""")
    result_columns = ["vsimImsi", "errType", "errCode", "count"]

    grouped_errors = pygroup(msMog, "t_term_vsim_estfail", group_key,
                             condition, start_value, count_reducer,
                             result_columns)
    return assemErrInfo(queryData=queryData, errData=grouped_errors)
Beispiel #8
0
    def test_group_with_scope(self):
        # The reduce function reads ``inc_value`` from the Code scope, so
        # the grouped count over the two saved documents must scale with
        # the value placed in that scope.
        db = self.db
        db.drop_collection("test")
        db.test.save({"a": 1})
        db.test.save({"b": 1})

        reduce_function = "function (obj, prev) { prev.count += inc_value; }"

        # (expected grouped count, inc_value placed in the scope)
        cases = [(2, 1), (4, 2), (1, 0.5)]

        for expected, inc_value in cases:
            grouped = db.test.group([], {}, {"count": 0},
                                    Code(reduce_function,
                                         {"inc_value": inc_value}))
            self.assertEqual(expected, grouped[0]['count'])

        # The same checks with command=True, on servers at least (1, 1).
        if version.at_least(db.connection, (1, 1)):
            for expected, inc_value in cases:
                grouped = db.test.group([], {}, {"count": 0},
                                        Code(reduce_function,
                                             {"inc_value": inc_value}),
                                        command=True)
                self.assertEqual(expected, grouped[0]['count'])
Beispiel #9
0
def _get_code_w_scope(data, position, obj_end, opts, element_name):
    """Decode a BSON code_w_scope to bson.code.Code.

    Reads the code string and then the scope document, and returns a
    ``(Code, position)`` pair where ``position`` is the one reported by
    the scope decoder.
    """
    # The string starts 4 bytes past `position` -- presumably skipping the
    # code_w_scope total-length prefix; confirm against the BSON spec.
    code_start = position + 4
    code, scope_start = _get_string(data, code_start, obj_end, opts,
                                    element_name)
    scope, end = _get_object(data, scope_start, obj_end, opts, element_name)
    return Code(code, scope), end
Beispiel #10
0
def _parse_canonical_code(doc):
    """Decode a JSON code to bson.code.Code."""
    for key in doc:
        if key not in ('$code', '$scope'):
            raise TypeError('Bad $code, extra field(s): %s' % (doc, ))
    return Code(doc['$code'], scope=doc.get('$scope'))
Beispiel #11
0
def _get_code(data, position, obj_end, opts):
    """Decode a BSON code to bson.code.Code.

    Returns a ``(Code, position)`` pair, where ``position`` is the one
    reported by the string decoder.
    """
    text, next_position = _get_string(data, position, obj_end, opts)
    return Code(text), next_position
Beispiel #12
0
def speed_query_(begTime, endTime, timeType, names, group, sex, minAge,
                 maxAge):
    """Aggregate speed statistics from ``db.rtls`` with a map-reduce job.

    A JavaScript map function is assembled from the filter arguments. It
    skips documents that fail any filter and emits
    ``{sum, count, minSpeed, maxSpeed}`` keyed by a time bucket derived
    from ``this.time``. The reduce function combines those values and the
    finalize function adds ``aveSpeed``. The job merges its output into
    the ``result`` collection; the documents found there are printed as
    JSON and returned.

    String filters (`names`, `group`, `sex`) are embedded as JSON string
    literals so quotes or backslashes in them cannot break out of the
    generated JavaScript.

    NOTE(review): `minAge`, `maxAge`, `begTime` and `endTime` are still
    inserted as their plain text form, so callers must pass numbers (or
    numeric strings) only.

    :param begTime: lower bound for ``this.time``
    :param endTime: upper bound for ``this.time``
    :param timeType: one of ``TIME_TYPE_DAY`` / ``TIME_TYPE_MONTH`` /
        ``TIME_TYPE_YEAR``; selects the bucket size
    :param names: names to keep; falsy disables the filter
    :param group: substring looked up in ``this.group``; falsy disables it
    :param sex: value required for ``this.sex``; falsy disables the filter
    :param minAge: minimum ``this.age``; falsy (including 0) disables it
    :param maxAge: maximum ``this.age``; falsy (including 0) disables it
    :return: list of the documents read back from the job's output
    """
    TIME_OFFSET = 8 * 60 * 60

    map_js = ["function() {"]
    if names:
        # Leave the one-shot loop early when the name is wanted; otherwise
        # fall through to the "return" and skip the document.
        map_js.append("while(true){")
        for name in names:
            map_js.append("if(this.name == " + json.dumps(name) + ")")
            map_js.append("break;")
        map_js.append("return;")
        map_js.append("}")
    if group:
        map_js.append("if(this.group.indexOf(" + json.dumps(group) + ")==-1)")
        map_js.append("return;")
    if sex:
        map_js.append("if(this.sex != " + json.dumps(sex) + ")")
        map_js.append("return;")

    if minAge:
        map_js.append("if(this.age < " + str(minAge) + ")")
        map_js.append("return;")

    if maxAge:
        map_js.append("if(this.age > " + str(maxAge) + ")")
        map_js.append("return;")

    # str() lets the bounds be passed as numbers as well as strings.
    map_js.append("if(this.time < " + str(begTime) + ")")
    map_js.append("return;")
    map_js.append("if(this.time > " + str(endTime) + ")")
    map_js.append("return;")

    map_js.append("var date = new Date(this.time*1000);")
    # NOTE(review): for any other timeType the JavaScript variable "time"
    # is never declared before it is used below.
    if timeType == TIME_TYPE_DAY:
        map_js.append("var time = Date.UTC(date.getFullYear(), date.getMonth(), date.getDate());")
    elif timeType == TIME_TYPE_MONTH:
        map_js.append("var time = Date.UTC(date.getFullYear(), date.getMonth());")
    elif timeType == TIME_TYPE_YEAR:
        map_js.append("var time = Date.UTC(date.getFullYear(), 0);")

    map_js.append("time /= 1000;")
    map_js.append("time -=" + str(TIME_OFFSET) + ";")

    map_js.append("emit(time, {sum: this.speed, count: 1, minSpeed: this.speed, maxSpeed: this.speed});")
    map_js.append("}")
    map_func = Code("".join(map_js))

    reduce_func = Code("".join([
        "function(key, values) {",
        "var result = {sum: 0, count: 0, minSpeed: 99999, maxSpeed: 0};",
        "values.forEach( function(value) {",
        "result.sum += value.sum;",
        "result.count += value.count;",
        "if(result.maxSpeed < value.maxSpeed ){",
        "result.maxSpeed = value.maxSpeed",
        "}",
        "if(result.minSpeed > value.minSpeed){",
        "result.minSpeed = value.minSpeed",
        "}",
        "});",
        "return result;",
        "}",
    ]))

    finalize_func = Code("".join([
        "function (key, reducedVal) {",
        "reducedVal.aveSpeed = reducedVal.sum / reducedVal.count;",
        "return reducedVal;",
        "}",
    ]))

    result = db.rtls.map_reduce(map_func,
                                reduce_func,
                                finalize=finalize_func,
                                out={'merge': 'result'})
    ret = list(result.find())

    # Call form of print: valid on Python 3, same output on Python 2.
    print(json.dumps(ret))

    return ret
Beispiel #13
0
from pymongo import MongoClient
from bson.code import Code
import pprint

client = MongoClient('mongodb://localhost:27017')
db = client['data']

# Map step: key every import record by its port/country of origin and emit
# the value of the goods together with their description.
mapper = Code("""
                function() {
                    var value = {}
                    value.val = this.value_of_goods_in_rupees
                    value.desc = this.description_of_goods
                    emit(this.port_or_country_of_origin, value);
                }
            """)

# Reduce step: keep the most valuable goods seen for a key.
# Fixes over the previous version:
#   * compare vals[i].val (a number) instead of the whole object, which
#     never compared greater than max;
#   * read the description from vals[i] instead of the undefined "descs";
#   * return an object shaped like the emitted value, so keys that are
#     reduced and keys with a single emitted value store the same shape and
#     the function stays correct if its output is reduced again.
reducer = Code("""
                function (key, vals) {
                 var max = 0.0;
                 var desc = "";
                 for(var i = 0; i < vals.length; i++ ){
                    if(vals[i].val > max){
                        max =  vals[i].val;
                        desc = vals[i].desc;
                    }
                 }
                 return {val: max, desc: desc};
             }
            """)

# Results are written to the "import_port" collection.
result = db.imports.map_reduce(mapper, reducer, "import_port")
Beispiel #14
0
def make_leaderboard(condition):
    """
    Build a leaderboard of users from the history collection.

    'condition' is the query object used to select history records. The
    matching records are grouped by user with a JavaScript reducer that
    accumulates a point total, per-activity counters and a tally of the
    texts each user worked on.

    This function queries and calculates for all currently matching history.

    Returns the grouped entries sorted by descending 'count'.
    """

    reducer = Code("""
                    function(obj, prev) {

                        // Total Points
                        switch(obj.rev_type) {
                            case "add text":
                                if (obj.language !== 'he' && obj.version === "Sefaria Community Translation") {
                                    prev.count += Math.max(obj.revert_patch.length / 10, 10);
                                    prev.translateCount += 1
                                } else if(obj.language !== 'he') {
                                    prev.count += Math.max(obj.revert_patch.length / 400, 2);
                                    prev.addCount += 1
                                } else {
                                    prev.count += Math.max(obj.revert_patch.length / 800, 1);
                                    prev.addCount += 1
                                }
                                break;
                            case "edit text":
                                prev.count += Math.max(obj.revert_patch.length / 1200, 1);
                                prev.editCount += 1
                                break;
                            case "revert text":
                                prev.count += 1;
                                break;
                            case "review":
                                prev.count += 15;
                                prev.reviewCount += 1;
                                break;
                            case "add index":
                                prev.count += 5;
                                break;
                            case "edit index":
                                prev.count += 1;
                                prev.editCount += 1
                                break;
                            case "add link":
                                prev.count += 2;
                                prev.linkCount += 1;
                                break;
                            case "edit link":
                                prev.editCount += 1
                                prev.count += 1;
                                break;
                            case "delete link":
                                prev.count += 1;
                                break;
                            case "add note":
                                prev.count += 1;
                                prev.noteCount += 1;
                                break;
                            case "edit note":
                                prev.count += 1;
                                break;
                            case "delete note":
                                prev.count += 1;
                                break;
                        }

                        // Texts worked on
                        var refs = []
                        if ("ref" in obj && obj.ref) {
                            refs.push(obj.ref);
                        } else if ("refs" in obj && obj.refs[0] && obj.refs[1]) {
                            refs.push(obj.refs[0]);
                            refs.push(obj.refs[1]);
                        }
                        refs.forEach(function(ref) {
                            var text = ref;
                            var i = text.search(/\d/);
                            var text = text.slice(0,i).trim()

                            if (prev.texts[text]) {
                                prev.texts[text] += 1;
                            } else {
                                prev.texts[text] = 1;
                            }
                        });
                    }
                """)

    # Starting accumulator handed to the reducer for every user.
    starting_totals = {
        'count': 0,
        'translateCount': 0,
        'addCount': 0,
        'editCount': 0,
        'linkCount': 0,
        'noteCount': 0,
        'reviewCount': 0,

        'texts': {}
    }

    leaders = db.history.group(['user'], condition, starting_totals, reducer)

    # Negated key puts the highest point totals first.
    return sorted(leaders, key=lambda entry: -entry["count"])
Beispiel #15
0
def mapReduce(collection):
    """Run a word/sentiment frequency map-reduce on `collection`.

    The map step emits 1 for each "<word> : <sentiment>" key, the reduce
    step sums the emitted values, and the output goes to the
    "words_frequency" collection. Nothing is returned.
    """
    emit_word_sentiment = Code(
        "function() {emit(this.word+' : '+this.sentiment,1);};")
    sum_counts = Code("function(word,count) {return Array.sum(count);};")
    collection.map_reduce(emit_word_sentiment, sum_counts, "words_frequency")
    return
    client = MongoClient(args.host)
    # SQL database
    database = client[args.database]
    # SQL table
    collection = database[args.collection]

    # http://docs.mongodb.org/manual/reference/method/db.collection.mapReduce/
    from bson.code import Code

    mapper = None
    if args.value_array_type:
        mapper = Code("""
			function() {
				this.""" + str(args.key) + """.forEach(function(z) {
					(""+z).split(/[\s\[\],\(\)"\.]+/).forEach(function(v) {
						if(v && v.length )
							emit(v, {'count':1});
					});
				});
			}
			""")
    else:
        #mapper = Code (
        mapper = """
			function() {
				var func = {
					'author':function() {
						return 'changyy';
					}
				};
				(""+this.""" + str(
            args.key) + """).split(/[\s\[\],\(\)"\.]+/).forEach(function(v){
Beispiel #17
0
def _get_code(data, position, as_class, tz_aware, uuid_subtype, compile_re):
    """Decode a string with `_get_string` and wrap it in a Code.

    All arguments are forwarded unchanged to `_get_string`. Returns a
    ``(Code, position)`` pair using the position that `_get_string` reports.
    """
    decoded = _get_string(data, position,
                          as_class, tz_aware, uuid_subtype, compile_re)
    text, next_position = decoded
    return Code(text), next_position
Beispiel #18
0
    print('{}\t{}'.format(x, counter))

print('unique_tweet_count\t{}'.format(unique_tweet_count))
"""

# Map step: for every pronoun found in the tweet text, emit the pronoun.
#
# The literal is a raw string so that the doubled backslashes reach the
# JavaScript source unchanged. Without the ``r`` prefix Python collapses
# ``\\W`` / ``\\b`` to ``\W`` / ``\b``, which the JavaScript string literal
# then reads as "W" and a backspace character, so the pattern never matched.
#
# num_a is a fresh array on every iteration and receives exactly one push
# before emit, so the emitted value is always 1 per (tweet, pronoun) match.
mapper = Code(r"""
                function () {
                    var pronouns = ["han","hon","den","det","denna","denne","hen"];
                    pronouns.sort();
                    var tweet = this.tweet;

                    for (var i = 0; i < pronouns.length; i++){
                        var replace = "[\\W \\w]?\\b" + pronouns[i] + "\\b[\\W \\w]?";
                        var regrex = new RegExp(replace, "ig")
                        var num_a = [];
                        var num_n = 0;
                        if (tweet.match(regrex) == null) {
                            num_n = 0;
                        } else {
                            num_a.push(tweet.match(regrex));
                            emit(pronouns[i], num_a.length);
                        }
                    }

                }
                """)

reducer = Code("""
                function (key, values) {
                    var total = 0;
                    for (var i = 0; i < values.length; i++) {
Beispiel #19
0
def getNews(**kwargs):
    """Return one page of news, ordered per user when ``user_id`` is given.

    Keyword arguments:
      - ``page_num``: 1-based page index (default 1); the page size is
        ``NEWS_PER_PAGE``.
      - ``user_id``: optional user id. Without it, the newest ``MAX_NEWS``
        items are paged in ``publishedAt`` order.

    With a ``user_id``, an ordered list of news digests is read from Redis
    or, when absent, computed and stored there with a
    ``TIMEOUT_USER_IN_SECONDS`` expiry. When the user has a non-empty
    ``preference``, each item is scored as a recency factor multiplied by
    the preference entry for the item's class, and digests are ordered by
    descending score; otherwise digests keep the ``publishedAt`` order.
    The requested slice of digests is then resolved to documents through a
    map-reduce keyed by position in the slice.

    Returns a list of news dicts with ``_id`` and ``text`` removed and
    ``publishedAt`` converted with ``isoformat()``, or ``None`` when the
    ``user_id`` matches no user.
    """
    page_num = kwargs.get('page_num', 1)
    start = (page_num - 1) * NEWS_PER_PAGE
    end = page_num * NEWS_PER_PAGE

    if 'user_id' not in kwargs:
        news_list = list(
            NEWS_COLLECTION.find({},
                                 limit=MAX_NEWS,
                                 sort=[('publishedAt', -1)]))
        news_list = news_list[start:end]
        for news in news_list:
            del news['_id']
            del news['text']
            news['publishedAt'] = news['publishedAt'].isoformat()
        return news_list

    user_id = kwargs['user_id']

    # Read the cache exactly once: a second read could find the key expired
    # and hand None to pickle.loads.
    cached = redis_client.get(user_id)
    if cached is None:
        user = USER_COLLECTION.find_one({'_id': ObjectId(user_id)})
        if user is None:
            return None
        news_list = list(
            NEWS_COLLECTION.find({},
                                 limit=MAX_NEWS,
                                 sort=[('publishedAt', -1)]))
        preference = user.get('preference')
        # An empty news list has no newest/oldest item to score against.
        if preference and news_list:
            newest = news_list[0]['publishedAt'].timestamp()
            min_time = news_list[-1]['publishedAt'].timestamp()
            time_diff = newest - min_time
            smooth_portion = SMOOTH_FACTOR * time_diff
            span = time_diff + smooth_portion
            scored = []
            for news in news_list:
                if span:
                    recency = (news['publishedAt'].timestamp() - min_time +
                               smooth_portion) / span
                else:
                    # No usable time spread (e.g. all items share one
                    # timestamp): rank on preference alone.
                    recency = 1.0
                scored.append(
                    (news['digest'],
                     recency * preference[news['class'] - 1]))
            scored.sort(key=itemgetter(1), reverse=True)
            digest_list = [digest for digest, _ in scored]
        else:
            digest_list = [news['digest'] for news in news_list]
        redis_client.set(user_id,
                         pickle.dumps(digest_list),
                         ex=TIMEOUT_USER_IN_SECONDS)
    else:
        # SECURITY NOTE(review): pickle.loads can execute arbitrary code if
        # the cached bytes are attacker-controlled. Acceptable only while
        # this Redis instance is trusted; consider JSON for this list.
        digest_list = pickle.loads(cached)

    slice_index = digest_list[start:end]

    # Use map-reduce to preserve the order of slice_index: results come back
    # in ascending key order, and the key is each document's position in
    # slice_index (passed to the JavaScript as "inputs").
    map_reduce_res = NEWS_COLLECTION.map_reduce(
        map=Code("""
                function (){
                    var order = inputs.indexOf(this.digest);
                    emit(order,  this);
                }
            """),
        reduce=Code("""
                function (){}
            """),
        out={"inline": 1},
        query={"digest": {
            "$in": slice_index
        }},
        scope={"inputs": slice_index}
    )
    news_slice_list = [news['value'] for news in map_reduce_res['results']]

    for news in news_slice_list:
        del news['_id']
        del news['text']
        news['publishedAt'] = news['publishedAt'].isoformat()
    return news_slice_list
Beispiel #20
0
def mapReduce(tskillList, toolskills, yearofexp, edulevel):
    """Score matching resumes with a map-reduce job and return the output.

    Resumes are selected by required tech skills, tool skills, education
    level and years of experience. The map step emits every
    ``WorkExperience`` entry keyed by ``Email``; the reduce step derives a
    numeric total from keyword matches. Output is written to the
    "mroutput" collection, whose documents are returned as a list.

    :param tskillList: values that ``TechSkills`` must all contain
    :param toolskills: values that ``ToolSkills`` must all contain
    :param yearofexp: value required for ``yearsOfExperience``
    :param edulevel: value required for ``EducationLevel``
    :return: list of documents read from the "mroutput" collection
    """
    resumeCollection = DBconnection.getcollection("resume")
    mroutputCollection = DBconnection.getcollection("mroutput")

    mapper = Code("".join([
        "function () {",
        "for (index in this.WorkExperience) {",
        "emit(this.Email,this.WorkExperience[index]);",
        "}",
        "}",
    ]))

    # NOTE(review): the JavaScript "if" statements below have no braces, so
    # each one guards only the statement that immediately follows it; the
    # second increment after regex1/regex2/regex5 always runs. Confirm
    # whether that is the intended scoring.
    reducer = Code("".join([
        "function(email, workex) {",
        "var regex1 = new RegExp('collaborated', 'i');",
        "var regex2 = new RegExp('managed', 'i');",
        "var regex3 = new RegExp('developed', 'i');",
        "var regex4 = new RegExp('designed', 'i');",
        "var regex5 = new RegExp('initiated', 'i');",
        "total = 0;",
        "lscore = 0;",
        "twscore = 0;",
        "ascore = 0;",
        "if(regex1.test(workex))",
        "lscore += 1;",
        "twscore += 1;",
        "if(regex2.test(workex))",
        "lscore += 1;",
        "twscore += 1;",
        "if(regex3.test(workex))",
        "ascore += 1;",
        "if(regex4.test(workex))",
        "ascore += 1;",
        "if(regex5.test(workex))",
        "lscore += 1;",
        "ascore += 1;",
        "total = lscore + twscore + ascore-1;",
        "return total",
        "}",
    ]))

    resume_filter = {
        "TechSkills": {"$all": tskillList},
        "ToolSkills": {"$all": toolskills},
        "EducationLevel": edulevel,
        "yearsOfExperience": yearofexp,
    }
    resumeCollection.map_reduce(mapper, reducer, "mroutput",
                                query=resume_filter)

    # Collect everything the job wrote to the output collection.
    return [doc for doc in mroutputCollection.find()]
Beispiel #21
0
import pymongo
from bson.code import Code
import datetime

# Print the wall-clock time before the job starts.
print(datetime.datetime.now())

mongocli = pymongo.MongoClient("mongodb://localhost:27017/")
db = mongocli["bigdata"]
movies = db["movies"]

# Map step: emit 1 for every entry of a movie's cast, keyed by the name.
map = Code("".join((
    "function () {",
    "  for(var i in this.cast) {",
    "     emit({name:this.cast[i].name}, 1);",
    "  }",
    "}",
)))

# Reduce step: add up the emitted values for a key.
reduce = Code("".join((
    "function (key, value) {",
    "    return Array.sum(value);",
    "}",
)))

# Output goes to the "myresults" collection; show the 20 largest values.
result = movies.map_reduce(map, reduce, "myresults")
top_twenty = result.find().sort("value", -1).limit(20)
for doc in top_twenty:
    print(doc)

# Print the wall-clock time after the job has finished.
print(datetime.datetime.now())
Beispiel #22
0
def update_stats(verbose=True):
    """Recompute the summary entries stored in ``C.elliptic_curves.nfcurves.stats``.

    Each entry is replaced by deleting the document with its ``_id`` and
    inserting a fresh one. The entries written here are, in order:
    ``signatures_by_degree``, ``fields_by_signature``, ``fields_by_degree``,
    ``conductor_norm``, ``conductor_norm_by_degree``,
    ``conductor_norm_by_signature`` and ``conductor_norm_by_field``.
    Attribute counts are delegated to ``update_attribute_stats``.

    NOTE(review): most queries go through a name ``nfcurves`` that is not
    assigned inside this function, so it must exist at module level;
    presumably it is the same collection as ``C.elliptic_curves.nfcurves``.

    :param verbose: when true, print a progress line before each step
    """
    from data_mgt.utilities.rewrite import update_attribute_stats
    from bson.code import Code
    ec = C.elliptic_curves
    ecdbstats = ec.nfcurves.stats

    def report(template, *values):
        # Progress output; the message is only formatted when it is shown.
        if verbose:
            print(template.format(*values))

    def replace_entry(entry_id, payload):
        # Drop the stored document with this _id, then insert the new one.
        entry = {'_id': entry_id}
        ecdbstats.delete_one(entry)
        entry.update(payload)
        ecdbstats.insert_one(entry)

    def norm_summary(query):
        # Curve count, class count (number == 1) and conductor-norm range
        # for the curves matching `query`. `query` is extended in place.
        res = nfcurves.find(query)
        ncurves = res.count()
        Ns = res.distinct('conductor_norm')
        min_norm = min(Ns)
        max_norm = max(Ns)
        query['number'] = 1
        nclasses = nfcurves.count(query)
        return {
            'ncurves': ncurves,
            'nclasses': nclasses,
            'min_norm': min_norm,
            'max_norm': max_norm,
        }

    # get list of degrees

    degrees = nfcurves.distinct('degree')
    report("degrees: {}", degrees)

    # get list of signatures for each degree.  Note that it would not
    # work to use nfcurves.find({'degree':d}).distinct('signature')
    # since 'signature' is currently a list of integers and mongo would
    # return a list of integers, not a list of lists.  With hindsight
    # it would have been better to store the signature as a string.

    report("Adding signatures_by_degree")
    reducer = Code("""function(key,values){return Array.sum(values);}""")
    attr = 'signature'
    mapper = Code('function(){emit(""+this.' + attr + ',1);}')
    sigs_by_deg = {}
    for d in degrees:
        sigs_by_deg[str(d)] = [
            r['_id'] for r in nfcurves.inline_map_reduce(
                mapper, reducer, query={'degree': d})
        ]
        report("degree {} has signatures {}", d, sigs_by_deg[str(d)])

    replace_entry('signatures_by_degree', sigs_by_deg)

    # get list of fields for each signature.  Simple code here faster than map/reduce

    report("Adding fields_by_signature")
    from sage.misc.flatten import flatten
    sigs = flatten(sigs_by_deg.values())
    fields_by_sig = {
        sig: nfcurves.find({
            'signature': [int(x) for x in sig.split(",")]
        }).distinct('field_label')
        for sig in sigs
    }
    replace_entry('fields_by_signature', fields_by_sig)

    # get list of fields for each degree

    report("Adding fields_by_degree")
    fields_by_deg = {
        str(d): sorted(nfcurves.find({'degree': d}).distinct('field_label'))
        for d in degrees
    }
    replace_entry('fields_by_degree', fields_by_deg)

    fields = flatten(fields_by_deg.values())
    report("{} fields, {} signatures, {} degrees",
           len(fields), len(sigs), len(degrees))

    report("Adding curve counts for torsion order, torsion structure")
    update_attribute_stats(ec, 'nfcurves',
                           ['torsion_order', 'torsion_structure'])

    report("Adding curve counts by degree, signature and field")
    update_attribute_stats(ec, 'nfcurves',
                           ['degree', 'signature', 'field_label'])

    report("Adding class counts by degree, signature and field")
    update_attribute_stats(ec,
                           'nfcurves', ['degree', 'signature', 'field_label'],
                           prefix="classes",
                           filter={'number': 1})

    # conductor norm ranges:
    # total:
    report("Adding curve and class counts and conductor range")
    norms = ec.nfcurves.distinct('conductor_norm')
    totals = {
        'ncurves': ec.nfcurves.count(),
        'nclasses': ec.nfcurves.find({
            'number': 1
        }).count(),
        'min_norm': min(norms),
        'max_norm': max(norms),
    }
    replace_entry('conductor_norm', totals)

    # by degree:
    report("Adding curve and class counts and conductor range, by degree")
    degree_data = {str(d): norm_summary({'degree': d}) for d in degrees}
    replace_entry('conductor_norm_by_degree', degree_data)

    # by signature:
    report("Adding curve and class counts and conductor range, by signature")
    sig_data = {
        sig: norm_summary({'signature': [int(c) for c in sig.split(",")]})
        for sig in sigs
    }
    replace_entry('conductor_norm_by_signature', sig_data)

    # by field:
    report("Adding curve and class counts and conductor range, by field")
    # The stored entry is removed before the per-field queries run and
    # again as part of the replacement below.
    ecdbstats.delete_one({'_id': 'conductor_norm_by_field'})
    field_data = {
        # mongo does not allow "." in key strings
        f.replace(".", ":"): norm_summary({'field_label': f})
        for f in fields
    }
    replace_entry('conductor_norm_by_field', field_data)
Beispiel #23
0
def prueba(request):
	"""Run the ``mapCuentaDias`` map-reduce 20 times, then render the page.

	Connects to MongoDB using the module-level ``ip`` and ``port``, runs the
	map-reduce against ``twitterdata.tweets`` (output collection
	``"myresult2"``) and prints each full response.

	:param request: the incoming request; it is not read here but is part of
		the template context because ``locals()`` is passed to the template.
	:returns: the result of ``render_to_response`` for
		``userStatistics.html``.
	"""
	# MongoDB access
	client = MongoClient(ip, port)
	try:
		db = client['twitterdata']
		tweetCollection = db['tweets']
		for i in range(20):
			result2 = tweetCollection.map_reduce(mapCuentaDias, reduce, "myresult2", full_response=True)
			# Single-argument call form prints the same on Python 2 and 3.
			print(result2)
	finally:
		# Release the connection even when a map-reduce call raises.
		client.close()
	# Render the template. locals() is the template context, so the local
	# names above are deliberately left unchanged.
	return render_to_response('userStatistics.html', locals())

# Map step: for documents that carry a retweeted_status, emit the tweet text
# together with the retweet count of that status.
mapRT = Code('function(){'
           'if (this.retweeted_status){'
           'emit(this.text, this.retweeted_status.retweet_count);'
           '}'
           '}')

# Reduce step: keep the largest retweet count emitted for a key.
# Math.max() takes separate numeric arguments; handing it the array itself
# yields NaN as soon as there is more than one value, so the array is
# spread with apply().
reduceRT = Code("function (key, values) {"
               " var total = Math.max.apply(null, values);"
               " return total;"
               "}")

# Map step: emit a count of 1 per document, keyed by the user's screen name.
mapUser = Code('function(){'
           '   emit(this.user.screen_name, 1);'
           '}')

mapPopular = Code('function(){'
           'var palabras = new Array;'
           'palabras = this.text.split(" ");'
Beispiel #24
0
 def __setattr__(self, name, code):
     """Store ``code`` under ``name`` in the database's ``system.js`` collection.

     The document whose ``_id`` equals ``name`` is replaced; the third
     positional argument (``True``) asks for an upsert, so the document is
     created when it does not exist yet.
     """
     js_collection = self._db.system.js
     selector = {"_id": name}
     stored_function = {"_id": name, "value": Code(code)}
     js_collection.replace_one(selector, stored_function, True)
Beispiel #25
0
            init_kafka()


if __name__ == '__main__':
    # user_id = int(sys.argv[1])
    # type = sys.argv[2]
    # send_message(user_id, type)
    mongo = db.connect_mongo()
    # items = mongo.message.user_message.find({"topicMessageId":39658}).sort("_id", 1)
    # for item in items:
    #     logger.info("%s - %s", item["_id"].generation_time.strftime("%Y/%m/%d %H:%M:%S"), item["userId"])

    mapper_topic_message = Code("""
        function () {
            if(!!this.topicMessageId){
                emit(this.userId + "-" + this.topicMessageId, 1);
            }
        }
    """)

    mapper_company_message = Code("""
            function () {
                if(!!this.companyMessageId){
                    emit(this.userId + "-" + this.companyMessageId, 1);
                }
            }
        """)

    reducer = Code("""
        function (key, values) {
            var total = 0;
 def __setattr__(self, name, code):
     """Save ``code`` under ``name`` in the database's ``system.js`` collection.

     NOTE(review): ``save(..., safe=True)`` looks like the legacy PyMongo
     write API - confirm against the driver version in use.
     """
     js_collection = self._db.system.js
     stored_function = {"_id": name, "value": Code(code)}
     js_collection.save(stored_function, safe=True)
Beispiel #27
0
from pymongo import MongoClient

from bson.code import Code

import sys

# Script entry point: collect the distinct keys found under the ``Mods``
# field of the ``posts`` collection and write them to ``mods.txt``.
if __name__ == '__main__':
    # Connect to a MongoDB server on localhost and pick the project database.
    client = MongoClient("mongodb://localhost:27017/")
    db = client.project_542

    # Map step: for documents that have a Mods field, emit each of its keys
    # with a null value (only the keys matter).
    mapper = Code("""
                        function(){
                            if (this.Mods){
                                for (key in this.Mods){
                                    emit(key, null);
                                }
                            }
                        }
                      """)

    # Reduce step: the values are ignored; every key reduces to null.
    reducer = Code("""
                        function(key, values){
                        return null;
                        }
                      """)
    # Run the map-reduce over db.posts with "myresults" as the output name.
    result = db.posts.map_reduce(mapper, reducer, "myresults")
    Mods_list = []  # not used in the visible part of this script
    # NOTE(review): no close() for this file is visible in this snippet -
    # confirm it is closed further down, or switch to a ``with`` block.
    f = open('mods.txt', 'w')
    for n in result.find():
        # Write an opening quote followed by the emitted key.
        f.write("'")
        f.write(n['_id'])
    def execute(trial=False):
        """Build the ``zip_avg_earnings`` collection with a map-reduce.

        Each earnings record that has a ``postal`` value is mapped to a key
        (``postal`` without its first character) and reduced to a count, a
        total and an average of ``total_earnings``. Afterwards the count and
        total fields are removed from every result document so that only
        ``value.avg`` remains.

        :param trial: accepted for interface compatibility; not read here.
        :returns: a dict with the ``"start"`` and ``"end"`` timestamps of
            the run.
        """
        started_at = datetime.datetime.now()

        # Open the repository with the configured credentials.
        repo = openDb(getAuth("db_username"), getAuth("db_password"))

        # Source collection with the earnings records.
        earnings_report = repo['aydenbu_huangyh.earningsReport']

        # Map step: key is the postal value minus its first character; the
        # value seeds count/total/avg from this record's total_earnings.
        map_step = Code("""
            function() {
                var k = this.postal;
                if  (k != null) {
                    k = k.substring(1);
                    var v = {count:1, totalEarnings:parseFloat(this.total_earnings), avg:parseFloat(this.total_earnings)};
                    emit(k, v)
                }
            }
            """)

        # Reduce step: sum counts and totals, then derive the average
        # formatted with two decimals.
        reduce_step = Code("""
            function(k, vs) {
                reduceVal = {count:0, totalEarnings:0, avg:0};
                for (var i = 0; i < vs.length; i++) {
                    reduceVal.count += vs[i].count;
                    reduceVal.totalEarnings += parseFloat(vs[i].totalEarnings);
                }
                reduceVal.avg = (reduceVal.totalEarnings/reduceVal.count).toFixed(2);
                return reduceVal;
            }
            """)

        # Drop the previous output, then run the map-reduce into the
        # namespaced output collection.
        repo.dropPermanent("zip_avg_earnings")
        earnings_report.map_reduce(
            map_step, reduce_step, "aydenbu_huangyh.zip_avg_earnings")

        averages = repo['aydenbu_huangyh.zip_avg_earnings']

        # Strip the helper fields from every document; only avg is kept.
        helper_fields = {'value.count': '', 'value.totalEarnings': ''}
        averages.update({}, {'$unset': helper_fields},
                        multi=True, upsert=False)

        # An earlier variant that saved results into zip_hospitals_count via
        # insert_many was disabled here.
        repo.logout()
        finished_at = datetime.datetime.now()

        return {"start": started_at, "end": finished_at}
 def __getattr__(self, name):
     """Return a callable that invokes the server-side function ``name``.

     The returned callable forwards its positional arguments to
     ``self._db.eval`` together with a small JavaScript wrapper; ``name`` is
     handed to that wrapper through the ``Code`` scope.
     """
     js_wrapper = "function() { return this[name].apply(this, arguments); }"
     return lambda *args: self._db.eval(
         Code(js_wrapper, scope={'name': name}), *args)
def _get_code(data, view, position, obj_end, opts, element_name):
    """Decode a BSON code element into a :class:`bson.code.Code`.

    All arguments are passed straight through to ``_get_string``; the string
    it returns is wrapped in ``Code``.

    :returns: a ``(Code, position)`` pair, where ``position`` is the one
        reported by ``_get_string``.
    """
    source, next_position = _get_string(
        data, view, position, obj_end, opts, element_name)
    return Code(source), next_position
Beispiel #31
0
def object_hook(dct):
    """Convert a MongoDB extended-JSON dict into the value it encodes.

    The dict is probed for ``$``-prefixed marker keys in a fixed order and
    the first marker found decides the result.

    :param dct: a decoded JSON object. For ``$binary`` with an integer
        ``$type`` the dict is updated in place (the type becomes a hex
        string).
    :returns: the converted value, or ``dct`` itself when it carries no
        marker key.
    :raises ValueError: if a ``$date`` string has a timezone suffix that is
        neither ``(+|-)HHMM`` nor ``(+|-)HH:MM``.
    """
    if "$oid" in dct:
        return ObjectId(str(dct["$oid"]))
    if "$ref" in dct:
        return DBRef(dct["$ref"], dct["$id"], dct.get("$db", None))
    if "$date" in dct:
        dtm = dct["$date"]
        # mongoexport 2.6 and newer
        if isinstance(dtm, string_type):
            aware = datetime.datetime.strptime(
                dtm[:23], "%Y-%m-%dT%H:%M:%S.%f").replace(tzinfo=utc)
            offset = dtm[23:]
            if not offset or offset == 'Z':
                # UTC
                return aware
            if len(offset) == 5:
                # Offset from mongoexport is in format (+|-)HHMM
                secs = (int(offset[1:3]) * 3600 + int(offset[3:]) * 60)
            elif ':' in offset and len(offset) == 6:
                # RFC-3339 format (+|-)HH:MM
                hours, minutes = offset[1:].split(':')
                secs = (int(hours) * 3600 + int(minutes) * 60)
            else:
                # Not RFC-3339 compliant or mongoexport output.
                raise ValueError("invalid format for offset")
            if offset[0] == "-":
                secs *= -1
            return aware - datetime.timedelta(seconds=secs)
        # The ``collections.Mapping`` alias was removed in Python 3.10; the
        # ABC lives in ``collections.abc`` (fall back for Python 2).
        try:
            from collections.abc import Mapping
        except ImportError:
            from collections import Mapping
        # mongoexport 2.6 and newer, time before the epoch (SERVER-15275)
        if isinstance(dtm, Mapping):
            secs = float(dtm["$numberLong"]) / 1000.0
        # mongoexport before 2.6
        else:
            secs = float(dtm) / 1000.0
        return EPOCH_AWARE + datetime.timedelta(seconds=secs)
    if "$regex" in dct:
        flags = 0
        # PyMongo always adds $options but some other tools may not.
        for opt in dct.get("$options", ""):
            flags |= _RE_OPT_TABLE.get(opt, 0)
        return Regex(dct["$regex"], flags)
    if "$minKey" in dct:
        return MinKey()
    if "$maxKey" in dct:
        return MaxKey()
    if "$binary" in dct:
        # Normalise an integer subtype to the hex-string form first.
        if isinstance(dct["$type"], int):
            dct["$type"] = "%02x" % dct["$type"]
        subtype = int(dct["$type"], 16)
        if subtype >= 0xffffff80:  # Handle mongoexport values
            subtype = int(dct["$type"][6:], 16)
        return Binary(base64.b64decode(dct["$binary"].encode()), subtype)
    if "$code" in dct:
        return Code(dct["$code"], dct.get("$scope"))
    if "$uuid" in dct:
        return uuid.UUID(dct["$uuid"])
    if "$undefined" in dct:
        return None
    if "$numberLong" in dct:
        return Int64(dct["$numberLong"])
    if "$timestamp" in dct:
        tsp = dct["$timestamp"]
        return Timestamp(tsp["t"], tsp["i"])
    # No marker key: hand the dict back unchanged.
    return dct
def object_hook(dct, compile_re=True):
    """Map a MongoDB extended-JSON dict onto the value it encodes.

    Marker keys are probed in a fixed order and the first one present wins;
    a dict without any marker is returned untouched.

    :param dct: decoded JSON object to inspect. For ``$binary`` with an
        integer ``$type`` the dict is updated in place.
    :param compile_re: when true, ``$regex`` yields ``re.compile(...)``;
        otherwise a ``Regex`` instance is returned.
    :raises ValueError: for a ``$date`` string whose timezone suffix is in
        neither ``(+|-)HHMM`` nor ``(+|-)HH:MM`` form.
    """
    if "$oid" in dct:
        return ObjectId(str(dct["$oid"]))

    if "$ref" in dct:
        return DBRef(dct["$ref"], dct["$id"], dct.get("$db"))

    if "$date" in dct:
        stamp = dct["$date"]
        if not isinstance(stamp, str):
            if isinstance(stamp, dict):
                # mongoexport 2.6+, time before the epoch (SERVER-15275)
                millis = float(stamp["$numberLong"])
            else:
                # mongoexport before 2.6
                millis = float(stamp)
            return EPOCH_AWARE + datetime.timedelta(seconds=millis / 1000.0)

        # String form (mongoexport 2.6 and newer). The date is parsed with
        # time.strptime and the milliseconds are sliced out by hand.
        parsed = time.strptime(stamp[:19], "%Y-%m-%dT%H:%M:%S")
        naive = datetime.datetime(*parsed[0:6])
        aware = naive.replace(microsecond=int(stamp[20:23]) * 1000,
                              tzinfo=utc)
        tz_suffix = stamp[23:]
        if not tz_suffix or tz_suffix == 'Z':
            # Already UTC.
            return aware
        if len(tz_suffix) == 5:
            # mongoexport style: (+|-)HHMM
            shift = int(tz_suffix[1:3]) * 3600 + int(tz_suffix[3:]) * 60
        elif ':' in tz_suffix and len(tz_suffix) == 6:
            # RFC-3339 style: (+|-)HH:MM
            hh, mm = tz_suffix[1:].split(':')
            shift = int(hh) * 3600 + int(mm) * 60
        else:
            # Neither RFC-3339 nor mongoexport output.
            raise ValueError("invalid format for offset")
        if tz_suffix[0] == "-":
            shift = -shift
        return aware - datetime.timedelta(seconds=shift)

    if "$regex" in dct:
        # PyMongo always writes $options; other tools may leave it out.
        re_flags = 0
        for letter in dct.get("$options", ""):
            re_flags |= _RE_OPT_TABLE.get(letter, 0)
        build = re.compile if compile_re else Regex
        return build(dct["$regex"], re_flags)

    if "$minKey" in dct:
        return MinKey()

    if "$maxKey" in dct:
        return MaxKey()

    if "$binary" in dct:
        type_field = dct["$type"]
        if isinstance(type_field, int):
            # Normalise to the hex-string form and store it back in dct.
            type_field = "%02x" % type_field
            dct["$type"] = type_field
        subtype = int(type_field, 16)
        if subtype >= 0xffffff80:  # mongoexport values
            subtype = int(type_field[6:], 16)
        return Binary(base64.b64decode(dct["$binary"].encode()), subtype)

    if "$code" in dct:
        return Code(dct["$code"], dct.get("$scope"))

    if bson.has_uuid() and "$uuid" in dct:
        return bson.uuid.UUID(dct["$uuid"])

    if "$undefined" in dct:
        return None

    if "$numberLong" in dct:
        # PyMongo 3.0 supports a dedicated Int64 type to avoid round-trip
        # issues; this version returns a plain int.
        return int(dct["$numberLong"])

    if "$timestamp" in dct:
        ts_fields = dct["$timestamp"]
        return Timestamp(ts_fields["t"], ts_fields["i"])

    return dct
Beispiel #33
0
    def get(self, topic):
        """Count hashtags in tweets matching ``topic`` and draw a word cloud.

        Runs a map-reduce over the ``Grupo10.tweets`` collection restricted
        to documents whose ``text`` matches ``topic`` (compiled as a
        case-insensitive regular expression), saves a word-cloud image to
        ``images/foo.png`` and writes the serialised map-reduce results to
        the response.

        NOTE(review): ``topic`` is compiled as a regex without escaping -
        confirm that callers are trusted. The client is not closed anywhere
        in the visible code.
        """
        client = MongoClient('bigdata-mongodb-01', 27017)
        db = client['Grupo10']
        mine = db['tweets']

        # Map step: split the lower-cased tweet text into words, drop the
        # listed stop words and emit (word, 1) for words starting with "#".
        map = Code("""function()
        {var
        text = this.text;
        text = text.replace('.',' '); text = text.replace(',',' '); text = text.replace('(',' '); text = text.replace(')',' '); text = text.replace(':',' ');
        var
        wordArr = text.toLowerCase().split(' ');
        var
        stoppedwords = 'el, la, de, es, a, un, una, que, de, por, para, como, al, ?, !, +, y, no, los, las, en, se, lo, con, o, del, q, su, //t, https, si, mas, le, cuando, ellos, este, son, tan, esa, eso, ha, sus, e, pero, porque, tienen, d';
        var
        stoppedwordsobj = [];
        var
        uncommonArr = [];
        stoppedwords = stoppedwords.split(',');
        for (i = 0; i < stoppedwords.length; i++ ) {stoppedwordsobj[stoppedwords[i].trim()] = true;}
        for ( i = 0; i < wordArr.length; i++ ) {word = wordArr[i].trim().toLowerCase(); if ( !stoppedwordsobj[word] ) {uncommonArr.push(word);}}
        for (var i = uncommonArr.length - 1; i >= 0; i--) {if (uncommonArr[i]) {if (uncommonArr[i].startsWith("#")) {emit(uncommonArr[i], 1);}}}}"""
                   )

        # Reduce step: sum the emitted counts for each key.
        reduce = Code("""function( key, values ) {
        var count = 0;
        values.forEach(function(v) {
            count +=v;
        });
        return count;
        }""")

        # Only tweets whose text matches the topic take part in the job.
        regx = re.compile(topic, re.IGNORECASE)
        result = mine.map_reduce(map,
                                 reduce,
                                 "myresults",
                                 query={"text": regx})

        # Collect every result document into a list.
        json_result = []
        for doc in result.find():
            json_result.append(doc)

        d = path.dirname(__file__)
        # The mask image is loaded here, but its only use below is in a
        # commented-out line.
        col_mask = np.array(Image.open(path.join(d, "images/col.png")))
        #text = "y es es es es es es es es es es es es forcing the closing of the figure window in my giant loop, so I do"
        #text = open(path.join(d, 'images/red.txt')).read()

        # Round-trip through dumps/json.loads to get plain JSON values.
        jsonCloud = json.loads(dumps(json_result))
        text = ""

        # Repeat each key in the text in proportion to its count
        # (xrange(1, int(value * 2)) iterations per item).
        for item in jsonCloud:
            for x in xrange(1, int(item['value'] * 2)):
                text += " " + item['_id']

        wordcloud = WordCloud(width=1000, height=800,
                              max_font_size=1000).generate(text)
        #wordcloud = WordCloud(mask=col_mask, max_font_size=1000).generate(text)
        #fig = plt.figure(figsize=(4.2,6.2))
        fig = plt.figure(figsize=(20, 10))
        plt.imshow(wordcloud, interpolation='bilinear')
        plt.axis("off")
        # Save the rendered cloud, then send the raw results to the client.
        fig.savefig('images/foo.png', facecolor='k', bbox_inches='tight')
        self.write(dumps(json_result))