Beispiel #1
0
    def load_data(self, region, N, limit=0):
        """Load dotted address strings for ``region`` and split them into chunks.

        Documents of the ``region`` collection are sorted by ascending
        ``date`` and each one is projected to an ``address`` string built
        from its ``b1``, ``b2`` and ``b3`` fields joined with dots.

        Args:
            region: Name of the collection to read from the IP database.
            N: Target chunk size. The rows are split into ``len(rows) // N``
                sections, or a single section when fewer than ``N`` rows
                were returned.
            limit: Maximum number of documents to read. Values ``<= 0``
                mean "no limit".

        Returns:
            The list of NumPy arrays produced by ``np.array_split``, or an
            empty list when the query returns no documents.

        Raises:
            ValueError: If ``N`` is not positive.
        """
        if N <= 0:
            raise ValueError("N must be a positive number")

        db = DataSource().IPdatabase()

        pipeline = [{'$sort': {'date': 1}}]
        if limit > 0:
            # `$limit` only accepts a positive integer, so the stage is left
            # out when no limit is requested. A limit larger than the
            # collection simply yields every document, which makes a
            # separate (deprecated) `count()` round trip unnecessary.
            pipeline.append({'$limit': limit})
        pipeline.append({
            '$project': {
                # Only b1..b3 are joined; b4 is not part of the address
                # (presumably to work on the first three octets -- confirm).
                'address': {
                    '$concat': [
                        {'$toString': '$b1'}, '.',
                        {'$toString': '$b2'}, '.',
                        {'$toString': '$b3'},
                    ]
                }
            }
        })

        addresses = [doc['address'] for doc in db[region].aggregate(pipeline)]
        if not addresses:
            return []

        # np.array_split rejects zero sections, which is what the plain
        # floor division gives when fewer than N rows are available.
        sections = max(1, len(addresses) // N)
        return np.array_split(addresses, sections)
Beispiel #2
0
 def load_unique_data_with_frequencies(self, region, N, limit=0):
     """Return the distinct addresses of ``region``, most frequent first.

     Documents of the ``region`` collection are sorted by ascending
     ``date``, optionally truncated to ``limit`` documents, projected to an
     ``address`` string built from ``b1``, ``b2`` and ``b3`` joined with
     dots, and then grouped by that address with a per-address count.

     Args:
         region: Name of the collection to read from the IP database.
         N: Unused by this method; kept so the signature stays unchanged.
         limit: Maximum number of documents to read before grouping.
             Values ``<= 0`` mean "no limit".

     Returns:
         A list of address strings ordered by descending occurrence count.
         The counts are used for ordering only and are not returned.
     """
     db = DataSource().IPdatabase()

     pipeline = [{'$sort': {'date': 1}}]
     if limit > 0:
         # `$limit` only accepts a positive integer, so the stage is left
         # out when no limit is requested. A limit larger than the
         # collection simply yields every document, which makes a
         # separate (deprecated) `count()` round trip unnecessary.
         pipeline.append({'$limit': limit})
     pipeline.extend([
         {
             '$project': {
                 # Only b1..b3 are joined; b4 is not part of the address
                 # (presumably to work on the first three octets -- confirm).
                 'address': {
                     '$concat': [
                         {'$toString': '$b1'}, '.',
                         {'$toString': '$b2'}, '.',
                         {'$toString': '$b3'},
                     ]
                 }
             }
         },
         {
             '$group': {
                 '_id': {'address': '$address'},
                 'count': {"$sum": 1},
             }
         },
     ])

     counted = [
         (doc['_id']['address'], doc['count'])
         for doc in db[region].aggregate(pipeline)
     ]
     # Stable sort: addresses with equal counts keep the order in which the
     # server returned them.
     counted.sort(key=lambda pair: pair[1], reverse=True)
     return [address for address, _ in counted]