def load_data(self, region, N, limit=0):
    """Load a region's addresses in date order and split them into chunks.

    The first ``limit`` documents of the ``region`` collection (ascending
    ``date``) are projected to a dotted ``b1.b2.b3`` string and the
    resulting sequence is split with ``np.array_split``.

    Args:
        region: Name of the collection to read from the IP database.
        N: Target chunk size; the sequence is split into ``len // N``
            near-equal parts, so each part holds at least ``N`` items
            whenever at least ``N`` items are available.
        limit: Maximum number of documents to read. Values ``<= 0`` or
            larger than the collection size mean "read everything".

    Returns:
        A list of NumPy arrays of address strings. The list is empty when
        the collection holds no documents. When fewer than ``N`` addresses
        are available they are returned as one single, shorter chunk.

    Raises:
        ZeroDivisionError: If ``N`` is 0.
        ValueError: If ``N`` is negative (raised by ``np.array_split``).
    """
    DS = DataSource()
    db = DS.IPdatabase()
    collection = db[region]

    # NOTE(review): if this is a PyMongo collection, count() is deprecated
    # since PyMongo 3.7 and removed in 4.0 -- count_documents({}) is the
    # replacement. Confirm the driver version before switching.
    maxlimit = collection.count()
    if maxlimit == 0:
        # An empty collection would lead to {'$limit': 0}, which MongoDB
        # rejects (the limit must be a positive integer).
        return []
    if (limit <= 0) or (limit > maxlimit):
        limit = maxlimit

    cursor = collection.aggregate([
        {'$sort': {'date': 1}},
        {'$limit': limit},
        {
            '$project': {
                # Only the first three components (b1.b2.b3) are joined;
                # b4 is deliberately left out of the address string
                # (presumably to work at /24 granularity -- confirm).
                'address': {
                    '$concat': [
                        {'$toString': '$b1'}, '.',
                        {'$toString': '$b2'}, '.',
                        {'$toString': '$b3'},
                    ]
                }
            }
        },
    ])
    addresses = [doc['address'] for doc in cursor]
    if not addresses:
        return []

    sections = len(addresses) // N
    if sections == 0:
        # Fewer than N items: np.array_split refuses 0 sections, so keep
        # the data as one short chunk instead of raising.
        sections = 1
    return np.array_split(addresses, sections)
def load_unique_data_with_frequencies(self, region, N, limit=0):
    """Return a region's distinct addresses, most frequent first.

    The first ``limit`` documents of the ``region`` collection (ascending
    ``date``) are projected to a dotted ``b1.b2.b3`` string, grouped by
    that string and counted. The counts are used only to order the
    result; they are not part of the return value.

    Args:
        region: Name of the collection to read from the IP database.
        N: Unused by this method; kept so the signature matches
            ``load_data``.
        limit: Maximum number of documents to read. Values ``<= 0`` or
            larger than the collection size mean "read everything".

    Returns:
        A list of distinct address strings sorted by descending number of
        occurrences. Addresses with equal counts keep the order in which
        the database returned them. The list is empty when the collection
        holds no documents.
    """
    DS = DataSource()
    db = DS.IPdatabase()
    collection = db[region]

    # NOTE(review): if this is a PyMongo collection, count() is deprecated
    # since PyMongo 3.7 and removed in 4.0 -- count_documents({}) is the
    # replacement. Confirm the driver version before switching.
    maxlimit = collection.count()
    if maxlimit == 0:
        # An empty collection would lead to {'$limit': 0}, which MongoDB
        # rejects (the limit must be a positive integer).
        return []
    if (limit <= 0) or (limit > maxlimit):
        limit = maxlimit

    cursor = collection.aggregate([
        {'$sort': {'date': 1}},
        {'$limit': limit},
        {
            '$project': {
                # Only the first three components (b1.b2.b3) are joined;
                # b4 is deliberately left out of the address string
                # (presumably to work at /24 granularity -- confirm).
                'address': {
                    '$concat': [
                        {'$toString': '$b1'}, '.',
                        {'$toString': '$b2'}, '.',
                        {'$toString': '$b3'},
                    ]
                }
            }
        },
        {
            '$group': {
                '_id': {'address': '$address'},
                'count': {'$sum': 1},
            }
        },
    ])

    counted = [(doc['_id']['address'], doc['count']) for doc in cursor]
    # Stable sort: ties stay in the order the aggregation produced them.
    counted = sorted(counted, key=lambda pair: pair[1], reverse=True)
    return [address for address, _ in counted]