コード例 #1
0
ファイル: user_slicer.py プロジェクト: whateverpal/group_spy
def slice_group_users(group_id, samples_count, filter_json_schema):
    BLOCK_SIZE = 1000.0
    credentials = get_credentials()
    members_count = VKCrawler([credentials[0]]).get_group_members_count(group_id)
    total_blocks = ceil(members_count / BLOCK_SIZE)
    blocks_for_samples = int(min(total_blocks, ceil(samples_count) / BLOCK_SIZE))
    ordered_blocks = range(int(total_blocks))
    shuffle(ordered_blocks)
    blocks_to_fetch = ordered_blocks[0:blocks_for_samples]

    past = datetime.now()
    fetched = 0
    ids = []
    while fetched < len(blocks_to_fetch):
        jobs = []
        for c in credentials:
            jobs.append(GroupMembersJob(c, group_id, BLOCK_SIZE * blocks_to_fetch[fetched], BLOCK_SIZE))
            fetched += 1
            if fetched >= len(blocks_to_fetch):
                break
        for j in jobs:
            j.start()
        for j in jobs:
            j.join()
        for response in [j.response for j in jobs]:
            ids.append(response)   
    ids = [item for sublist in ids for item in sublist]
    #print "ids received in: " + str(datetime.now() - past)
    
    
    past = datetime.now()
    crawler = VKCrawler(credentials)
    profiles = [p for p in crawler.get_profiles(ids) if not is_user_banned(p)]
    #print "profiles received in: " + str(datetime.now() - past)
    
    augment_profiles_with_extended_geo_info(crawler, profiles)
    augment_profiles_with_extended_age_info(profiles)
    filtered_profiles = filter_profiles_by_json_schema(profiles, filter_json_schema, members_count)
    
    ages = {}
    for p in filtered_profiles:
        if 'age' in p:
            if not p['age'] in ages:
                ages[p['age']] = 0
            ages[p['age']] += 1
    intervals = [[11, 0], [15, 0], [18, 0], [21, 0], [24, 0], [27, 0], [30, 0], [35, 0], [45, 0], [120, 0]]
    for age, count in ages.iteritems():
        for i in intervals:
            if age < i[0]:
                i[1] += count
                break
    sum = 0
    for i in intervals:
        sum += i[1]
    percentages = [100 * float(i[1]) / sum for i in intervals]
    cur_age = 0
    for i in xrange(len(intervals)):
        print str(cur_age) + "-" + str(intervals[i][0]) + ": " + str(percentages[i]) + "%"
        cur_age = intervals[i][0] + 1
コード例 #2
0
ファイル: user_slicer.py プロジェクト: whateverpal/group_spy
 def __init__(self, credentials, group_id, offset, count):
     self._group_id = group_id
     self._crawler = VKCrawler([credentials])
     self._target = self.do_job
     self._offset = offset
     self._count = count
     Thread.__init__(self)
コード例 #3
0
ファイル: user_slicer.py プロジェクト: whateverpal/group_spy
class GroupMembersJob(Thread):

    response = None
    
    def __init__(self, credentials, group_id, offset, count):
        self._group_id = group_id
        self._crawler = VKCrawler([credentials])
        self._target = self.do_job
        self._offset = offset
        self._count = count
        Thread.__init__(self)
        
    def run(self):
        self._target()
    
    def do_job(self):
        self.response = list(self._crawler.get_group_members(self._group_id, self._offset, self._count))