def reduce_location(l):
    """Normalize a free-form location string into a ``(city, state)`` tuple.

    The string is lower-cased and stripped, 'usa' / 'united state' and any
    trailing digits are removed, and the remainder is split in two by one of
    two regular expressions.  The second part is mapped through
    ``STATES_NORMALIZED`` when it is a key there, after which the hand-written
    corrections in ``_LOCATION_FIXUPS`` are applied.

    :param l: raw location string.
    :returns: a 2-tuple of strings; either element may be ''.
    """
    val = l.lower()
    val = val.strip()
    val = val.replace('usa', '')
    val = val.replace('united state', '')
    val = re.sub(r'\d*$', '', val)

    # First try "<words> <sep> <one or two words>", then fall back to
    # "<anything> <non-word> <last word>"; otherwise keep the whole string
    # as the second element.
    m = re.search(r'([\w ]*)\s*[\s,/]\s*(\w+\s*\w*)(\W*)', val)
    if m:
        val = (m.group(1).strip(), m.group(2).strip())
    else:
        m = re.search(r'(.*?)(\W)+(\w+)(\W*)', val)
        if m:
            val = (m.group(1).strip(), m.group(3).strip())
        else:
            val = ('', val)

    if val[1] in STATES_NORMALIZED:
        val = (val[0], STATES_NORMALIZED[val[1]].lower())

    test = '%s, %s' % (val[0], val[1])
    if val[1] in STATES_NORMALIZED:
        return val
    if test in _LOCATION_FIXUPS:
        return _LOCATION_FIXUPS[test]
    if val[0] in STATES_NORMALIZED and val[1] == '':
        return ('', val[0])
    if val[0] == '':
        return (val[1], '')
    return val


# Hand-written corrections keyed by the '<first>, <second>' string built in
# reduce_location.  Every value is a (city, state) tuple; the Los Angeles
# entries used to return the single string 'los angeles, ca'.
# NOTE(review): the second element is only ever '' when the first is '' too
# (both regexes require a word character in it), so the '<city>, ' keys can
# never match as written.  Kept for fidelity; ', <city>' may have been meant.
_LOCATION_FIXUPS = {
    'san, francisco': ('san francisco', 'ca'),
    'bronx, ny': ('new york', 'ny'),
    'new, york': ('', 'ny'),
    'houston, ': ('houston', 'tx'),
    'chicago, ': ('chicago', 'il'),
    'seattle, ': ('seattle', 'wa'),
    'brooklyn, ': ('brooklyn', 'ny'),
    'philadelphia, ': ('philadelphia', 'pa'),
    'dallas, ': ('dallas', 'tx'),
    'arlington, ': ('arlington', 'va'),
    'nashville, ': ('nashville', 'tn'),
    'washington, district of': ('washington', 'dc'),
    'los, angeles': ('los angeles', 'ca'),
    'la, ': ('los angeles', 'ca'),
    'san, diego': ('san diego', 'ca'),
}


def _parse_datetime(date_part, time_part):
    """Parse separate 'YYYY-MM-DD' and 'HH:MM:SS' strings into a datetime."""
    return datetime.datetime.strptime(
        '%s %s' % (date_part, time_part), '%Y-%m-%d %H:%M:%S')


def _describe_counterpart(other_id):
    """Label the other party of a share: 'n/a', 'new_user <n subs>' or 'other'."""
    if other_id is None:
        return 'n/a'
    if str(other_id) in users:
        return 'new_user %i' % len(users[str(other_id)]['sub'])
    return 'other'


def _write_share_row(out, label, user_id, sub_date, sub_time, counterpart,
                     share, days_since_sub, days_since_join):
    """Write one 'shared' / 'shared_to' CSV row for a single share action."""
    out.write('%s,%i,%s,%s,%s,%s,%i,%s,%s,%i,%i\n' % (
        label, user_id, sub_date, sub_time, counterpart,
        share.type, share.method,
        share.date_shared.date(), share.date_shared.time(),
        days_since_sub, days_since_join))


def _safe_ratio(numerator, denominator):
    """Return numerator / denominator as a float, or 0.0 when denominator is 0."""
    if not denominator:
        return 0.0
    return numerator / float(denominator)


def _summary_stats(values, population):
    """Return (total, average, middle element, share of population) for values.

    Empty ``values`` yield zeros instead of raising.  The third item is the
    element at index len // 2 of the sorted list, i.e. a median.
    """
    if values:
        total = sum(values)
        average = total / float(len(values))
        middle = sorted(values)[len(values) // 2]
    else:
        total, average, middle = 0, 0.0, 0
    return (total, average, middle, _safe_ratio(len(values), population))


def _write_follow_days(filename, field):
    """Write per-user, per-day follow counts for users[...][field] to filename."""
    with open(filename, 'w') as out:
        for key in users.keys():
            if field not in users[key]:
                continue
            u = users[key]
            date_joined = _parse_datetime(u['details'][1], u['details'][2])
            date_subscribed = False
            account = 'f'
            if len(u['sub']) == 1:
                account = 's'
                date_subscribed = _parse_datetime(u['sub'][0][3], u['sub'][0][4])
            days = {}
            for follow in u[field]:
                day_key = follow['date'].date()
                if day_key in days:
                    days[day_key]['count'] += 1
                    continue
                days_since_sub = 0
                if date_subscribed:
                    days_since_sub = (follow['date'] - date_subscribed).days
                days[day_key] = {
                    'count': 1,
                    'since_sub': days_since_sub,
                    'date': day_key,
                    'since_join': (follow['date'] - date_joined).days,
                    'total': len(u[field]),
                }
            for d in days.values():
                out.write('%s,%s,%i,%i,%s,%i,%i\n' % (
                    key, account, d['count'], d['total'], d['date'],
                    d['since_join'], d['since_sub']))


def _after_subscription(days, days_to_sub):
    """True when an event `days` after joining falls after a known subscription."""
    return days_to_sub >= 0 and days > days_to_sub


# ---------------------------------------------------------------------------
# Locations: count subscribers vs. free users per normalized location.
# ---------------------------------------------------------------------------
unique_locations = {}
location_results = {}
for key in users.keys():
    u = users[key]
    orig_loc = u['details'][4]
    loc = reduce_location(orig_loc)
    if orig_loc not in unique_locations:
        unique_locations[orig_loc] = loc
    counts = location_results.setdefault(loc, {'sub': 0, 'free': 0})
    # A user with exactly one subscription record counts as a subscriber.
    counts['sub' if len(u['sub']) == 1 else 'free'] += 1

for l in location_results.keys():
    if location_results[l]['sub'] > 2:
        print('%i\t%i\t%s' % (location_results[l]['free'], location_results[l]['sub'], l))

filename = 'location_stats.csv'
with open(filename, 'w') as f:
    for l in location_results.keys():
        if location_results[l]['sub'] > 0 or location_results[l]['free'] > 100:
            row = '%i\t%i\t%s' % (location_results[l]['free'], location_results[l]['sub'], l)
            print(row)
            f.write(row + '\n')

# TODO - collect emails of users, see if they were shared to BEFORE their join date
# TODO - collect shares SENT to the users

# ---------------------------------------------------------------------------
# Shares sent by / sent to the users.
# NOTE(review): the queries use values_list() yet the rows are read by
# attribute (s.user_id, s.date_shared, ...); presumably chunk_query yields
# objects exposing those attributes - confirm.
# ---------------------------------------------------------------------------
user_ids = [long(i) for i in users.keys()]

shares = {}
query = ShareActions.objects.filter(user_id__in=user_ids, app=settings.APPS.Rdio).values_list(
    'user_id', 'recipient_id', 'type', 'method', 'date_shared')
print(' .. chunking user info')
for share_chunk in chunk_query(query, 500):
    print(' .... chunk')
    for s in share_chunk:
        shares.setdefault(s.user_id, []).append(s)

shared_to = {}
query = ShareActions.objects.filter(recipient_id__in=user_ids, app=settings.APPS.Rdio).values_list(
    'user_id', 'recipient_id', 'type', 'method', 'date_shared')
print(' .. chunking user info')
for share_chunk in chunk_query(query, 500):
    print(' .... chunk')
    for s in share_chunk:
        shared_to.setdefault(s.recipient_id, []).append(s)

for u in shares.keys():
    users[str(u)]['shared'] = shares[u]
for u in shared_to.keys():
    users[str(u)]['shared_to'] = shared_to[u]

share_summary = {
    'avg_free_shared': [],
    'avg_free_shared_to': [],
    'avg_shared_before': [],
    'avg_shared_after': [],
    'avg_shared_to_before': [],
    'avg_shared_to_after': [],
}

filename = 'share_info.csv'
with open(filename, 'w') as f:
    for u in set(shares.keys()).union(set(shared_to.keys())):
        u_sharer = users[str(u)]
        date_joined = _parse_datetime(u_sharer['details'][1], u_sharer['details'][2])

        if len(u_sharer['sub']) == 1:
            summary = {'send_before_sub': 0, 'send_after_sub': 0,
                       'shared_to_before_sub': 0, 'shared_to_after_sub': 0}
            u_sharer['share_summary'] = summary
            sub_date = u_sharer['sub'][0][3]
            sub_time = u_sharer['sub'][0][4]
            date_subscribed = _parse_datetime(sub_date, sub_time)

            if 'shared' in u_sharer:
                for s in u_sharer['shared']:
                    days_since_sub = (s.date_shared - date_subscribed).days
                    _write_share_row(f, 'shared', u, sub_date, sub_time,
                                     _describe_counterpart(s.recipient_id), s,
                                     days_since_sub,
                                     (s.date_shared - date_joined).days)
                    if days_since_sub > 0:
                        summary['send_after_sub'] += 1
                    else:
                        summary['send_before_sub'] += 1
                if summary['send_after_sub']:
                    share_summary['avg_shared_after'].append(summary['send_after_sub'])
                if summary['send_before_sub']:
                    share_summary['avg_shared_before'].append(summary['send_before_sub'])

            if 'shared_to' in u_sharer:
                for s in u_sharer['shared_to']:
                    days_since_sub = (s.date_shared - date_subscribed).days
                    # The sender is looked up by its own id (this used to reuse
                    # the `recipient` variable left over from the loop above).
                    _write_share_row(f, 'shared_to', u, sub_date, sub_time,
                                     _describe_counterpart(s.user_id), s,
                                     days_since_sub,
                                     (s.date_shared - date_joined).days)
                    # Before/after *subscription*, matching the 'shared' case
                    # (this used to compare against the join date).
                    if days_since_sub > 0:
                        summary['shared_to_after_sub'] += 1
                    else:
                        summary['shared_to_before_sub'] += 1
                if summary['shared_to_after_sub']:
                    share_summary['avg_shared_to_after'].append(summary['shared_to_after_sub'])
                if summary['shared_to_before_sub']:
                    share_summary['avg_shared_to_before'].append(summary['shared_to_before_sub'])
        else:
            summary = {'shared': 0, 'shared_to': 0}
            u_sharer['share_summary'] = summary

            if 'shared' in u_sharer:
                summary['shared'] += len(u_sharer['shared'])
                share_summary['avg_free_shared'].append(summary['shared'])
                for s in u_sharer['shared']:
                    _write_share_row(f, 'shared', u, '', '',
                                     _describe_counterpart(s.recipient_id), s,
                                     0, (s.date_shared - date_joined).days)

            if 'shared_to' in u_sharer:
                summary['shared_to'] += len(u_sharer['shared_to'])
                share_summary['avg_free_shared_to'].append(summary['shared_to'])
                for s in u_sharer['shared_to']:
                    _write_share_row(f, 'shared_to', u, '', '',
                                     _describe_counterpart(s.user_id), s,
                                     0, (s.date_shared - date_joined).days)

# Count users by number of subscription records.  This has to happen before
# the summaries below, which divide by these totals.
stayed_free_user_count = 0
subscribed_user_count = 0
odd_user_count = 0
for key in users.keys():
    sub_records = len(users[key]['sub'])
    if sub_records == 0:
        stayed_free_user_count += 1
    elif sub_records == 1:
        subscribed_user_count += 1
    else:
        odd_user_count += 1

# NOTE(review): the "(mean %i)" slots are filled with the median (see
# _summary_stats); the label is kept so the output format does not change.
sub_shared = share_summary['avg_shared_before'] + share_summary['avg_shared_after']
print('SHARED:\n\tfree: %i (avg %.2f) (mean %i) (pop %.4f)\n\tsub: %i (avg %.2f) (mean %i) (pop %.4f)\n\t\tbefore sub: %i (avg %.2f) (mean %i) (pop %.4f)\n\t\tafter sub:%i (avg %.2f) (mean %i) (pop %.4f)' % (
    _summary_stats(share_summary['avg_free_shared'], stayed_free_user_count)
    + _summary_stats(sub_shared, subscribed_user_count)
    + _summary_stats(share_summary['avg_shared_before'], subscribed_user_count)
    + _summary_stats(share_summary['avg_shared_after'], subscribed_user_count)))

sub_shared_to = share_summary['avg_shared_to_before'] + share_summary['avg_shared_to_after']
print('SHARED TO:\n\tfree: %i (avg %.2f) (mean %i) (pop %.4f)\n\tsub: %i (avg %.2f) (mean %i) (pop %.4f)\n\t\tbefore sub: %i (avg %.2f) (mean %i) (pop %.4f)\n\t\tafter sub:%i (avg %.2f) (mean %i) (pop %.4f)' % (
    _summary_stats(share_summary['avg_free_shared_to'], stayed_free_user_count)
    + _summary_stats(sub_shared_to, subscribed_user_count)
    + _summary_stats(share_summary['avg_shared_to_before'], subscribed_user_count)
    + _summary_stats(share_summary['avg_shared_to_after'], subscribed_user_count)))

# ---------------------------------------------------------------------------
# Follow relationships: whom the users follow, and who follows them.
# ---------------------------------------------------------------------------
query = UserFollowers.objects.filter(follower__in=user_ids)
print(' .. chunking follower info')
for follow_chunk in chunk_query(query, 5000):
    print(' .... chunk')
    for f in follow_chunk:
        key = str(f.follower.id)
        users[key].setdefault('is_follower', []).append(
            {'followee': f.followee.id, 'date': f.date_added})

filename = 'is_follower_info.csv'
_write_follow_days(filename, 'is_follower')

query = UserFollowers.objects.filter(followee__in=user_ids)
print(' .. chunking followee info')
for follow_chunk in chunk_query(query, 5000):
    print(' .... chunk')
    for f in follow_chunk:
        key = str(f.followee.id)
        users[key].setdefault('is_followee', []).append(
            {'follower': f.follower.id, 'date': f.date_added})

filename = 'is_followee_info.csv'
_write_follow_days(filename, 'is_followee')

# ---------------------------------------------------------------------------
# One row per user: 'f' = no subscription record, 't' = exactly one, 'x' = more.
# ---------------------------------------------------------------------------
filename = 'user_keys.csv'
with open(filename, 'w') as out:
    for key in users.keys():
        sub_records = len(users[key]['sub'])
        if sub_records == 0:
            flag = 'f'
        elif sub_records == 1:
            flag = 't'
        else:
            flag = 'x'
        out.write('%s,%s\n' % (key, flag))

# ---------------------------------------------------------------------------
# Miscellaneous per-user info from the User model.
# ---------------------------------------------------------------------------
user_ids = [long(i) for i in users.keys()]
more_info_stuff = {}
query = User.objects.filter(id__in=user_ids)
print(' .. chunking misc info')
for misc_info_chunk in chunk_query(query, 5000):
    print(' .... chunk')
    for u in misc_info_chunk:
        key = str(u.id)
        more_info_stuff[key] = {
            'email': u.email,
            'login': u.last_login,
            'join': u.date_joined,
            'reviews': u.review_count,
            'last_update': u.last_updated,
            'device_count': len(u.offlinedevice_set.all()),
        }

# NOTE(review): formal_users further down reads u['misc'] (email_domain,
# reviews, days_active, device_count).  The assignment that stored exactly
# those values here was commented out in the original, so 'misc' must be
# populated somewhere else - confirm.
filename = 'misc_user_info.csv'
with open(filename, 'w') as out:
    for key in more_info_stuff.keys():
        if str(key) not in users:
            print("WHAT THE F**K %s" % key)
            continue
        data = more_info_stuff[key]
        # Keep only the part of the address after the '@'.
        email = re.sub(r'.*@(.*)', r'\1', data['email'])
        days_active = (data['last_update'] - data['join']).days
        out.write('%s,%s,%i,%i,%i\n' % (
            key, email, days_active, data['reviews'], data['device_count']))

# ---------------------------------------------------------------------------
# Playlists owned by the users.
# ---------------------------------------------------------------------------
playlists = {}
query = Playlist.objects.filter(owner__in=user_ids)
print(' .. chunking playlist info')
for playlist_chunk in chunk_query(query, 5000):
    print(' .... chunk')
    for p in playlist_chunk:
        key = str(p.owner.id)
        playlists.setdefault(key, []).append({
            'created': p.created,
            'updates': p.counts_updated,
            'size': len(p.playlist_info.entries),
        })

filename = 'playlists.csv'
with open(filename, 'w') as out:
    for key in playlists:
        u = users[key]
        u['playlists'] = []
        date_joined = _parse_datetime(u['details'][1], u['details'][2])
        for p in playlists[key]:
            day_made = (p['created'] - date_joined).days
            u['playlists'].append({'day_made': day_made, 'size': p['size']})
            out.write('%s,%i,%i,%s,%s\n' % (
                key, p['size'], day_made, p['created'].date(), p['created'].time()))

# ---------------------------------------------------------------------------
# Play counts.  CSV columns: 0 = user id, 1 = device, 2 = day, 3 = songs played
# e.g. 15089702,2,0.0,43
# ---------------------------------------------------------------------------
with open('/Users/rebecca/Work/hackday/novemberHack/plays_october.csv', 'r') as f_plays:
    for l in f_plays:
        l = l.split(',')
        key = l[0]
        u = users[key]
        u.setdefault('plays', []).append(
            {'device': l[1], 'day': l[2], 'started_songs': l[3].strip()})

with open('/Users/rebecca/Work/hackday/novemberHack/plays_tracked_october.csv', 'r') as f_tracked_plays:
    for l in f_tracked_plays:
        l = l.split(',')
        key = l[0]
        u = users[key]
        if 'plays' not in u:
            print('WHAT THE HELL!?!? %s' % key)
            continue
        # Attach the tracked count to every play record for the same device
        # and day; warn once only when no record matched at all.
        matched = False
        for p in u['plays']:
            if p['device'] == l[1] and p['day'] == l[2]:
                p['tracked_songs'] = l[3].strip()
                matched = True
        if not matched:
            print("\t .. how can I have a tracked count without a play count? %s : %s %s" % (key, l[1], l[2]))

plays_summary = {'no_plays': 0}
for day in range(0, 11):
    for prefix in ('free', 'sub'):
        plays_summary['%s_players_%s' % (prefix, day)] = 0
        plays_summary['%s_ratio_%s' % (prefix, day)] = 0
        plays_summary['%s_skips_%s' % (prefix, day)] = 0
        plays_summary['%s_plays_%s' % (prefix, day)] = 0

for key in users.keys():
    u = users[key]
    if 'plays' not in u:
        plays_summary['no_plays'] += 1
        continue
    prefix = 'sub' if len(u['sub']) == 1 else 'free'
    for p in u['plays']:
        i = int(float(p['day']))
        tracked_songs = int(p['tracked_songs']) if 'tracked_songs' in p else 0
        plays_key = '%s_plays_%s' % (prefix, i)
        skips_key = '%s_skips_%s' % (prefix, i)
        plays_summary[plays_key] += tracked_songs
        plays_summary[skips_key] += int(p['started_songs']) - tracked_songs
        # NOTE(review): this adds the *running* skip ratio for the day rather
        # than this record's own ratio; kept as in the original - confirm.
        plays_summary['%s_ratio_%s' % (prefix, i)] += _safe_ratio(
            plays_summary[skips_key],
            plays_summary[plays_key] + plays_summary[skips_key])
        plays_summary['%s_players_%s' % (prefix, i)] += 1.0

for day in range(0, 10):
    print("DAY %i" % day)
    v = _safe_ratio(
        plays_summary['free_skips_%i' % day],
        plays_summary['free_plays_%i' % day] + plays_summary['free_skips_%i' % day])
    print(" free: %.03f ratio\t%.03f avg ratio" % (
        v, _safe_ratio(plays_summary['free_ratio_%s' % day],
                       plays_summary['free_players_%s' % day])))
    v = _safe_ratio(
        plays_summary['sub_skips_%i' % day],
        plays_summary['sub_plays_%i' % day] + plays_summary['sub_skips_%i' % day])
    print(" sub: %.03f\t%.03f" % (
        v, _safe_ratio(plays_summary['sub_ratio_%s' % day],
                       plays_summary['sub_players_%s' % day])))

# Only users who joined before the cut-off date, and only days 0-2 of plays.
filename = 'play_summary.csv'
cut_off_date = datetime.datetime(2013, 10, 8)
with open(filename, 'w') as out:
    for key in users.keys():
        u = users[key]
        date_joined = _parse_datetime(u['details'][1], u['details'][2])
        if (date_joined - cut_off_date).days >= 0:
            continue
        sub = len(u['sub']) == 1
        if 'plays' in u:
            for p in u['plays']:
                day = int(float(p['day']))
                if day < 3:
                    tracked_songs = int(p['tracked_songs']) if 'tracked_songs' in p else 0
                    out.write('%s,%s,%s,%i,%s,%s,%i\n' % (
                        key, sub, date_joined.date(), day, p['device'],
                        p['started_songs'], tracked_songs))
        else:
            out.write('%s,%s,%s,%i,%s,%s,%i\n' % (key, sub, date_joined.date(), 0, -1, 0, 0))

# ---------------------------------------------------------------------------
# Flatten everything into one JSON-serializable record per user.
# ---------------------------------------------------------------------------
formal_users = {}
for key in users.keys():
    u = users[key]
    date_joined = _parse_datetime(u['details'][1], u['details'][2])
    has_one_sub = len(u['sub']) == 1
    record = {
        'id': key,
        'join_date': u['details'][1],
        'join_time': u['details'][2],
        'sub_date': u['sub'][0][3] if has_one_sub else -99,
        'sub_time': u['sub'][0][4] if has_one_sub else -99,
        'follower_day_0': 0,
        'follower_1_to_sub': 0,
        'follower_post_sub': 0,
        'followee_pre_sub': 0,
        'followee_post_sub': 0,
        'shared_pre_sub': 0,
        'shared_post_sub': 0,
        'shared_to_pre_sub': 0,
        'shared_to_post_sub': 0,
        'playlists_pre_sub': [],
        'playlists_post_sub': [],
        'gender': u['details'][5].strip(),
        'loc_0': u['loc'][0],
        'loc_1': u['loc'][1],
        'age': u['age'],
        'days_to_sub': u['days_to_sub'] if 'days_to_sub' in u else -6,
        'days_active': u['misc']['days_active'],
        'plays': [],
        'email_domain': u['misc']['email_domain'],
        'device_count': u['misc']['device_count'],
        'reviews': u['misc']['reviews'],
    }
    formal_users[key] = record
    days_to_sub = record['days_to_sub']

    if 'playlists' in u:
        for p in u['playlists']:
            if _after_subscription(p['day_made'], days_to_sub):
                record['playlists_post_sub'].append(p['size'])
            else:
                record['playlists_pre_sub'].append(p['size'])

    if 'is_follower' in u:
        for follow in u['is_follower']:
            days_to_follow = (follow['date'] - date_joined).days
            # These used to increment the followee_* counters, leaving
            # follower_1_to_sub / follower_post_sub permanently at 0.
            if days_to_follow == 0:
                record['follower_day_0'] += 1
            elif _after_subscription(days_to_follow, days_to_sub):
                record['follower_post_sub'] += 1
            else:
                record['follower_1_to_sub'] += 1

    if 'is_followee' in u:
        for follow in u['is_followee']:
            days_to_follow = (follow['date'] - date_joined).days
            # pre/post were swapped here relative to every other counter.
            if _after_subscription(days_to_follow, days_to_sub):
                record['followee_post_sub'] += 1
            else:
                record['followee_pre_sub'] += 1

    if 'shared' in u:
        for s in u['shared']:
            days_to_share = (s.date_shared - date_joined).days
            if _after_subscription(days_to_share, days_to_sub):
                record['shared_post_sub'] += 1
            else:
                record['shared_pre_sub'] += 1

    if 'shared_to' in u:
        for s in u['shared_to']:
            days_to_share = (s.date_shared - date_joined).days
            if _after_subscription(days_to_share, days_to_sub):
                record['shared_to_post_sub'] += 1
            else:
                record['shared_to_pre_sub'] += 1

    if 'plays' in u and (date_joined - cut_off_date).days < 0:
        for p in u['plays']:
            day = int(float(p['day']))
            if day < 3:
                tracked_songs = int(p['tracked_songs']) if 'tracked_songs' in p else 0
                record['plays'].append(
                    (day, int(p['device']), int(p['started_songs']), tracked_songs))

filename = 'formal_user_data_3.csv'
with open(filename, 'w') as out:
    json.dump(formal_users, out)

# A 20-user sample, written as a JavaScript assignment.
reduced_formal_users = {}
keys = list(formal_users.keys())[:20]
for key in keys:
    reduced_formal_users[key] = formal_users[key]

filename = 'formal_user_data_short_test.csv'
with open(filename, 'w') as out:
    out.write("var edges = ")
    json.dump(reduced_formal_users, out)

# NOTE(review): deliberate ZeroDivisionError as soon as a user with a
# 'playlists' entry is found - looks like a leftover debugging tripwire.
# Behaviour kept; delete once confirmed unnecessary.
for key in users.keys():
    u = users[key]
    if 'playlists' in u:
        print(6 / 0)
import datetime
from rdio.auth.models import UserFollowers
from rdio.analytics.models import ShareActions
from django.conf import settings
from rdio.library.models.playlist import Playlist
import json

# For each of the first ten days of October 2013: collect the US users who
# joined that day and dump their subscription-history events to
# 'subscribe_list_<day>.csv'.
for day in range(1, 11):
    print("Starting day %i" % day)
    user_ids = []
    user_details = []
    # Half-open window [day, day + 1): with an inclusive upper bound a user
    # who joined exactly at midnight was picked up on two consecutive days.
    query = User.objects.filter(
        date_joined__gte=datetime.datetime(2013, 10, day),
        date_joined__lt=datetime.datetime(2013, 10, day + 1))
    print(' .. chunking user info')
    for user_chunk in chunk_query(query, 1000):
        print(' .... chunk')
        for u in user_chunk:
            if u.country_code == 'US':
                user_ids.append(u.id)
                user_details.append(
                    (u.id, u.date_joined, u.birthday, u.location, u.gender))

    subs = SubscriptionHistory.objects.filter(
        event__in=(SubscriptionHistoryEvent.UserSubscribe,
                   SubscriptionHistoryEvent.UserUnsubscribe,
                   SubscriptionHistoryEvent.UserCancelUnsubscribe),
        user_id__in=user_ids).values_list(
            'user_id', 'old_subscription_type', 'new_subscription_type', 'date_changed')
    print(" .. chewing on subscription data")
    subs = list(subs)

    print(" .. writing to files")
    filename = 'subscribe_list_%i.csv' % day
    # The context manager closes each day's file; previously one handle was
    # leaked per iteration.
    with open(filename, 'w') as f:
        for z in subs:
            # z = (user_id, old_subscription_type, new_subscription_type, date_changed)
            f.write('%i,%i,%i,%s,%s\n' % (z[0], z[1], z[2], z[3].date(), z[3].time()))