def venues_from_top_n_categories(m=40, n=5): """ Gets the top m venues from the top n categories ordered by shout_count :param m: the number of venues from each category to return :type m: int :param n: the number of categories to return :type n: int :return: 2 lists - one for categories, and one for venues :rtype: ([], []) """ cats = Venue.raw('SELECT cat_id, cat_name ' 'FROM venue ' 'GROUP BY cat_id ' 'ORDER BY sum(shout_count) DESC ' 'LIMIT ?', n) cat_list = [] venue_list = [] for v in cats.execute(): vens = [ven for ven in (Venue .select() .where(Venue.cat_id == v.cat_id) .order_by(-Venue.shout_count) .limit(m))] cat_list.append(v.cat_name) venue_list.extend(vens) return cat_list, venue_list
def venues_to_docs(): """ For each venue, creates a file of all related shouts, one shout per line. :return: None :rtype: None """ for ven in Venue.select(): with codecs.open('../../data/ven/{}.txt'.format(ven.id), 'w', encoding='utf-8') as ven_f: for checkin in ven.checkins: ven_f.write(checkin.shout) ven_f.write('\n')
def get_ven_by_id(ven_id): """ Get venue name for ven_id. :param ven_id: ID of venue to find name :type ven_id: str :return: Venue :rtype: database row """ try: return Venue.get(Venue.id == ven_id) except Venue.DoesNotExist: print(u'Venue with that ID does not exist.')
def venues_to_doc(fname='../../data/allVenues.txt'): """ Writes all venues to one file. :param fname: filename of output file :return: None :rtype: None """ with codecs.open(fname, 'w', encoding='utf-8') as fout: for ven in Venue.select(): fout.write(ven.id + ' ') checkin_str = u' '.join(ven.checkins) fout.write(checkin_str) fout.write('\n')
def update_shout_counts(): """ Counts total # of shouts for each user and each venue. :return: None :rtype: None """ venue_subquery = Checkin.select(pw.fn.COUNT(Checkin.id)).where(Checkin.venue == Venue.id) venue_update = Venue.update(shout_count=venue_subquery) venue_update.execute() user_subquery = Checkin.select(pw.fn.COUNT(Checkin.id)).where(Checkin.user == User.id) user_update = User.update(shout_count=user_subquery) user_update.execute()
def get_categories(n): """ Gets set of categories for venues whose shout_count > n. :param n: shout_count > n :type n: int :return: set of category names :rtype: [str] """ return [ven.cat_name for ven in Venue.raw('select distinct cat_name ' 'from venue ' 'where id in ' '(select venue.id ' 'from venue ' 'where shout_count > ?)', n)]