def get_report(self, time_period, current_period=None, trending_function=biggest_change):
    """Print a ranked list of trending lexicon entries for every category.

    The listings are first regrouped via ``self._group_listings(time_period)``.
    For each category in ``self._groups`` the ranking is obtained from
    ``self._get_trending`` and written to stdout, one numbered line per entry.

    Args:
        time_period: granularity forwarded to ``_group_listings``,
            ``trunc_date`` and ``_get_trending``.
        current_period: period to report on. When falsy, the value returned
            by ``trunc_date`` for today's local date is used instead.
        trending_function: scoring callable handed through to
            ``_get_trending`` unchanged.

    Returns:
        None. All output goes to stdout.
    """
    # NOTE: a single parenthesised argument makes these print calls behave
    # exactly like the Python 2 print statement.
    self._group_listings(time_period)
    print("\nTrending Items: {}".format(time_period))

    # Fall back to the period containing today's date when none was supplied.
    current_period = current_period or trunc_date(
        datetime.datetime.now().date(), time_period)
    print("Current Period: {}".format(current_period))

    for category, listings in self._groups.iteritems():
        print("\n{}\n===========".format(category))
        ranked = self._get_trending(
            listings, current_period, time_period, trending_function)
        for position, lexicon_index in enumerate(ranked, 1):
            entry = self._lexicon[lexicon_index]
            # Tuple entries are joined with spaces so they print as one phrase.
            if type(entry) is tuple:
                entry = " ".join(entry)
            print("{}. {}".format(position, entry.encode('utf-8')))
def _group_listings(self, time_period):
    """Rebuild ``self._groups``: vectorized listings by category, then period.

    Every item of ``self._parsed_listings`` is unpacked as
    ``(int_tokens, categories, date)``. ``categories`` is iterated, so it
    must be an iterable of category keys (a bare string would be split into
    characters). The date is converted with ``get_date`` and truncated with
    ``trunc_date(..., time_period)``; the tokens are turned into a vector with
    ``get_vectorized_listing(int_tokens, len(self._lexicon))``.

    Resulting layout::

        {category: {period: [vector_listing, ...], ...}, ...}

    The mapping is rebuilt from scratch on every call. Previously each call
    appended to whatever was already in ``self._groups``, so repeated reports
    duplicated listings and mixed period keys of different granularities.
    NOTE(review): this assumes ``self._groups`` is populated only by this
    method -- confirm no other code pre-fills it.

    Args:
        time_period: granularity forwarded to ``trunc_date``.

    Returns:
        The freshly built nested dictionary, which is also stored on
        ``self._groups``.
    """
    lexicon_size = len(self._lexicon)
    groups = {}
    for int_tokens, categories, date in self._parsed_listings:
        date_period = trunc_date(get_date(date), time_period)
        vector_listing = get_vectorized_listing(int_tokens, lexicon_size)
        # The same vector object is shared by every category of the listing,
        # as in the original implementation.
        for category in categories:
            by_period = groups.setdefault(category, {})
            by_period.setdefault(date_period, []).append(vector_listing)
    self._groups = groups
    return groups