def set_present_date(self, present_date):
    """Set the reference "present" instant and recompute the cutoff dates.

    :param present_date: datetime to use as the present moment. A naive
        datetime is localized to UTC. A falsy value, or anything whose
        string form is ``none`` (case-insensitive), selects the current
        UTC time instead.

    Updates ``_present_date`` and then ``short_term_cutoff_date``,
    ``long_term_cutoff_date`` and ``popularity_cutoff_date``. Each cutoff is
    the present date minus the matching ``*_window`` attribute (interpreted
    as a number of days by ``timedelta``), passed through ``du.get_day``.
    """
    use_current_time = not present_date or str(present_date).lower() == 'none'
    if use_current_time:
        reference = dt.datetime.now(pytz.utc)
    elif present_date.tzinfo is None:
        # Naive datetimes are taken to be expressed in UTC.
        reference = pytz.utc.localize(present_date)
    else:
        reference = present_date
    self._present_date = reference

    self.short_term_cutoff_date = du.get_day(
        reference - dt.timedelta(self.short_term_window))
    self.long_term_cutoff_date = du.get_day(
        reference - dt.timedelta(self.long_term_window))
    self.popularity_cutoff_date = du.get_day(
        reference - dt.timedelta(self.popularity_window))
def test_present_date(self):
    """ Checks that setting a new present date refreshes the short term and
        long term cutoff dates accordingly.
    """
    naive_present = dt.datetime(1988, 11, 6, 10, 0)
    self.context.set_present_date(naive_present)

    # The context is expected to localize the naive date to UTC and work
    # from the value returned by du.get_day for it.
    present_day = du.get_day(pytz.utc.localize(naive_present))
    short_window = dt.timedelta(days=self.context.short_term_window)
    long_window = dt.timedelta(days=self.context.long_term_window)
    expected_short_term_cutoff = present_day - short_window
    expected_long_term_cutoff = present_day - long_window

    nose.tools.eq_(self.context.short_term_cutoff_date, expected_short_term_cutoff)
    nose.tools.eq_(self.context.long_term_cutoff_date, expected_long_term_cutoff)
def flush_summaries(database, popularity_summaries_by_product):
    """Merge buffered popularity summaries into ``popularities_summary``.

    For every product in the buffer, the stored summary (if any) is combined
    with the buffered one: counts are added, ``first`` becomes the earlier of
    the two dates and ``latest`` the later. When anything changed, an upsert
    is queued that stores the merged values together with ``popularity``,
    computed as the merged count divided by the inclusive number of days
    between the first and latest day.

    :param database: object exposing the ``popularities_summary`` collection.
    :param popularity_summaries_by_product: dict mapping a product id to a
        dict with the keys ``"count"``, ``"first"`` and ``"latest"``.
        The dict is emptied before returning.
    """
    log.info("Saving %d summaries..." % len(popularity_summaries_by_product))

    # Nothing buffered: skip the round trip entirely. Executing a bulk
    # operation with no queued writes raises InvalidOperation in PyMongo.
    if not popularity_summaries_by_product:
        return

    collection = database.popularities_summary
    bulk_op = collection.initialize_unordered_bulk_op()

    where = {"p_id": {"$in": list(popularity_summaries_by_product)}}
    fields = {"p_id": True, "count": True, "first": True, "latest": True, "_id": False}
    current_summaries_by_product = {
        rec["p_id"]: rec for rec in collection.find(where, fields)}

    # Stand-in for products with no stored summary: the extreme dates ensure
    # that min()/max() below always pick the buffered values.
    no_history = {
        "count": 0,
        "first": pytz.utc.localize(dt.datetime(3000, 1, 1)),
        "latest": pytz.utc.localize(dt.datetime(1, 1, 1)),
    }

    queued = 0
    for product, popularity_summary in popularity_summaries_by_product.items():
        current_summary = current_summaries_by_product.get(product, no_history)
        current_count = current_summary["count"]
        current_first = current_summary["first"]
        current_latest = current_summary["latest"]

        first = min(current_first, popularity_summary["first"])
        latest = max(current_latest, popularity_summary["latest"])
        new_count = current_count + popularity_summary["count"]

        unchanged = (first == current_first
                     and latest == current_latest
                     and new_count == current_count)
        if unchanged:
            continue

        first_day = du.get_day(first)
        latest_day = du.get_day(latest)
        # Inclusive span: first and latest on the same day count as 1 day.
        day_span = (latest_day - first_day).days + 1
        new_popularity = new_count / day_span

        spec = {"p_id": product}
        update_clause = {"$set": {"first": first_day,
                                  "latest": latest_day,
                                  "count": new_count,
                                  "popularity": new_popularity}}
        bulk_op.find(spec).upsert().update(update_clause)
        queued += 1

    # Only hit the server when at least one upsert was queued; an empty
    # bulk execute would raise instead of being a no-op.
    if queued:
        bulk_op.execute()

    popularity_summaries_by_product.clear()
def fetch_activities_cursor(database, latest_processed_activity_date, max_date,
                            activity_types):
    """Open a cursor over ``activities_summary`` for the given activity types.

    :param database: object exposing the ``activities_summary`` collection.
    :param latest_processed_activity_date: when not None, only documents whose
        ``day`` is on or after ``du.get_day`` of this date are selected.
    :param max_date: when not None, only documents whose ``created_at`` is
        strictly before this value are selected.
    :param activity_types: values accepted for the ``activity`` field.
    :returns: whatever ``find`` returns for the query; the call is made with
        ``timeout=False``.
    """
    query = {"activity": {"$in": activity_types}}

    if latest_processed_activity_date is not None:
        earliest_day = du.get_day(latest_processed_activity_date)
        query["day"] = {"$gte": earliest_day}

    if max_date is not None:
        query["created_at"] = {"$lt": max_date}

    projection = {
        "_id": False,
        "activity": True,
        "created_at": True,
        "external_user_id": True,
        "external_product_id": True,
    }
    return database.activities_summary.find(query, projection, timeout=False)
def flush_summaries(database, latest_activity_by_user_and_product):
    """Upsert the buffered latest activities into ``activities_summary``.

    One upsert is queued per (user, product) key, setting the ``activity``,
    ``created_at`` and ``day`` fields (``day`` being ``du.get_day`` of the
    activity date). All upserts are sent as a single unordered bulk operation.

    :param database: object exposing the ``activities_summary`` collection.
    :param latest_activity_by_user_and_product: dict keyed by a
        ``(external_user_id, external_product_id)`` sequence whose values are
        dicts with the keys ``"activity"`` and ``"created_at"``.
        The dict is emptied before returning.
    """
    log.info("Saving %d summaries..." % len(latest_activity_by_user_and_product))

    # Nothing buffered: executing a bulk operation with no queued writes
    # raises InvalidOperation in PyMongo, so there is nothing to do here.
    if not latest_activity_by_user_and_product:
        return

    bulk_op = database.activities_summary.initialize_unordered_bulk_op()

    for user_and_product, activity in latest_activity_by_user_and_product.items():
        # upserts the (u,p) pair
        spec = {"external_user_id": user_and_product[0],
                "external_product_id": user_and_product[1]}
        activity_date = activity["created_at"]
        update_clause = {"$set": {"activity": activity["activity"],
                                  "day": du.get_day(activity_date),
                                  "created_at": activity_date}}
        bulk_op.find(spec).upsert().update(update_clause)

    bulk_op.execute()
    latest_activity_by_user_and_product.clear()