def process_item(self, item, spider):
    """Upsert a scraped shop or comment record into its collection.

    Each record is matched on its identifying fields plus an ``update_time``
    window that starts at ``get_first_day_of_month()`` and spans 30 days.
    A stored record matching that filter is updated in place via ``$set``;
    otherwise a new one is inserted (``upsert=True``).

    :param item: the scraped item. ``ShopInfoItem`` is written to
        ``self.shop_coll`` and ``CommtentInfoItem`` to ``self.comment_coll``;
        any other item type is passed through without being stored.
    :param spider: the spider that produced the item (unused here).
    :return: the item, unchanged.
    """
    window_seconds = 30 * 24 * 60 * 60

    def current_window():
        # Resolve the month start once so both bounds are derived from the
        # same value instead of from two independent calls.
        # NOTE(review): the window is a fixed 30 days; if the helper returns
        # the epoch timestamp of the month start, the last day of a 31-day
        # month falls outside it -- confirm this is intended.
        month_start = get_first_day_of_month()
        return {'$gte': month_start, '$lte': month_start + window_seconds}

    # NOTE(review): if these are PyMongo collections, ``update`` is deprecated
    # since PyMongo 3.0 and removed in 4.0 -- consider ``update_one``.
    if isinstance(item, ShopInfoItem):
        shop_info = item['shop_info']
        filter_dict = {
            'shop_url': shop_info['shop_url'],
            'update_time': current_window(),
        }
        self.shop_coll.update(filter_dict, {'$set': shop_info}, upsert=True)
    elif isinstance(item, CommtentInfoItem):
        print('item----comment-----------')
        comment_info = item['comment_info']
        filter_dict = {
            'shop_url': comment_info['shop_url'],
            'user_url': comment_info['user_url'],
            'comment_time': comment_info['comment_time'],
            'update_time': current_window(),
        }
        self.comment_coll.update(filter_dict, {'$set': comment_info}, upsert=True)

    # Returning the item makes the original item data show up in the console
    # output; per the original author this return is optional.
    return item
def request_seen(self, request):
    """Check whether the current request has already been visited.

    A URL counts as visited when ``self.url_coll`` holds a record for it whose
    ``update_time`` lies between ``get_first_day_of_month()`` and 30 days
    after it. When no such record exists, one is stored with the current time
    so that the next lookup for the same URL reports it as visited.

    :param request: the request to check; only ``request.url`` is read.
    :return: True if the URL has already been visited; False if it has not
        (in which case it is recorded before returning).
    """
    # Disabled override kept for reference:
    # if request.url.endswith('/food'):
    #     return False

    # Resolve the month start once so both bounds are derived from the same
    # value instead of from two independent calls.
    month_start = get_first_day_of_month()
    filter_dict = {
        'url': request.url,
        'update_time': {
            '$gte': month_start,
            '$lte': month_start + 30 * 24 * 60 * 60,
        },
    }

    if self.url_coll.find_one(filter_dict):
        return True

    url_info = {'url': request.url, 'update_time': time.time()}
    # NOTE(review): if this is a PyMongo collection, ``update`` is deprecated
    # since PyMongo 3.0 and removed in 4.0 -- consider ``update_one``.
    self.url_coll.update(filter_dict, {'$set': url_info}, upsert=True)
    return False
def get_new_workouts_for_user(user, since=None):
    """Hand the user's MapMyFitness accounts to the workout-fetching task.

    :param user: the user whose social-auth records are looked up.
    :param since: lower bound passed on to the fetch task; when ``None`` the
        value of ``get_first_day_of_month(user)`` is used instead.
    """
    since_datetime = get_first_day_of_month(user) if since is None else since

    # The provider split is done here, one list per provider, so that workouts
    # can be fetched from multiple providers. The old runkeeper variant:
    # runkeeper_users = [sau for sau in social_auth_users if sau.provider == 'runkeeper']
    mmf_users = []
    for social_auth in UserSocialAuth.objects.filter(user=user):
        if social_auth.provider == 'mapmyfitness':
            mmf_users.append(social_auth)

    get_new_mmf_workouts.delay(mmf_users, since_datetime)
def update_leaderboards_for_user(user, year=None, month=None):
    """Recompute the user's all-time and monthly leaderboard entries.

    For each timespan ('all', 'monthly') and each activity type ('all',
    'run', 'ride', 'walk') this sums ``marker.point_value`` over the
    workout markers (distinct by marker) of the matching workouts and passes
    the total to ``create_or_update_entry``.

    :param user: the user whose entries are refreshed. ``user.profile.timezone``
        is read when an explicit month is requested.
    :param year: calendar year of the month to score.
    :param month: calendar month (1-12) of the month to score.
        When both ``year`` and ``month`` are ``None`` the month starts at
        ``get_first_day_of_month(user)``. Supplying only one of them is not
        supported and fails when the start datetime is constructed.
    """
    timespans = ('all', 'monthly',)
    activity_types = {
        'all': None,
        'run': Workout.TYPE_RUN,
        'ride': Workout.TYPE_RIDE,
        'walk': Workout.TYPE_WALK,
    }

    all_time_workouts = Workout.objects.filter(user=user)

    if year is None and month is None:
        leaderboard_start_datetime = get_first_day_of_month(user)
        leaderboard_end_datetime = leaderboard_start_datetime + relativedelta(months=1)
    else:
        user_timezone = timezone(user.profile.timezone)
        naive_start = datetime.datetime(year, month, 1)
        naive_end = naive_start + relativedelta(months=1)
        # pytz-style zones (presumably what ``timezone`` returns -- confirm)
        # must be attached with localize(); passing them as ``tzinfo=`` picks
        # the zone's earliest historical offset rather than the one in effect
        # on that date. Zones without localize() are attached directly, which
        # matches the previous behaviour.
        localize = getattr(user_timezone, 'localize', None)
        if localize is not None:
            leaderboard_start_datetime = localize(naive_start)
            leaderboard_end_datetime = localize(naive_end)
        else:
            leaderboard_start_datetime = naive_start.replace(tzinfo=user_timezone)
            leaderboard_end_datetime = naive_end.replace(tzinfo=user_timezone)

    # Half-open interval: [start of month, start of next month).
    monthly_workouts = all_time_workouts.filter(
        start_datetime__gte=leaderboard_start_datetime,
        start_datetime__lt=leaderboard_end_datetime,
    )

    for timespan in timespans:
        # Everything that depends only on the timespan is resolved once here
        # rather than on every activity-type iteration.
        if timespan == 'all':
            workouts = all_time_workouts
            kwargs = {'all_time': True}
            timespan_string = 'all'
        else:
            workouts = monthly_workouts
            kwargs = {
                'year': leaderboard_start_datetime.year,
                'month': leaderboard_start_datetime.month,
            }
            timespan_string = 'monthly ({0}-{1})'.format(
                leaderboard_start_datetime.year, leaderboard_start_datetime.month)

        # items() instead of iteritems() so the loop runs on Python 2 and 3.
        for activity_type, activity_enum in activity_types.items():
            if activity_type != 'all':
                these_workouts = workouts.filter(type=activity_enum)
            else:
                these_workouts = workouts

            workouts_ids = these_workouts.values_list('id', flat=True)
            # A marker reached in several workouts is only counted once.
            workout_markers = WorkoutMarker.objects.filter(
                workout__id__in=workouts_ids).distinct('marker')
            points = sum(
                workout_marker.marker.point_value
                for workout_marker in workout_markers
            )

            logger.info('Creating/Updating {0} {1} points for {2}: {3}'.format(
                timespan_string, activity_type, user, points))
            create_or_update_entry(points, user, activity_type, **kwargs)