def exception_change_feed_link(request):
    feed_id = request.POST['feed_id']
    feed = get_object_or_404(Feed, pk=feed_id)
    feed_link = request.POST['feed_link']
    code = -1
    if not feed.has_page_exception and not feed.has_feed_exception:
        logging.info(" ***********> [%s] Incorrect feed link change: %s" % (request.user, feed))
        # This Forbidden-403 throws an error, which sounds pretty good to me right now
        return HttpResponseForbidden()

    feed_address = feedfinder.feed(feed_link)
    if feed_address:
        code = 1
        feed.has_page_exception = False
        feed.active = True
        feed.fetched_once = False
        feed.feed_link = feed_link
        feed.feed_address = feed_address
        feed.next_scheduled_update = datetime.datetime.now()
        try:
            feed.save()
        except IntegrityError:
            # Another feed already owns this address; wake it up and fold
            # this feed into it.
            original_feed = Feed.objects.get(feed_address=feed_address)
            original_feed.next_scheduled_update = datetime.datetime.now()
            original_feed.has_page_exception = False
            original_feed.active = True
            original_feed.save()
            merge_feeds(original_feed.pk, feed.pk)

    return {'code': code}
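
# Hedged sketch of the feedfinder contract the view above depends on: given a
# page (or feed) URL, feedfinder.feed() is expected to return the
# autodiscovered feed address, or a falsy value when nothing is found. The
# import path and the URL below are illustrative assumptions, not taken from
# the source.
from utils import feedfinder  # assumed module path

def autodiscover_example():
    feed_address = feedfinder.feed('http://example.com/blog/')  # hypothetical URL
    if feed_address:
        print("Autodiscovered feed: %s" % feed_address)
    else:
        print("No feed found at that link")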

def exception_change_feed_address(request):
    feed_id = request.POST['feed_id']
    feed = get_object_or_404(Feed, pk=feed_id)
    feed_address = request.POST['feed_address']
    if not feed.has_feed_exception and not feed.has_page_exception:
        logging.info(" ***> [%s] ~BRIncorrect feed address change: ~SB%s" % (request.user, feed))
        return HttpResponseForbidden()

    feed.has_feed_exception = False
    feed.active = True
    feed.fetched_once = False
    feed.feed_address = feed_address
    feed.next_scheduled_update = datetime.datetime.utcnow()
    retry_feed = feed
    duplicate_feed_id = feed.save()
    if duplicate_feed_id:
        # save() detected an existing feed at this address; retry against it
        # and merge the broken feed into it.
        original_feed = Feed.objects.get(pk=duplicate_feed_id)
        retry_feed = original_feed
        original_feed.next_scheduled_update = datetime.datetime.utcnow()
        original_feed.has_feed_exception = False
        original_feed.active = True
        original_feed.save()
        merge_feeds(original_feed.pk, feed.pk)

    logging.user(request, "~FRFixing feed exception by address: ~SB%s" % (retry_feed.feed_address))
    retry_feed.update()

    usersub = UserSubscription.objects.get(user=request.user, feed=retry_feed)
    usersub.calculate_feed_scores(silent=False)

    feeds = {feed.pk: usersub.canonical(full=True)}
    return {'code': 1, 'feeds': feeds}

def exception_change_feed_address(request):
    feed_id = request.POST['feed_id']
    feed = get_object_or_404(Feed, pk=feed_id)
    feed_address = request.POST['feed_address']
    if not feed.has_feed_exception and not feed.has_page_exception:
        logging.info(" ***********> [%s] Incorrect feed address change: %s" % (request.user, feed))
        return HttpResponseForbidden()

    feed.has_feed_exception = False
    feed.active = True
    feed.fetched_once = False
    feed.feed_address = feed_address
    feed.next_scheduled_update = datetime.datetime.now()
    try:
        feed.save()
    except IntegrityError:
        # A feed already exists at this address; revive it and merge the
        # broken feed into it.
        original_feed = Feed.objects.get(feed_address=feed_address)
        original_feed.next_scheduled_update = datetime.datetime.now()
        original_feed.has_feed_exception = False
        original_feed.active = True
        original_feed.save()
        merge_feeds(original_feed.pk, feed.pk)

    return {'code': 1}
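
# The versions above lean on two different duplicate-handling conventions in
# Feed.save(): the try/except style expects save() to raise IntegrityError on
# a feed_address collision, while the `duplicate_feed_id = feed.save()` style
# expects the override to swallow the collision and hand back the
# pre-existing feed. A minimal sketch of that second convention, assuming a
# unique constraint on feed_address; this is an illustration, not the
# project's actual model (the class name is a hypothetical stand-in).
from django.db import IntegrityError, models, transaction

class FeedSketch(models.Model):
    feed_address = models.URLField(unique=True)

    def save(self, *args, **kwargs):
        try:
            with transaction.atomic():
                return super(FeedSketch, self).save(*args, **kwargs)
        except IntegrityError:
            # Another feed already owns this address; return it so the
            # caller can merge this feed into the duplicate.
            return FeedSketch.objects.get(feed_address=self.feed_address)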

def exception_change_feed_address(request):
    feed_id = request.POST['feed_id']
    feed = get_object_or_404(Feed, pk=feed_id)
    feed_address = request.POST['feed_address']
    if not feed.has_feed_exception and not feed.has_page_exception:
        logging.info(" ***> [%s] ~BRIncorrect feed address change: ~SB%s" % (request.user, feed))
        return HttpResponseForbidden()

    feed.has_feed_exception = False
    feed.active = True
    feed.fetched_once = False
    feed.feed_address = feed_address
    feed.next_scheduled_update = datetime.datetime.utcnow()
    retry_feed = feed
    duplicate_feed_id = feed.save()
    if duplicate_feed_id:
        # save() found an existing feed at this address; retry against it
        # and merge the broken feed into it.
        original_feed = Feed.objects.get(pk=duplicate_feed_id)
        retry_feed = original_feed
        original_feed.next_scheduled_update = datetime.datetime.utcnow()
        original_feed.has_feed_exception = False
        original_feed.active = True
        original_feed.save()
        merge_feeds(original_feed.pk, feed.pk)

    logging.info(" ---> [%s] ~FRFixing feed exception by address: ~SB%s" % (request.user, retry_feed.feed_address))
    retry_feed.update()

    return {'code': 1}

def exception_change_feed_address(request):
    feed_id = request.POST['feed_id']
    feed = get_object_or_404(Feed, pk=feed_id)
    original_feed = feed
    feed_address = request.POST['feed_address']
    code = -1

    if feed.has_page_exception or feed.has_feed_exception:
        # Fix broken feed
        logging.user(request, "~FRFixing feed exception by address: ~SB%s~SN to ~SB%s" % (feed.feed_address, feed_address))
        feed.has_feed_exception = False
        feed.active = True
        feed.fetched_once = False
        feed.feed_address = feed_address
        feed.next_scheduled_update = datetime.datetime.utcnow()
        duplicate_feed = feed.save()
        code = 1
        if duplicate_feed:
            new_feed = Feed.objects.get(pk=duplicate_feed.pk)
            feed = new_feed
            new_feed.next_scheduled_update = datetime.datetime.utcnow()
            new_feed.has_feed_exception = False
            new_feed.active = True
            new_feed.save()
            # Merge the originally edited feed into the pre-existing
            # duplicate (merging new_feed with itself was a no-op bug).
            merge_feeds(new_feed.pk, original_feed.pk)
    else:
        # Branch good feed
        logging.user(request, "~FRBranching feed by address: ~SB%s~SN to ~SB%s" % (feed.feed_address, feed_address))
        feed, _ = Feed.objects.get_or_create(feed_address=feed_address, feed_link=feed.feed_link)
        if feed.pk != original_feed.pk:
            try:
                feed.branch_from_feed = original_feed.branch_from_feed or original_feed
            except Feed.DoesNotExist:
                feed.branch_from_feed = original_feed
            feed.feed_address_locked = True
            feed.save()
        code = 1

    feed = feed.update()
    feed = Feed.objects.get(pk=feed.pk)

    usersub = UserSubscription.objects.get(user=request.user, feed=original_feed)
    if usersub:
        usersub.switch_feed(feed, original_feed)
        usersub = UserSubscription.objects.get(user=request.user, feed=feed)

    usersub.calculate_feed_scores(silent=False)
    feed.update_all_statistics()
    classifiers = get_classifiers_for_user(usersub.user, usersub.feed.pk)

    feeds = {
        original_feed.pk: usersub.canonical(full=True, classifiers=classifiers),
    }
    return {
        'code': code,
        'feeds': feeds,
        'new_feed_id': usersub.feed.pk,
    }
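
# Hedged sketch of the UserSubscription.switch_feed() contract used above:
# repoint the user's subscription from the original feed to the fixed or
# branched feed. The real method presumably also migrates read-story state
# and classifiers; this body, and the recalc flag in it, are hypothetical.
def switch_feed_sketch(usersub, new_feed, old_feed):
    assert usersub.feed_id == old_feed.pk
    usersub.feed = new_feed
    usersub.needs_unread_recalc = True  # hypothetical field
    usersub.save()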

def test_duplicate_feeds(self):
    # Had to load the feed data this way to hit the save() override;
    # it wouldn't work with loaddata or fixtures.
    with open('apps/feed_import/fixtures/duplicate_feeds.json') as json_file:
        feed_data = json.loads(json_file.read())
    feed_data_1 = feed_data[0]
    feed_data_2 = feed_data[1]
    feed_1 = Feed(**feed_data_1)
    feed_2 = Feed(**feed_data_2)
    feed_1.save()
    feed_2.save()

    call_command('loaddata', 'apps/feed_import/fixtures/subscriptions.json')

    user_1_feed_subscription = UserSubscription.objects.filter(user__id=1)[0].feed_id
    user_2_feed_subscription = UserSubscription.objects.filter(user__id=2)[0].feed_id
    self.assertNotEqual(user_1_feed_subscription, user_2_feed_subscription)

    original_feed_id = merge_feeds(user_1_feed_subscription, user_2_feed_subscription)

    user_1_feed_subscription = UserSubscription.objects.filter(user__id=1)[0].feed_id
    user_2_feed_subscription = UserSubscription.objects.filter(user__id=2)[0].feed_id
    self.assertEqual(user_1_feed_subscription, user_2_feed_subscription)
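
# Hedged sketch of the merge_feeds() contract the test above exercises:
# subscriptions pointing at the duplicate feed are moved onto the original
# feed, the duplicate is deleted, and the surviving feed's id is returned.
# The real implementation also has to migrate stories and other state; this
# body is an assumption, not the project's actual function.
def merge_feeds_sketch(original_feed_id, duplicate_feed_id):
    original_feed = Feed.objects.get(pk=original_feed_id)
    duplicate_feed = Feed.objects.get(pk=duplicate_feed_id)
    for usersub in UserSubscription.objects.filter(feed=duplicate_feed):
        if UserSubscription.objects.filter(user=usersub.user, feed=original_feed).exists():
            # The user already follows the surviving feed; drop the extra sub.
            usersub.delete()
        else:
            usersub.feed = original_feed
            usersub.save()
    duplicate_feed.delete()
    return original_feed.pk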

def handle(self, *args, **options):
    cursor = connection.cursor()
    # Find pairs of feeds that share a link, title, and tagline. Each feed
    # joins to its rss_feeds_feeddata row by primary key (the original query
    # joined on a nonexistent f.feed_id column and mixed comma joins with
    # INNER JOINs, which breaks the ON clauses).
    cursor.execute("""SELECT DISTINCT f.id AS original_id,
                             f2.id AS duplicate_id,
                             f.feed_address AS original_feed_address,
                             f2.feed_address AS duplicate_feed_address,
                             f.feed_title AS original_feed_title,
                             f2.feed_title AS duplicate_feed_title,
                             f.feed_link AS original_feed_link,
                             f2.feed_link AS duplicate_feed_link,
                             fd.feed_tagline AS original_feed_tagline,
                             fd2.feed_tagline AS duplicate_feed_tagline
                      FROM feeds f
                      INNER JOIN rss_feeds_feeddata fd ON fd.feed_id = f.id
                      CROSS JOIN feeds f2
                      INNER JOIN rss_feeds_feeddata fd2 ON fd2.feed_id = f2.id
                      WHERE f2.id > f.id
                        AND fd.feed_tagline = fd2.feed_tagline
                        AND f.feed_link = f2.feed_link
                        AND f.feed_title = f2.feed_title
                      ORDER BY original_id ASC;""")
    feed_fields = ('original_id', 'duplicate_id', 'original_feed_address', 'duplicate_feed_address')
    skips = 0
    merges = 0
    for feeds_values in cursor.fetchall():
        feeds = dict(zip(feed_fields, feeds_values))
        # Sample a few of the duplicate's story guids and only merge when
        # every sampled guid already exists in the original feed.
        duplicate_stories = MStory.objects(story_feed_id=feeds['duplicate_id']).only('story_guid')[5:8]
        duplicate_story_ids = [story.story_guid for story in duplicate_stories]
        original_stories = MStory.objects(story_feed_id=feeds['original_id'],
                                          story_guid__in=duplicate_story_ids)
        if duplicate_stories.count() == original_stories.count():
            merges += 1
            merge_feeds(feeds['original_id'], feeds['duplicate_id'])
        else:
            # print(duplicate_stories)
            # print(duplicate_story_ids)
            # print(original_stories)
            # print("Skipping: %s" % feeds)
            skips += 1
    print("Skips: %s, Merges: %s" % (skips, merges))
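
# Standalone sketch of the duplicate-detection heuristic in the command
# above, using plain lists in place of MStory querysets: take a small sample
# of story guids from the suspected duplicate (the [5:8] slice) and treat the
# pair as duplicates only when every sampled guid already exists in the
# original feed. The function name and data are hypothetical.
def looks_like_duplicate(original_guids, duplicate_guids):
    sample = duplicate_guids[5:8]
    # Like the queryset counts above, an empty sample (fewer than six
    # stories in the duplicate) matches trivially.
    return all(guid in original_guids for guid in sample)

guids = ['g%d' % i for i in range(10)]
assert looks_like_duplicate(guids, guids)
assert not looks_like_duplicate(guids[:5], guids)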

def exception_change_feed_address(request):
    feed_id = request.POST['feed_id']
    feed = get_object_or_404(Feed, pk=feed_id)
    original_feed = feed
    feed_address = request.POST['feed_address']
    timezone = request.user.profile.timezone
    code = -1

    if feed.has_page_exception or feed.has_feed_exception:
        # Fix broken feed
        logging.user(request, "~FRFixing feed exception by address: ~SB%s~SN to ~SB%s" % (feed.feed_address, feed_address))
        feed.has_feed_exception = False
        feed.active = True
        feed.fetched_once = False
        feed.feed_address = feed_address
        duplicate_feed = feed.schedule_feed_fetch_immediately()
        code = 1
        if duplicate_feed:
            new_feed = Feed.objects.get(pk=duplicate_feed.pk)
            feed = new_feed
            new_feed.schedule_feed_fetch_immediately()
            new_feed.has_feed_exception = False
            new_feed.active = True
            new_feed.save()
            # Merge the originally edited feed into the pre-existing
            # duplicate (merging new_feed with itself was a no-op bug).
            merge_feeds(new_feed.pk, original_feed.pk)
    else:
        # Branch good feed
        logging.user(request, "~FRBranching feed by address: ~SB%s~SN to ~SB%s" % (feed.feed_address, feed_address))
        feed, _ = Feed.objects.get_or_create(feed_address=feed_address, feed_link=feed.feed_link)
        code = 1
        if feed.pk != original_feed.pk:
            try:
                feed.branch_from_feed = original_feed.branch_from_feed or original_feed
            except Feed.DoesNotExist:
                feed.branch_from_feed = original_feed
            feed.feed_address_locked = True
            feed.save()

    feed = feed.update()
    feed = Feed.get_by_id(feed.pk)

    try:
        usersub = UserSubscription.objects.get(user=request.user, feed=feed)
    except UserSubscription.DoesNotExist:
        usersubs = UserSubscription.objects.filter(user=request.user, feed=original_feed)
        if usersubs:
            usersub = usersubs[0]
            usersub.switch_feed(feed, original_feed)
        else:
            fetch_history = MFetchHistory.feed(feed_id, timezone=timezone)
            return {
                'code': -1,
                'feed_fetch_history': fetch_history['feed_fetch_history'],
                'page_fetch_history': fetch_history['page_fetch_history'],
                'push_history': fetch_history['push_history'],
            }

    usersub.calculate_feed_scores(silent=False)
    feed.update_all_statistics()
    classifiers = get_classifiers_for_user(usersub.user, feed_id=usersub.feed_id)

    feeds = {
        original_feed.pk: usersub and usersub.canonical(full=True, classifiers=classifiers),
    }

    if feed and feed.has_feed_exception:
        code = -1

    fetch_history = MFetchHistory.feed(feed_id, timezone=timezone)
    return {
        'code': code,
        'feeds': feeds,
        'new_feed_id': usersub.feed_id,
        'feed_fetch_history': fetch_history['feed_fetch_history'],
        'page_fetch_history': fetch_history['page_fetch_history'],
        'push_history': fetch_history['push_history'],
    }
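
# Hedged usage sketch: driving exception_change_feed_address through Django's
# test client. The URL path, credentials, and feed id are hypothetical, and
# the dict the view returns is assumed to be serialized to JSON by the
# project's json-view decorator.
from django.test import Client

def try_fix_feed_address():
    client = Client()
    client.login(username='someuser', password='somepass')  # hypothetical account
    response = client.post('/rss_feeds/exception_change_feed_address', {
        'feed_id': 42,                                  # hypothetical pk
        'feed_address': 'http://example.com/feed.xml',  # hypothetical address
    })
    # code == 1 means the address change was accepted (or the feed branched);
    # -1 means the feed still has an exception or no subscription was found.
    return response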