Exemple #1
0
    def update_top_stories(self):
        """Fetch the client's top stories and upsert one ``Story`` per day.

        Each story is keyed by ``(service, code, date)`` where ``date`` is
        midnight of the current day, so the same item seen on different days
        produces different rows. For a story that is no longer ``Story.NEW``,
        a ``StoryUpdate`` is recorded only when the reported score is higher
        than the stored one.

        Any exception aborts the whole run and is logged; nothing is raised
        to the caller.
        """
        try:
            popular_stories = self.client.get_top_stories()

            # The day bucket is the same for every story; compute it once.
            # NOTE(review): this combines now()'s year/month/day with the
            # current timezone via tzinfo= -- confirm the resulting day is
            # the intended one when now() and the current timezone differ.
            today = timezone.now()
            day = timezone.datetime(
                today.year, today.month, today.day,
                tzinfo=timezone.get_current_timezone())

            for story_data in popular_stories:
                story, created = Story.objects.get_or_create(
                    service=self.service,
                    code=story_data['id'],
                    date=day,
                )

                score = story_data['score']

                # Only score increases on already-processed stories are
                # recorded as updates.
                if story.status != Story.NEW and score > story.score:
                    update = StoryUpdate(story=story)
                    update.score_changes = score - story.score
                    update.save()

                story.title = story_data['title']
                story.url = story_data['url']
                story.score = score
                story.status = Story.OK
                story.save()

        except Exception as e:
            # Best-effort crawler: log (with traceback) instead of raising.
            logging.exception(e)
Exemple #2
0
    def save_story(self, story_data, score, weight):
        """Create or refresh today's ``Story`` and add a weighted score to it.

        Args:
            story_data: Mapping describing the story. The identifier is read
                from ``'id'`` and falls back to ``'asset_id'``; ``'title'``
                and ``'url'`` are required.
            score: Raw score for this run.
            weight: Multiplier applied to ``score`` before it is added.

        Returns:
            None. Returns early without touching the database when no story
            identifier is present.

        The weighted score is added (and a ``StoryUpdate`` recorded) only
        when the story was just created or its most recent update is at
        least 30 minutes old.
        """
        story_id = story_data.get('id', story_data.get('asset_id', None))
        if not story_id:
            return

        # NOTE(review): this combines now()'s year/month/day with the current
        # timezone via tzinfo= -- confirm the resulting day is the intended
        # one when now() and the current timezone differ.
        today = timezone.now()
        story, created = Story.objects.get_or_create(
            service=self.service,
            code=story_id,
            date=timezone.datetime(
                today.year, today.month, today.day,
                tzinfo=timezone.get_current_timezone()),
        )

        story.title = story_data['title']
        story.url = story_data['url']

        # Fetch the latest update in a single query. A separate exists()
        # check costs an extra query and leaves a window in which first()
        # could still return None.
        minutes_since_last_update = 0
        last_update = story.updates.order_by('-updated_at').first()
        if last_update is not None:
            delta = timezone.now() - last_update.updated_at
            minutes_since_last_update = delta.total_seconds() / 60

        if created or minutes_since_last_update >= 30:
            score_run = score * weight
            story.score += score_run

            update = StoryUpdate(story=story)
            update.score_changes = score_run
            update.save()

        story.status = Story.OK
        story.save()
Exemple #3
0
    def update_top_stories(self):
        """Fetch today's trending repositories and upsert one ``Story`` per day.

        Each repository is keyed by ``(service, code, date)`` where ``code``
        is the repository name and ``date`` is midnight of the current day.
        The stored score is the highest star count seen for that day: a
        ``StoryUpdate`` is recorded only when the star count increases.

        Any exception aborts the whole run and is logged; nothing is raised
        to the caller.
        """
        try:
            repos = self.client.get_today_trending_repositories()

            # The day bucket is the same for every repository; compute it once.
            # NOTE(review): this combines now()'s year/month/day with the
            # current timezone via tzinfo= -- confirm the resulting day is
            # the intended one when now() and the current timezone differ.
            today = timezone.now()
            day = timezone.datetime(
                today.year, today.month, today.day,
                tzinfo=timezone.get_current_timezone())

            for data in repos:
                story, created = Story.objects.get_or_create(
                    service=self.service,
                    code=data.get('name'),
                    date=day)
                if created:
                    story.build_url()

                stars = data.get('stars', 0)

                # Because of the nature of the GitHub trending repositories,
                # we are only interested in changes where the stars have
                # increased. This way the crawler captures the highest star
                # count a repository reached in a single day.
                if story.status == Story.NEW:
                    story.score = stars
                elif stars > story.score:
                    update = StoryUpdate(story=story)
                    update.score_changes = stars - story.score
                    update.save()
                    story.score = stars

                # The title is the name without its first character
                # (presumably a leading separator -- verify against client).
                story.title = data.get('name')[1:]

                description = data.get('description', '')
                language = data.get('language', '')

                if language and description:
                    description = u'{0} • {1}'.format(language, description)
                elif language:
                    description = language

                story.description = description

                story.status = Story.OK
                story.save()

        except Exception as e:
            # Best-effort crawler: log (with traceback) instead of raising.
            logging.exception(e)
Exemple #4
0
    def update_story(self, code):
        """Fetch a single story by ``code`` and create or refresh its ``Story``.

        Args:
            code: Identifier passed to ``self.client.get_story`` and used as
                the ``Story.code`` lookup key.

        Only payloads whose ``'type'`` is ``'story'`` are processed. A payload
        flagged ``'deleted'`` removes the stored row. For a story that is no
        longer ``Story.NEW``, a ``StoryUpdate`` with the score and comment
        deltas is recorded whenever either value changed.

        Any exception is logged together with ``code``; nothing is raised to
        the caller.
        """
        try:
            story_data = self.client.get_story(code)
            if story_data and story_data['type'] == 'story':
                story, created = Story.objects.get_or_create(service=self.service, code=code)

                if story_data.get('deleted', False):
                    story.delete()
                    return

                # Date and URL are set only while the story is still NEW.
                if story.status == Story.NEW:
                    story.date = timezone.datetime.fromtimestamp(story_data.get('time'), timezone.get_current_timezone())
                    story.url = u'{0}{1}'.format(story.service.story_url, story.code)

                score = story_data.get('score', 0)
                comments = story_data.get('descendants', 0)
                has_changes = (score != story.score or comments != story.comments)

                if story.status != Story.NEW and has_changes:
                    update = StoryUpdate(story=story)
                    update.comments_changes = comments - story.comments
                    update.score_changes = score - story.score
                    update.save()

                story.comments = comments
                story.score = score
                story.title = story_data.get('title', '')

                url = story_data.get('url', '')
                if url:
                    story.content_type = Story.URL
                    story.content = url

                # Checked after the URL, so text wins when both are present.
                text = story_data.get('text', '')
                if text:
                    story.content_type = Story.TEXT
                    story.content = text

                story.status = Story.OK
                story.save()
        except Exception as e:
            # Best-effort crawler: log (with traceback) instead of raising.
            logging.error(u'Exception in code {0} HackerNewsCrawler.update_story'.format(code))
            logging.exception(e)
Exemple #5
0
    def update_top_stories(self):
        """Fetch today's trending repositories and upsert one ``Story`` per day.

        Each repository is keyed by ``(service, code, date)`` where ``code``
        is the repository name and ``date`` is midnight of the current day.
        The stored score is the highest star count seen for that day: a
        ``StoryUpdate`` is recorded only when the star count increases.

        Any exception aborts the whole run and is logged; nothing is raised
        to the caller.
        """
        try:
            repos = self.client.get_today_trending_repositories()

            # The day bucket is the same for every repository; compute it once.
            # NOTE(review): this combines now()'s year/month/day with the
            # current timezone via tzinfo= -- confirm the resulting day is
            # the intended one when now() and the current timezone differ.
            today = timezone.now()
            day = timezone.datetime(
                today.year, today.month, today.day,
                tzinfo=timezone.get_current_timezone())

            for data in repos:
                story, created = Story.objects.get_or_create(
                    service=self.service,
                    code=data.get('name'),
                    date=day)
                if created:
                    story.build_url()

                stars = data.get('stars', 0)

                # Because of the nature of the GitHub trending repositories,
                # we are only interested in changes where the stars have
                # increased. This way the crawler captures the highest star
                # count a repository reached in a single day.
                if story.status == Story.NEW:
                    story.score = stars
                elif stars > story.score:
                    update = StoryUpdate(story=story)
                    update.score_changes = stars - story.score
                    update.save()
                    story.score = stars

                # The title is the name without its first character
                # (presumably a leading separator -- verify against client).
                story.title = data.get('name')[1:]

                description = data.get('description', '')
                language = data.get('language', '')

                if language and description:
                    description = u'{0} • {1}'.format(language, description)
                elif language:
                    description = language

                story.description = description

                story.status = Story.OK
                story.save()

        except Exception as e:
            # Best-effort crawler: log (with traceback) instead of raising.
            logging.exception(e)
Exemple #6
0
    def update_top_stories(self):
        """Fetch the client's front-page stories and create or refresh each ``Story``.

        Stories are keyed by ``(service, code)`` where ``code`` is the item's
        ``'permalink'``. On creation the date is taken from ``'created_utc'``
        and the URL is built. For a story that is no longer ``Story.NEW``, a
        ``StoryUpdate`` with the score and comment deltas is recorded whenever
        either value changed.

        Any exception aborts the whole run and is logged; nothing is raised
        to the caller.
        """
        try:
            stories = self.client.get_front_page_stories()
            for data in stories:
                # Each listing entry wraps its fields under the 'data' key.
                story_data = data['data']
                story, created = Story.objects.get_or_create(service=self.service, code=story_data.get('permalink'))
                if created:
                    story.date = timezone.datetime.fromtimestamp(story_data.get('created_utc'), timezone.get_current_timezone())
                    story.build_url()

                score = story_data.get('score', 0)
                comments = story_data.get('num_comments', 0)
                has_changes = (score != story.score or comments != story.comments)

                if story.status != Story.NEW and has_changes:
                    update = StoryUpdate(story=story)
                    update.comments_changes = comments - story.comments
                    update.score_changes = score - story.score
                    update.save()

                story.comments = comments
                story.score = score
                story.title = story_data.get('title', '')
                story.nsfw = story_data.get('over_18', False)

                story.status = Story.OK
                story.save()
        except Exception as e:
            # Best-effort crawler: log (with traceback) instead of raising.
            logging.exception(e)
Exemple #7
0
    def update_top_stories(self):
        """Fetch the client's top posts and upsert one ``Story`` per day.

        Each post is keyed by ``(service, code, date)`` where ``date`` is
        midnight of the current day. When a row is created, the post's
        current recommend and response counts are stored as a baseline
        (``start_score`` / ``start_comments``); the story's score and
        comments are then the counts gained since that baseline. For a story
        that is no longer ``Story.NEW``, a ``StoryUpdate`` with the deltas is
        recorded whenever either value changed.

        Any exception aborts the whole run and is logged; nothing is raised
        to the caller.
        """
        try:
            posts = self.client.get_top_stories()

            # The day bucket is the same for every post; compute it once.
            # NOTE(review): this combines now()'s year/month/day with the
            # current timezone via tzinfo= -- confirm the resulting day is
            # the intended one when now() and the current timezone differ.
            today = timezone.now()
            day = timezone.datetime(
                today.year, today.month, today.day,
                tzinfo=timezone.get_current_timezone())

            for post_data in posts:
                story, created = Story.objects.get_or_create(service=self.service, code=post_data['id'], date=day)

                total_recommends = int(post_data['virtuals']['recommends'])
                total_comments = int(post_data['virtuals']['responsesCreatedCount'])

                if created:
                    story.url = u'{0}/@{1}/{2}'.format(self.service.story_url, post_data['creator']['username'], post_data['id'])
                    # Baseline counts at first sighting.
                    story.start_score = total_recommends
                    story.start_comments = total_comments

                story.title = post_data['title']

                # Score and comments are measured relative to the baseline.
                recommends = total_recommends - story.start_score
                comments = total_comments - story.start_comments
                has_changes = (recommends != story.score or comments != story.comments)

                if story.status != Story.NEW and has_changes:
                    update = StoryUpdate(story=story)
                    update.comments_changes = comments - story.comments
                    update.score_changes = recommends - story.score
                    update.save()

                story.score = recommends
                story.comments = comments

                story.status = Story.OK
                story.save()

        except Exception as e:
            # Best-effort crawler: log (with traceback) instead of raising.
            logging.exception(e)
Exemple #8
0
    def update_story(self, code):
        """Fetch a single story by ``code`` and create or refresh its ``Story``.

        Args:
            code: Identifier passed to ``self.client.get_story`` and used as
                the ``Story.code`` lookup key.

        Only payloads whose ``'type'`` is ``'story'`` are processed. A payload
        flagged ``'deleted'`` removes the stored row. For a story that is no
        longer ``Story.NEW``, a ``StoryUpdate`` with the score and comment
        deltas is recorded whenever either value changed.

        Any exception is logged together with ``code``; nothing is raised to
        the caller.
        """
        try:
            story_data = self.client.get_story(code)
            if story_data and story_data['type'] == 'story':
                story, created = Story.objects.get_or_create(
                    service=self.service, code=code)

                if story_data.get('deleted', False):
                    story.delete()
                    return

                # Date and URL are set only while the story is still NEW.
                if story.status == Story.NEW:
                    story.date = timezone.datetime.fromtimestamp(
                        story_data.get('time'),
                        timezone.get_current_timezone())
                    story.url = u'{0}{1}'.format(story.service.story_url,
                                                 story.code)

                score = story_data.get('score', 0)
                comments = story_data.get('descendants', 0)
                has_changes = (score != story.score
                               or comments != story.comments)

                if story.status != Story.NEW and has_changes:
                    update = StoryUpdate(story=story)
                    update.comments_changes = comments - story.comments
                    update.score_changes = score - story.score
                    update.save()

                story.comments = comments
                story.score = score
                story.title = story_data.get('title', '')

                url = story_data.get('url', '')
                if url:
                    story.content_type = Story.URL
                    story.content = url

                # Checked after the URL, so text wins when both are present.
                text = story_data.get('text', '')
                if text:
                    story.content_type = Story.TEXT
                    story.content = text

                story.status = Story.OK
                story.save()
        except Exception as e:
            # Best-effort crawler: log (with traceback) instead of raising.
            logging.error(
                u'Exception in code {0} HackerNewsCrawler.update_story'.format(
                    code))
            logging.exception(e)