def handle(self, *args, **options):
        """
        Rebuild the stored word counts from scratch.

        Calls ``delete_all()`` on WordType, Word and Entry, recounts via
        ``Word.count_words()``, then bulk-creates the resulting Word, Entry
        and WordType objects. Progress messages are written to
        ``self.stdout``.
        """
        out = self.stdout

        # Clear out whatever a previous run left behind
        for model in (WordType, Word, Entry):
            model.delete_all()

        out.write('Counting words...')
        counts = Word.count_words()
        entry_data, feed_data, word_data, entry_url_lookup = counts
        out.write('Counting complete.')

        out.write('Saving data...')

        # Words first, then the entries gathered during counting
        new_words = []
        for text, count in word_data.items():
            new_words.append(Word.create(text, count))
        Word.objects.bulk_create(new_words)

        Entry.objects.bulk_create(entry_url_lookup.values())

        # Re-read the saved objects to build the lookups handed to
        # Word.create_word_types: word text -> id, feed id -> feed,
        # entry url -> entry.
        word_ids = dict(Word.objects.all().values_list('word', 'id'))

        feeds_by_id = {}
        for feed in Feed.objects.active_feeds():
            feeds_by_id[feed.id] = feed

        entries_by_url = {}
        for saved_entry in Entry.objects.all():
            entries_by_url[saved_entry.url] = saved_entry

        # Per-feed and per-entry WordType objects are saved in one batch
        for_feeds = Word.create_word_types(feed_data, feeds_by_id, word_ids)
        for_entries = Word.create_word_types(
            entry_data, entries_by_url, word_ids)
        WordType.objects.bulk_create(for_feeds + for_entries)

        out.write('Complete!')
Exemple #2
0
def __add_entries(entries, feed):
    """
    Private add entries func.

    Adds entries to a feed without repeating them.
    An entry is not downloaded when an entry with the same title already
    exists in another feed; that entry's content is reused instead.

    entries -- iterable of feed items; each one is read for its ``title``,
        ``description`` and ``link`` attributes.
    feed -- the feed the newly created Entry objects are attached to.

    Best effort: an entry whose page cannot be downloaded, opened or saved
    is skipped and processing continues with the next one.
    """

    for entry in entries:
        # Skip titles this feed already has. filter().exists() instead of
        # get() so that duplicates already in the database cannot raise
        # MultipleObjectsReturned and abort the whole run.
        if Entry.objects.filter(title=entry.title, feed=feed).exists():
            continue

        # Look for an entry with the same title in any feed; only the first
        # match is needed, so fetch at most one row.
        same_title = list(Entry.objects.filter(title=entry.title)[:1])
        if same_title:
            source = same_title[0]
            # Copy its content into a new Entry belonging to this feed
            entry_obj = Entry(title=source.title,
                description=source.description,
                entry=source.entry, feed=feed)
            entry_obj.save()
            continue

        # Otherwise create a new Entry from scratch by downloading the page.
        # NOTE(review): entry.title comes from the feed and is used verbatim
        # as a local file name; a title containing path separators would
        # point outside the working directory. Consider sanitising it.
        entry_name = entry.title + '.html'
        try:
            try:
                urlretrieve(entry.link, entry_name)

                # Binary mode: the page is stored as-is, never decoded. The
                # with-block guarantees the handle is closed before removal.
                with open(entry_name, 'rb') as entry_file:
                    entry_obj = Entry(title=entry.title,
                        description=entry.description,
                        entry=File(entry_file), feed=feed)
                    entry_obj.save()
            finally:
                # Remove the temporary download whether or not saving
                # worked; it may not exist if the download itself failed.
                try:
                    os.remove(entry_name)
                except OSError:
                    pass
        except Exception:
            # Bad link or entry name: go to the next entry. Exception, not
            # a bare except, so KeyboardInterrupt/SystemExit still propagate.
            continue