예제 #1
0
    def handle(self, *args, **kwargs):
        """Import Section rows from the CSV file given by ``path``.

        Column 8 of each row holds a Perl-serialized dict; the section's
        channel is taken from the ``user`` query parameter of that dict's
        ``rss`` URL.

        Raises:
            CommandError: if no ``path`` argument was supplied.
        """
        self.stdout.write('Start...')

        path = kwargs.get('path')
        if not path:
            raise CommandError('Path is required')

        with open(path, 'r',
                  encoding=settings.MIGRATE_FILE_ENCODING) as csvfile:
            reader = csv.reader(csvfile)

            sections = []
            for row in reader:
                try:
                    data = perl_to_python_dict(row[8])
                except Exception as e:
                    # BUG FIX: Django's OutputWrapper.write() expects a
                    # string; passing the exception object itself crashed
                    # the command. Coerce explicitly.
                    self.stderr.write(str(e))
                    continue

                # BUG FIX: 'rss' may be absent from the serialized dict and
                # urlparse(None) raises TypeError — fall back to ''.
                query_string = urlparse(data.get('rss') or '').query
                query_dict = parse_qs(query_string)
                # parse_qs returns lists; keep only the first 'user' value.
                channel = query_dict.get('user')
                if isinstance(channel, (list, tuple)):
                    channel = channel[0]

                sections.append(
                    Section(name=row[7],
                            slug=data.get('section_alias'),
                            is_video=True,
                            channel=channel,
                            is_active=bool(int(row[5])),
                            ext_id=row[0]))
        Section.objects.bulk_create(sections, batch_size=100)
        self.stdout.write('End...')
예제 #2
0
    def handle(self, *args, **kwargs):
        """Import Resource rows from the CSV file given by ``path``.

        Maps legacy section/multimedia ``ext_id`` values to the already
        migrated Partition/Multimedia primary keys, then bulk-creates the
        resources.

        Raises:
            CommandError: if no ``path`` argument was supplied.
        """
        self.stdout.write('Start...')

        path = kwargs.get('path')
        if not path:
            raise CommandError('Path is required')

        # Pre-build ext_id -> pk lookups so the per-row work is O(1).
        partition_mapping = dict(Partition.objects.values_list('ext_id', 'id'))
        multimedia_mapping = dict(
            Multimedia.objects.values_list('ext_id', 'id'))

        self.stdout.write('Parse file...')
        # Legacy rows carry huge serialized payloads; raise csv's field cap.
        csv.field_size_limit(500 * 1024 * 1024)
        with open(path, 'r',
                  encoding=settings.MIGRATE_FILE_ENCODING) as csvfile:
            reader = csv.reader(csvfile)

            resources = []
            for row in reader:

                try:
                    data = perl_to_python_dict(row[9])
                except Exception as e:
                    # BUG FIX: stderr.write() requires a string; writing
                    # the exception object itself crashed the command.
                    self.stderr.write(str(e))
                    continue

                # ext_id 124 is deliberately excluded from migration.
                section_ext_ids = set(perl_to_python_list(row[10])) - {124}
                if not section_ext_ids:
                    continue

                partition_ids = []
                for section_ext_id in section_ext_ids:
                    partition_id = partition_mapping.get(section_ext_id)
                    if partition_id:
                        partition_ids.append(partition_id)
                if not partition_ids:
                    continue

                # Take the first legacy multimedia id that was migrated.
                multimedia_ext_ids = perl_to_python_list(row[11])
                multimedia_id = None
                if multimedia_ext_ids:
                    for multimedia_ext_id in multimedia_ext_ids:
                        multimedia_id = multimedia_mapping.get(
                            multimedia_ext_id)
                        if multimedia_id:
                            break

                resources.append(
                    Resource(partition_id=partition_ids[0],
                             name=row[7],
                             url=data['url'],
                             rating=int(row[8]),
                             multimedia_id=multimedia_id,
                             is_active=bool(int(row[5]))))

        if resources:
            self.stdout.write('Bulk create resources...')
            Resource.objects.bulk_create(resources, batch_size=BATCH_SIZE)

        self.stdout.write('End...')
예제 #3
0
    def handle(self, *args, **kwargs):
        """Import Blogger rows from the CSV file given by ``path``.

        Column 7 holds "first last" name pairs; column 8 a Perl-serialized
        dict whose ``url`` becomes the blogger's link.

        Raises:
            CommandError: if no ``path`` argument was supplied.
        """
        self.stdout.write('Start...')

        path = kwargs.get('path')
        if not path:
            raise CommandError('Path is required')

        with open(path, 'r', encoding=settings.MIGRATE_FILE_ENCODING) as csvfile:
            reader = csv.reader(csvfile)

            bloggers = []
            for row in reader:
                # BUG FIX: a name that is not exactly two words raised an
                # unhandled ValueError and aborted the whole import; skip
                # such rows instead (same guard as the author import).
                try:
                    first_name, last_name = row[7].split()
                except ValueError:
                    self.stderr.write('{} not blogger name'.format(row[7]))
                    continue

                try:
                    data = perl_to_python_dict(row[8])
                except Exception as e:
                    # BUG FIX: stderr.write() requires a string; writing
                    # the exception object itself crashed the command.
                    self.stderr.write(str(e))
                    continue

                bloggers.append(
                    Blogger(
                        first_name=first_name, last_name=last_name, link=data.get('url'),
                        is_active=bool(int(row[5])), ext_id=row[0]
                    )
                )
        Blogger.objects.bulk_create(bloggers, batch_size=100)
        self.stdout.write('End...')
예제 #4
0
    def handle(self, *args, **kwargs):
        """Import Author rows (``Forum::Author`` type only) from the CSV
        file given by ``path``.

        Column 7 holds "last first" name pairs; column 8 a Perl-serialized
        dict with the ``about`` text; column 10 the legacy multimedia ids.

        Raises:
            CommandError: if no ``path`` argument was supplied.
        """
        self.stdout.write('Start...')

        path = kwargs.get('path')
        if not path:
            raise CommandError('Path is required')

        # Pre-build ext_id -> pk lookup for migrated multimedia.
        multimedia_mapping = dict(
            Multimedia.objects.values_list('ext_id', 'id'))

        self.stdout.write('Parse file...')
        # Legacy rows carry huge serialized payloads; raise csv's field cap.
        csv.field_size_limit(500 * 1024 * 1024)
        with open(path, 'r',
                  encoding=settings.MIGRATE_FILE_ENCODING) as csvfile:
            reader = csv.reader(csvfile)

            authors = []
            for row in reader:
                # The dump mixes record types; only authors are wanted.
                if row[1] != 'Forum::Author':
                    continue

                try:
                    last_name, first_name = row[7].split()
                except ValueError:
                    self.stderr.write('{} not author name'.format(row[7]))
                    continue

                try:
                    data = perl_to_python_dict(row[8])
                except Exception as e:
                    # BUG FIX: stderr.write() requires a string; writing
                    # the exception object itself crashed the command.
                    self.stderr.write(str(e))
                    continue

                # Take the first legacy multimedia id that was migrated.
                multimedia_ext_ids = perl_to_python_list(row[10])
                multimedia_id = None
                if multimedia_ext_ids:
                    for multimedia_ext_id in multimedia_ext_ids:
                        multimedia_id = multimedia_mapping.get(
                            multimedia_ext_id)
                        if multimedia_id:
                            break

                authors.append(
                    Author(first_name=first_name,
                           last_name=last_name,
                           description=data.get('about'),
                           is_active=bool(int(row[5])),
                           multimedia_id=multimedia_id,
                           ext_id=row[0]))
        Author.objects.bulk_create(authors, batch_size=BATCH_SIZE)
        self.stdout.write('End...')
예제 #5
0
    def handle(self, *args, **kwargs):
        """Import image Multimedia rows from the CSV file given by ``path``.

        Column 6 holds a Perl-serialized dict whose ``info.path`` +
        ``info.suffix`` form the image URL; non-image suffixes are skipped.
        Rows are flushed in batches of BATCH_SIZE to bound memory.

        Raises:
            CommandError: if no ``path`` argument was supplied.
        """
        self.stdout.write('Start...')

        path = kwargs.get('path')
        if not path:
            raise CommandError('Path is required')

        self.stdout.write('Parse file...')
        # Legacy rows carry huge serialized payloads; raise csv's field cap.
        csv.field_size_limit(500 * 1024 * 1024)
        with open(path, 'r',
                  encoding=settings.MIGRATE_FILE_ENCODING) as csvfile:
            reader = csv.reader(csvfile)

            multimedia = []
            j = 0
            for row in reader:

                try:
                    data = perl_to_python_dict(row[6])
                except Exception as e:
                    # BUG FIX: stderr.write() requires a string; writing
                    # the exception object itself crashed the command.
                    self.stderr.write(str(e))
                    continue

                try:
                    suffix = data['info']['suffix']
                    if suffix in ('jpg', 'gif', 'jpeg', 'png', 'bmp'):
                        multimedia.append(
                            Multimedia(image_url=data['info']['path'] + '.' +
                                       suffix,
                                       ext_id=int(row[0])))
                except KeyError:
                    # Rows without info/suffix/path are not images; skip.
                    continue

                if len(multimedia) == BATCH_SIZE:
                    self.stdout.write(
                        'Bulk create multimedia (iter {})...'.format(j))
                    j += 1
                    Multimedia.objects.bulk_create(multimedia)
                    multimedia = []

        if multimedia:
            # BUG FIX: the message wrongly said "parents comments"
            # (copy-paste from another command).
            self.stdout.write('Bulk create multimedia (iter end)...')
            Multimedia.objects.bulk_create(multimedia)

        self.stdout.write('End...')
예제 #6
0
    def handle(self, *args, **kwargs):
        """Import Article rows from the CSV file given by ``path``.

        Single pass over the dump builds Article objects (batched bulk
        inserts), collects Notice rows (legacy section 127), and records
        author/tag/vote relations keyed by the legacy ``ext_id``.  After the
        articles exist, the relations are resolved to real primary keys and
        bulk-created in BATCH_SIZE chunks.

        Raises:
            CommandError: if no ``path`` argument was supplied.
        """
        self.stdout.write('Start...')
        current_timezone = timezone.get_current_timezone()

        article_ct = ContentType.objects.get_for_model(Article)

        path = kwargs.get('path')
        if not path:
            raise CommandError('Path is required')

        # Pre-built legacy ext_id -> pk lookups for O(1) per-row resolution.
        authors_mapping = dict(Author.objects.values_list('ext_id', 'id'))
        sections_mapping = dict(Section.objects.values_list('ext_id', 'id'))
        tags_mapping = dict(Tag.objects.values_list('ext_id', 'id'))
        multimedia_mapping = dict(Multimedia.objects.values_list('ext_id', 'id'))

        video_sections = list(Section.objects.filter(is_video=True).values_list('id', flat=True))

        self.stdout.write('Parse file...')
        # Legacy rows carry huge serialized payloads; raise csv's field cap.
        csv.field_size_limit(500 * 1024 * 1024)
        with open(path, 'r', encoding=settings.MIGRATE_FILE_ENCODING) as csvfile:
            reader = csv.reader(csvfile)

            notices = []

            articles = []
            # Relations keyed by legacy article ext_id; resolved after insert.
            article_authors_relation = {}
            article_tags_relation = defaultdict(list)
            article_votes_relation = {}
            j = 0
            for row in reader:

                # First migrated multimedia id wins.
                multimedia_ext_ids = perl_to_python_list(row[17])
                multimedia_id = None
                if multimedia_ext_ids:
                    for multimedia_ext_id in multimedia_ext_ids:
                        multimedia_id = multimedia_mapping.get(multimedia_ext_id)
                        if multimedia_id:
                            break

                try:
                    data = perl_to_python_dict(row[12])
                except Exception as e:
                    # NOTE(review): should be str(e) — Django's
                    # OutputWrapper.write() expects a string; confirm and fix.
                    self.stderr.write(e)
                    continue

                # flags
                is_news = False
                is_day_material = False
                is_main_news = False
                is_ticker = False

                section_ext_ids = sorted(perl_to_python_list(row[13]))

                # Legacy section 127 rows become Notices, not Articles.
                if 127 in section_ext_ids:
                    if row[7]:
                        notices.append(
                            Notice(
                                content=row[7],
                                status=Notice.STATUS.approved if int(row[5]) else Notice.STATUS.rejected
                            )
                        )
                    continue

                is_video = bool(row[15])

                # Pick the first mapped section, but skip video-only sections
                # for non-video articles.
                section_id = None
                for ext_id in section_ext_ids:
                    if ext_id in sections_mapping:
                        section_id = sections_mapping[ext_id]
                        if section_id in video_sections and not is_video:
                            section_id = None
                            continue
                        break

                # Magic legacy section ids drive the boolean flags.
                if 108 in section_ext_ids:
                    is_news = True
                if (113 in section_ext_ids) or (8221431 in section_ext_ids):
                    is_day_material = True
                if (114 in section_ext_ids) or (8221430 in section_ext_ids):
                    is_main_news = True
                if 123 in section_ext_ids:
                    is_ticker = True

                article_ext_id = int(row[0])

                # store authors for later use
                raw_author = row[8]
                if raw_author:
                    try:
                        ext_id = int(raw_author)
                    except (ValueError, TypeError):
                        pass
                    else:
                        if ext_id in authors_mapping:
                            article_authors_relation[article_ext_id] = authors_mapping[ext_id]

                # store tags for later use
                for ext_id in perl_to_python_list(row[18]):
                    if ext_id in tags_mapping:
                        article_tags_relation[article_ext_id].append(tags_mapping[ext_id])

                # store votes for later use
                rating = 0
                vote_count = 0
                try:
                    vote_count = int(row[10])
                    vote_sum = int(row[11])
                except (ValueError, TypeError):
                    pass
                else:
                    # vote_values() expands count/sum into (count, score) pairs.
                    article_votes_relation[article_ext_id] = vote_values(vote_count, vote_sum)
                    if vote_count:
                        rating = float(vote_sum) / vote_count

                discussion_status = Article.DISCUSSION_STATUS.close
                can_comment = data.get('can_comment')
                if can_comment and can_comment == '1':
                    discussion_status = Article.DISCUSSION_STATUS.open

                # Length guards mirror the model's max_length constraints
                # (511 for author_names, 200 for source_link/video).
                articles.append(
                    Article(
                        title=row[7],
                        description=data.get('note') or '',
                        content=data.get('body') or '',
                        section_id=section_id,
                        author_names=data.get('author') if (data.get('author')
                                                            and len(data.get('author')) < 511) else '',
                        publish_date=timezone.make_aware(parse_datetime(row[4]), current_timezone, is_dst=True),
                        is_active=bool(int(row[5])),
                        source=data.get('source') or '',
                        source_link=data.get('sourcelink') if (data.get('sourcelink')
                                                               and len(data.get('sourcelink')) < 200) else '',
                        discussion_status=discussion_status,
                        status=Article.STATUS.approved,
                        video=row[15] if (row[15] and len(row[15]) < 200) else '',
                        is_news=is_news,
                        is_ticker=is_ticker,
                        is_main_news=is_main_news,
                        is_day_material=is_day_material,
                        rating=rating,
                        vote_count=vote_count,
                        multimedia_id=multimedia_id,
                        thread_id=row[16] or 0,
                        ext_id=article_ext_id
                    )
                )
                if len(articles) == BATCH_SIZE:
                    self.stdout.write('Bulk create articles (iter {})...'.format(j))
                    j += 1
                    Article.objects.bulk_create(articles)
                    articles = []

        if articles:
            self.stdout.write('Bulk create articles (iter end)...')
            Article.objects.bulk_create(articles)

        self.stdout.write('Bulk create notices...')
        Notice.objects.bulk_create(notices, batch_size=BATCH_SIZE)

        # Articles now have pks; map legacy ext_id -> new pk for relations.
        self.stdout.write('- create articles_mapping...')
        articles_mapping = dict(Article.objects.values_list('ext_id', 'id'))

        # attach authors ------------------------------------------------------
        self.stdout.write('Start build relations atricles <-> authors...')
        article_authors = []
        j = 0
        for article_ext_id, author_id in article_authors_relation.items():
            if article_ext_id in articles_mapping:
                article_id = articles_mapping[article_ext_id]
                article_authors.append(
                    Article.authors.through(article_id=article_id, author_id=author_id)
                )
                if len(article_authors) == BATCH_SIZE:
                    self.stdout.write('Bulk create relations atricles <-> authors (iter {})...'.format(j))
                    j += 1
                    Article.authors.through.objects.bulk_create(article_authors)
                    article_authors = []
        if article_authors:
            self.stdout.write('Bulk create relations atricles <-> authors (iter end)...')
            Article.authors.through.objects.bulk_create(article_authors)

        # attach tags ---------------------------------------------------------
        self.stdout.write('Start build relations atricles <-> tags...')
        tagged_items = []
        j = 0
        for article_ext_id, tag_ids in article_tags_relation.items():
            if article_ext_id in articles_mapping:
                article_id = articles_mapping[article_ext_id]

                # Would this article's tags overflow the batch?  Then append
                # one by one, flushing mid-article; otherwise extend at once.
                if len(tagged_items) + len(tag_ids) > BATCH_SIZE:
                    for tag_id in tag_ids:
                        tagged_items.append(
                            TaggedItem(object_id=article_id, content_type_id=article_ct.id, tag_id=tag_id)
                        )
                        if len(tagged_items) == BATCH_SIZE:
                            self.stdout.write('Bulk create relations atricles <-> tags (iter {})...'.format(j))
                            j += 1
                            TaggedItem.objects.bulk_create(tagged_items)
                            tagged_items = []
                else:
                    tagged_items.extend(
                        [TaggedItem(object_id=article_id, content_type_id=article_ct.id, tag_id=tag_id)
                         for tag_id in tag_ids]
                    )
                    if len(tagged_items) == BATCH_SIZE:
                        self.stdout.write('Bulk create relations atricles <-> tags (iter {})...'.format(j))
                        j += 1
                        TaggedItem.objects.bulk_create(tagged_items)
                        tagged_items = []
        if tagged_items:
            self.stdout.write('Bulk create relations atricles <-> tags (iter end)...')
            TaggedItem.objects.bulk_create(tagged_items)

        # attach votes --------------------------------------------------------
        self.stdout.write('Start build relations atricles <-> votes...')
        votes = []
        j = 0
        for article_ext_id, vote_data in article_votes_relation.items():
            if article_ext_id in articles_mapping:
                article_id = articles_mapping[article_ext_id]
                # vote_data is (count, score) pairs; one Vote row per count.
                for count, score in vote_data:

                    # Same batch-overflow pattern as the tags loop above.
                    if len(votes) + count > BATCH_SIZE:
                        for i in range(count):
                            votes.append(
                                Vote(object_id=article_id, content_type_id=article_ct.id, score=score)
                            )
                            if len(votes) == BATCH_SIZE:
                                self.stdout.write('Bulk create relations atricles <-> votes (iter {})...'.format(j))
                                j += 1
                                Vote.objects.bulk_create(votes)
                                votes = []
                    else:
                        votes.extend(
                            [Vote(object_id=article_id, content_type_id=article_ct.id, score=score)
                             for i in range(count)]
                        )
                        if len(votes) == BATCH_SIZE:
                            self.stdout.write('Bulk create relations atricles <-> votes (iter {})...'.format(j))
                            j += 1
                            Vote.objects.bulk_create(votes)
                            votes = []
        if votes:
            self.stdout.write('Bulk create relations atricles <-> votes (iter end)...')
            Vote.objects.bulk_create(votes)

        self.stdout.write('End...')
예제 #7
0
    def handle(self, *args, **kwargs):
        """Import Poll/Choice rows (with Vote objects) from the CSV file
        given by ``path``.

        Column 9 holds a Perl-serialized dict: ``results`` maps answer
        index -> vote count, ``votes`` embeds the answer texts.  One Vote
        row (score=1) is created per counted vote, and each poll's
        ``sum_votes`` is updated afterwards.

        Raises:
            CommandError: if no ``path`` argument was supplied.
        """
        self.stdout.write('Start...')

        choice_ct = ContentType.objects.get_for_model(Choice)

        path = kwargs.get('path')
        if not path:
            raise CommandError('Path is required')

        self.stdout.write('Parse file...')
        # Legacy rows carry huge serialized payloads; raise csv's field cap.
        csv.field_size_limit(500 * 1024 * 1024)
        with open(path, 'r',
                  encoding=settings.MIGRATE_FILE_ENCODING) as csvfile:
            reader = csv.reader(csvfile)

            poll_sum_votes = {}
            votes = []

            for row in reader:

                try:
                    data = perl_to_python_dict(row[9])
                except Exception as e:
                    # BUG FIX: stderr.write() requires a string; writing
                    # the exception object itself crashed the command.
                    self.stderr.write(str(e))
                    continue

                try:
                    results = perl_to_python_dict(data['results'],
                                                  second=True)['1']
                except Exception as e:
                    self.stderr.write(str(e))
                    continue

                # The answers are embedded as '..{["a","b",...]}..'; slice
                # out the quoted list and split on the '","' separators.
                answers = data['votes']
                try:
                    answers = answers[answers.find("{") + 1:answers.rfind("}")]
                    answers = answers[answers.find("[") +
                                      2:answers.rfind("]") - 1]
                    answers = answers.split('","')
                except Exception as e:
                    self.stderr.write(str(e))
                    continue

                poll = Poll.objects.create(question=row[7],
                                           is_active=bool(int(row[5])))
                poll_sum_votes[poll.id] = 0

                # Legacy result keys are 1-based answer indexes.
                for i, answer in enumerate(answers, start=1):
                    vote_count = results[str(i)]
                    poll_sum_votes[poll.id] += vote_count

                    choice = Choice.objects.create(poll=poll,
                                                   answer=answer,
                                                   vote_count=vote_count)
                    votes.extend([
                        Vote(object_id=choice.id,
                             content_type_id=choice_ct.id,
                             score=1) for _ in range(vote_count)
                    ])

            if votes:
                self.stdout.write('Bulk create choices votes...')
                Vote.objects.bulk_create(votes, batch_size=BATCH_SIZE)

            # BUG FIX: iterating Poll.objects.all() raised KeyError for any
            # poll that existed before this import; only update the polls
            # created here.
            polls = Poll.objects.filter(id__in=poll_sum_votes)
            for poll in polls:
                poll.sum_votes = poll_sum_votes[poll.id]
            bulk_update(polls, update_fields=['sum_votes'])

        self.stdout.write('End...')