def handle(self, *args, **kwargs):
    """Import legacy video Sections from a CSV dump.

    Expects ``kwargs['path']`` to point at the CSV file; raises
    CommandError when it is missing.  The channel name is extracted from
    the ``user`` query parameter of the legacy RSS url stored in the
    serialized perl dict in column 8.
    """
    self.stdout.write('Start...')
    path = kwargs.get('path')
    if not path:
        raise CommandError('Path is required')
    with open(path, 'r', encoding=settings.MIGRATE_FILE_ENCODING) as csvfile:
        reader = csv.reader(csvfile)
        sections = []
        for row in reader:
            try:
                data = perl_to_python_dict(row[8])
            except Exception as e:
                # BUG FIX: OutputWrapper.write() requires a string; passing
                # the exception object itself raised instead of logging.
                self.stderr.write(str(e))
                continue
            # BUG FIX: a row without an 'rss' key made urlparse(None) blow
            # up and abort the whole import; treat it as "no channel".
            query_string = urlparse(data.get('rss') or '').query
            query_dict = parse_qs(query_string)
            channel = query_dict.get('user')
            # parse_qs returns a list per key; keep only the first value.
            if isinstance(channel, (list, tuple)):
                channel = channel[0]
            sections.append(
                Section(name=row[7],
                        slug=data.get('section_alias'),
                        is_video=True,
                        channel=channel,
                        is_active=bool(int(row[5])),
                        ext_id=row[0]))
    Section.objects.bulk_create(sections, batch_size=100)
    self.stdout.write('End...')
def handle(self, *args, **kwargs):
    """Import legacy Resources from a CSV dump.

    Foreign keys are resolved through ext_id -> pk lookup tables built
    once up front (Partition, Multimedia) to avoid a query per row.
    Rows without any resolvable partition are skipped.
    """
    self.stdout.write('Start...')
    path = kwargs.get('path')
    if not path:
        raise CommandError('Path is required')
    partition_mapping = dict(Partition.objects.values_list('ext_id', 'id'))
    multimedia_mapping = dict(
        Multimedia.objects.values_list('ext_id', 'id'))
    self.stdout.write('Parse file...')
    # Legacy rows carry very large serialized fields.
    csv.field_size_limit(500 * 1024 * 1024)
    with open(path, 'r', encoding=settings.MIGRATE_FILE_ENCODING) as csvfile:
        reader = csv.reader(csvfile)
        resources = []
        for row in reader:
            try:
                data = perl_to_python_dict(row[9])
            except Exception as e:
                # BUG FIX: OutputWrapper.write() requires a string; passing
                # the exception object itself raised instead of logging.
                self.stderr.write(str(e))
                continue
            # ext_id 124 is a legacy section deliberately excluded here.
            section_ext_ids = set(perl_to_python_list(row[10])) - {124}
            if not section_ext_ids:
                continue
            partition_ids = []
            for section_ext_id in section_ext_ids:
                partition_id = partition_mapping.get(section_ext_id)
                if partition_id:
                    partition_ids.append(partition_id)
            if not partition_ids:
                continue
            # First multimedia ext_id that maps to a local pk wins.
            multimedia_ext_ids = perl_to_python_list(row[11])
            multimedia_id = None
            if multimedia_ext_ids:
                for multimedia_ext_id in multimedia_ext_ids:
                    multimedia_id = multimedia_mapping.get(
                        multimedia_ext_id)
                    if multimedia_id:
                        break
            resources.append(
                Resource(partition_id=partition_ids[0],
                         name=row[7],
                         url=data['url'],
                         rating=int(row[8]),
                         multimedia_id=multimedia_id,
                         is_active=bool(int(row[5]))))
    if resources:
        self.stdout.write('Bulk create resources...')
        Resource.objects.bulk_create(resources, batch_size=BATCH_SIZE)
    self.stdout.write('End...')
def handle(self, *args, **kwargs):
    """Import legacy Bloggers from a CSV dump.

    Column 7 holds "first last" name pairs; rows whose name does not
    split into exactly two words are skipped (same policy as the author
    import command).
    """
    self.stdout.write('Start...')
    path = kwargs.get('path')
    if not path:
        raise CommandError('Path is required')
    with open(path, 'r', encoding=settings.MIGRATE_FILE_ENCODING) as csvfile:
        reader = csv.reader(csvfile)
        bloggers = []
        for row in reader:
            # BUG FIX: a name that is not exactly two words previously
            # raised an uncaught ValueError and aborted the whole import;
            # skip the row instead, as the author importer does.
            try:
                first_name, last_name = row[7].split()
            except ValueError:
                self.stderr.write('{} not blogger name'.format(row[7]))
                continue
            try:
                data = perl_to_python_dict(row[8])
            except Exception as e:
                # BUG FIX: OutputWrapper.write() requires a string; passing
                # the exception object itself raised instead of logging.
                self.stderr.write(str(e))
                continue
            bloggers.append(
                Blogger(
                    first_name=first_name,
                    last_name=last_name,
                    link=data.get('url'),
                    is_active=bool(int(row[5])),
                    ext_id=row[0]
                )
            )
    Blogger.objects.bulk_create(bloggers, batch_size=100)
    self.stdout.write('End...')
def handle(self, *args, **kwargs):
    """Import legacy forum Authors from a CSV dump.

    Only rows of type ``Forum::Author`` are processed.  Column 7 holds
    "last first" name pairs; the first resolvable multimedia ext_id from
    column 10 becomes the author's multimedia FK.
    """
    self.stdout.write('Start...')
    path = kwargs.get('path')
    if not path:
        raise CommandError('Path is required')
    multimedia_mapping = dict(
        Multimedia.objects.values_list('ext_id', 'id'))
    self.stdout.write('Parse file...')
    # Legacy rows carry very large serialized fields.
    csv.field_size_limit(500 * 1024 * 1024)
    with open(path, 'r', encoding=settings.MIGRATE_FILE_ENCODING) as csvfile:
        reader = csv.reader(csvfile)
        authors = []
        for row in reader:
            if row[1] != 'Forum::Author':
                continue
            try:
                last_name, first_name = row[7].split()
            except ValueError:
                self.stderr.write('{} not author name'.format(row[7]))
                continue
            try:
                data = perl_to_python_dict(row[8])
            except Exception as e:
                # BUG FIX: OutputWrapper.write() requires a string; passing
                # the exception object itself raised instead of logging.
                self.stderr.write(str(e))
                continue
            # First multimedia ext_id that maps to a local pk wins.
            multimedia_ext_ids = perl_to_python_list(row[10])
            multimedia_id = None
            if multimedia_ext_ids:
                for multimedia_ext_id in multimedia_ext_ids:
                    multimedia_id = multimedia_mapping.get(
                        multimedia_ext_id)
                    if multimedia_id:
                        break
            authors.append(
                Author(first_name=first_name,
                       last_name=last_name,
                       description=data.get('about'),
                       is_active=bool(int(row[5])),
                       multimedia_id=multimedia_id,
                       ext_id=row[0]))
    Author.objects.bulk_create(authors, batch_size=BATCH_SIZE)
    self.stdout.write('End...')
def handle(self, *args, **kwargs):
    """Import legacy Multimedia (image) records from a CSV dump.

    Only rows whose serialized info carries a recognised image suffix
    are imported; records are flushed to the DB in BATCH_SIZE chunks to
    bound memory usage.
    """
    self.stdout.write('Start...')
    path = kwargs.get('path')
    if not path:
        raise CommandError('Path is required')
    self.stdout.write('Parse file...')
    # Legacy rows carry very large serialized fields.
    csv.field_size_limit(500 * 1024 * 1024)
    with open(path, 'r', encoding=settings.MIGRATE_FILE_ENCODING) as csvfile:
        reader = csv.reader(csvfile)
        multimedia = []
        j = 0
        for row in reader:
            try:
                data = perl_to_python_dict(row[6])
            except Exception as e:
                # BUG FIX: OutputWrapper.write() requires a string; passing
                # the exception object itself raised instead of logging.
                self.stderr.write(str(e))
                continue
            try:
                suffix = data['info']['suffix']
                if suffix in ('jpg', 'gif', 'jpeg', 'png', 'bmp'):
                    multimedia.append(
                        Multimedia(image_url=data['info']['path'] + '.' + suffix,
                                   ext_id=int(row[0])))
            except KeyError:
                # Rows without info/suffix/path are not images; skip.
                continue
            if len(multimedia) == BATCH_SIZE:
                self.stdout.write(
                    'Bulk create multimedia (iter {})...'.format(j))
                j += 1
                Multimedia.objects.bulk_create(multimedia)
                multimedia = []
    if multimedia:
        # BUG FIX: the final message said "parents comments" — a
        # copy-paste from another migration command; this flushes the
        # remaining multimedia records.
        self.stdout.write('Bulk create multimedia (iter end)...')
        Multimedia.objects.bulk_create(multimedia)
    self.stdout.write('End...')
def handle(self, *args, **kwargs):
    """Import legacy Articles (and ticker Notices) from a CSV dump.

    Phase 1 parses the CSV, creating Article/Notice instances in
    BATCH_SIZE chunks while collecting ext_id-keyed relation data.
    Phase 2 resolves the generated Article pks and bulk-creates the
    author M2M rows, TaggedItem rows and Vote rows.

    Only the exception-logging calls were changed in this revision
    (``stderr.write(e)`` -> ``stderr.write(str(e))``); the batching and
    relation-building logic is intentionally untouched.
    """
    self.stdout.write('Start...')
    current_timezone = timezone.get_current_timezone()
    article_ct = ContentType.objects.get_for_model(Article)
    path = kwargs.get('path')
    if not path:
        raise CommandError('Path is required')
    # ext_id -> pk lookup tables for every related model.
    authors_mapping = dict(Author.objects.values_list('ext_id', 'id'))
    sections_mapping = dict(Section.objects.values_list('ext_id', 'id'))
    tags_mapping = dict(Tag.objects.values_list('ext_id', 'id'))
    multimedia_mapping = dict(Multimedia.objects.values_list('ext_id', 'id'))
    video_sections = list(Section.objects.filter(is_video=True).values_list('id', flat=True))
    self.stdout.write('Parse file...')
    csv.field_size_limit(500 * 1024 * 1024)
    with open(path, 'r', encoding=settings.MIGRATE_FILE_ENCODING) as csvfile:
        reader = csv.reader(csvfile)
        notices = []
        articles = []
        article_authors_relation = {}
        article_tags_relation = defaultdict(list)
        article_votes_relation = {}
        j = 0
        for row in reader:
            # First multimedia ext_id that maps to a local pk wins.
            multimedia_ext_ids = perl_to_python_list(row[17])
            multimedia_id = None
            if multimedia_ext_ids:
                for multimedia_ext_id in multimedia_ext_ids:
                    multimedia_id = multimedia_mapping.get(multimedia_ext_id)
                    if multimedia_id:
                        break
            try:
                data = perl_to_python_dict(row[12])
            except Exception as e:
                # BUG FIX: OutputWrapper.write() requires a string; passing
                # the exception object itself raised instead of logging.
                self.stderr.write(str(e))
                continue
            # flags
            is_news = False
            is_day_material = False
            is_main_news = False
            is_ticker = False
            section_ext_ids = sorted(perl_to_python_list(row[13]))
            # Legacy section 127 rows are Notices, not Articles.
            if 127 in section_ext_ids:
                if row[7]:
                    notices.append(
                        Notice(
                            content=row[7],
                            status=Notice.STATUS.approved if int(row[5]) else Notice.STATUS.rejected
                        )
                    )
                continue
            is_video = bool(row[15])
            # Pick the first mapped section; a video-only section is
            # rejected when the article carries no video.
            section_id = None
            for ext_id in section_ext_ids:
                if ext_id in sections_mapping:
                    section_id = sections_mapping[ext_id]
                    if section_id in video_sections and not is_video:
                        section_id = None
                        continue
                    break
            # Magic legacy section ids carrying article flags.
            if 108 in section_ext_ids:
                is_news = True
            if (113 in section_ext_ids) or (8221431 in section_ext_ids):
                is_day_material = True
            if (114 in section_ext_ids) or (8221430 in section_ext_ids):
                is_main_news = True
            if 123 in section_ext_ids:
                is_ticker = True
            article_ext_id = int(row[0])
            # store authors for later use
            raw_author = row[8]
            if raw_author:
                try:
                    ext_id = int(raw_author)
                except (ValueError, TypeError):
                    pass
                else:
                    if ext_id in authors_mapping:
                        article_authors_relation[article_ext_id] = authors_mapping[ext_id]
            # store tags for later use
            for ext_id in perl_to_python_list(row[18]):
                if ext_id in tags_mapping:
                    article_tags_relation[article_ext_id].append(tags_mapping[ext_id])
            # store votes for later use
            rating = 0
            vote_count = 0
            try:
                vote_count = int(row[10])
                vote_sum = int(row[11])
            except (ValueError, TypeError):
                pass
            else:
                article_votes_relation[article_ext_id] = vote_values(vote_count, vote_sum)
                if vote_count:
                    rating = float(vote_sum) / vote_count
            discussion_status = Article.DISCUSSION_STATUS.close
            can_comment = data.get('can_comment')
            if can_comment and can_comment == '1':
                discussion_status = Article.DISCUSSION_STATUS.open
            articles.append(
                Article(
                    title=row[7],
                    description=data.get('note') or '',
                    content=data.get('body') or '',
                    section_id=section_id,
                    # Over-long legacy values are dropped to fit field limits.
                    author_names=data.get('author') if (data.get('author') and len(data.get('author')) < 511) else '',
                    publish_date=timezone.make_aware(parse_datetime(row[4]), current_timezone, is_dst=True),
                    is_active=bool(int(row[5])),
                    source=data.get('source') or '',
                    source_link=data.get('sourcelink') if (data.get('sourcelink') and len(data.get('sourcelink')) < 200) else '',
                    discussion_status=discussion_status,
                    status=Article.STATUS.approved,
                    video=row[15] if (row[15] and len(row[15]) < 200) else '',
                    is_news=is_news,
                    is_ticker=is_ticker,
                    is_main_news=is_main_news,
                    is_day_material=is_day_material,
                    rating=rating,
                    vote_count=vote_count,
                    multimedia_id=multimedia_id,
                    thread_id=row[16] or 0,
                    ext_id=article_ext_id
                )
            )
            if len(articles) == BATCH_SIZE:
                self.stdout.write('Bulk create articles (iter {})...'.format(j))
                j += 1
                Article.objects.bulk_create(articles)
                articles = []
        if articles:
            self.stdout.write('Bulk create articles (iter end)...')
            Article.objects.bulk_create(articles)
        self.stdout.write('Bulk create notices...')
        Notice.objects.bulk_create(notices, batch_size=BATCH_SIZE)
        self.stdout.write('- create articles_mapping...')
        # ext_id -> generated pk, needed to wire up the relations below.
        articles_mapping = dict(Article.objects.values_list('ext_id', 'id'))
        # attach authors ------------------------------------------------------
        self.stdout.write('Start build relations atricles <-> authors...')
        article_authors = []
        j = 0
        for article_ext_id, author_id in article_authors_relation.items():
            if article_ext_id in articles_mapping:
                article_id = articles_mapping[article_ext_id]
                article_authors.append(
                    Article.authors.through(article_id=article_id, author_id=author_id)
                )
                if len(article_authors) == BATCH_SIZE:
                    self.stdout.write('Bulk create relations atricles <-> authors (iter {})...'.format(j))
                    j += 1
                    Article.authors.through.objects.bulk_create(article_authors)
                    article_authors = []
        if article_authors:
            self.stdout.write('Bulk create relations atricles <-> authors (iter end)...')
            Article.authors.through.objects.bulk_create(article_authors)
        # attach tags ---------------------------------------------------------
        self.stdout.write('Start build relations atricles <-> tags...')
        tagged_items = []
        j = 0
        for article_ext_id, tag_ids in article_tags_relation.items():
            if article_ext_id in articles_mapping:
                article_id = articles_mapping[article_ext_id]
                # When this article's tags would overflow the batch, append
                # one at a time and flush mid-article; otherwise extend in
                # one go and flush only on an exact batch boundary.
                if len(tagged_items) + len(tag_ids) > BATCH_SIZE:
                    for tag_id in tag_ids:
                        tagged_items.append(
                            TaggedItem(object_id=article_id, content_type_id=article_ct.id, tag_id=tag_id)
                        )
                        if len(tagged_items) == BATCH_SIZE:
                            self.stdout.write('Bulk create relations atricles <-> tags (iter {})...'.format(j))
                            j += 1
                            TaggedItem.objects.bulk_create(tagged_items)
                            tagged_items = []
                else:
                    tagged_items.extend(
                        [TaggedItem(object_id=article_id, content_type_id=article_ct.id, tag_id=tag_id)
                         for tag_id in tag_ids]
                    )
                    if len(tagged_items) == BATCH_SIZE:
                        self.stdout.write('Bulk create relations atricles <-> tags (iter {})...'.format(j))
                        j += 1
                        TaggedItem.objects.bulk_create(tagged_items)
                        tagged_items = []
        if tagged_items:
            self.stdout.write('Bulk create relations atricles <-> tags (iter end)...')
            TaggedItem.objects.bulk_create(tagged_items)
        # attach votes --------------------------------------------------------
        self.stdout.write('Start build relations atricles <-> votes...')
        votes = []
        j = 0
        for article_ext_id, vote_data in article_votes_relation.items():
            if article_ext_id in articles_mapping:
                article_id = articles_mapping[article_ext_id]
                # vote_data is (count, score) pairs from vote_values().
                for count, score in vote_data:
                    if len(votes) + count > BATCH_SIZE:
                        for i in range(count):
                            votes.append(
                                Vote(object_id=article_id, content_type_id=article_ct.id, score=score)
                            )
                            if len(votes) == BATCH_SIZE:
                                self.stdout.write('Bulk create relations atricles <-> votes (iter {})...'.format(j))
                                j += 1
                                Vote.objects.bulk_create(votes)
                                votes = []
                    else:
                        votes.extend(
                            [Vote(object_id=article_id, content_type_id=article_ct.id, score=score)
                             for i in range(count)]
                        )
                        if len(votes) == BATCH_SIZE:
                            self.stdout.write('Bulk create relations atricles <-> votes (iter {})...'.format(j))
                            j += 1
                            Vote.objects.bulk_create(votes)
                            votes = []
        if votes:
            self.stdout.write('Bulk create relations atricles <-> votes (iter end)...')
            Vote.objects.bulk_create(votes)
    self.stdout.write('End...')
def handle(self, *args, **kwargs):
    """Import legacy Polls, their Choices and per-choice Votes from CSV.

    The legacy ``votes`` payload is a perl-serialized blob; the answers
    list is carved out of it with string slicing (the payload is not
    parseable by ``perl_to_python_dict``).  ``sum_votes`` is recomputed
    and bulk-updated after all choices are created.
    """
    self.stdout.write('Start...')
    choice_ct = ContentType.objects.get_for_model(Choice)
    path = kwargs.get('path')
    if not path:
        raise CommandError('Path is required')
    self.stdout.write('Parse file...')
    # Legacy rows carry very large serialized fields.
    csv.field_size_limit(500 * 1024 * 1024)
    with open(path, 'r', encoding=settings.MIGRATE_FILE_ENCODING) as csvfile:
        reader = csv.reader(csvfile)
        poll_sum_votes = {}
        votes = []
        for row in reader:
            try:
                data = perl_to_python_dict(row[9])
            except Exception as e:
                # BUG FIX: OutputWrapper.write() requires a string; passing
                # the exception object itself raised instead of logging.
                self.stderr.write(str(e))
                continue
            try:
                results = perl_to_python_dict(data['results'], second=True)['1']
            except Exception as e:
                self.stderr.write(str(e))
                continue
            # Carve the quoted answers out of the raw perl list literal:
            # strip the outer {...}, then the [...] with its quote chars,
            # then split on the '","' separators.
            answers = data['votes']
            try:
                answers = answers[answers.find("{") + 1:answers.rfind("}")]
                answers = answers[answers.find("[") + 2:answers.rfind("]") - 1]
                answers = answers.split('","')
            except Exception as e:
                self.stderr.write(str(e))
                continue
            poll = Poll.objects.create(question=row[7], is_active=bool(int(row[5])))
            poll_sum_votes[poll.id] = 0
            # Legacy results are keyed by 1-based answer position.
            for i, answer in enumerate(answers, start=1):
                vote_count = results[str(i)]
                poll_sum_votes[poll.id] += vote_count
                choice = Choice.objects.create(poll=poll, answer=answer, vote_count=vote_count)
                votes.extend([
                    Vote(object_id=choice.id, content_type_id=choice_ct.id, score=1)
                    for _ in range(vote_count)
                ])
    if votes:
        self.stdout.write('Bulk create choices votes...')
        Vote.objects.bulk_create(votes, batch_size=BATCH_SIZE)
    # BUG FIX: iterating Poll.objects.all() raised KeyError for any poll
    # that existed before this run; only update the polls created here.
    polls = Poll.objects.filter(id__in=poll_sum_votes)
    for poll in polls:
        poll.sum_votes = poll_sum_votes[poll.id]
    bulk_update(polls, update_fields=['sum_votes'])
    self.stdout.write('End...')