def create(self,
           submission_id=None,
           seed_params=None,
           domain=None,
           max_attempts=3,
           max_interactions=None):
    """Create a new seed (or resume a stored one) and crawl its redditors.

    When the instance already holds a seed, or ``seed_params['seed_id']``
    is given, the crawl state (redditors, index, comments, submissions,
    subreddits, domain) is restored from that ``Seed``.  Otherwise a seed
    submission is resolved -- from the database, then from the search
    engine by ``submission_id``, and finally from the most commented
    submission matching ``seed_params`` -- persisted, and wrapped in a new
    ``Seed`` whose first redditor is the submission's author.

    The method then walks ``self.__redditors`` starting at
    ``self.__r_idx``: for each redditor it fetches their submissions and
    the comments on those submissions, bulk-inserts both, appends the
    comment authors not seen yet to the redditor list, and saves the
    progress on the seed after every redditor.

    Args:
        submission_id: Optional id of the submission to seed from.
        seed_params: Optional mapping read with the keys ``'seed_id'``,
            ``'subreddit'``, ``'before'`` and ``'after'``.  Omitting it is
            the same as passing a mapping without any of those keys.
        domain: Stored on a new seed and passed to the search engine when
            fetching a redditor's submissions.  Replaced by the stored
            seed's domain when resuming.
        max_attempts: How many times a failing redditor is retried before
            being skipped.
        max_interactions: When truthy, the crawl stops once the redditor
            index has reached ``max_interactions - 1``.

    Returns:
        None.  Results are accumulated on the instance and on the seed.
    """
    # Function-scope imports, as in the original body (which imported
    # ``time`` here); the module-level import block is not assumed.
    import logging
    import time

    logger = logging.getLogger(__name__)
    started = time.process_time()
    if seed_params is None:
        seed_params = {}

    def create_redditor(name):
        """Fetch redditor *name* from the search engine and store it."""
        redditor = RedditUser(**self.__search_engine.redditor_info(name))
        redditor.save()
        return redditor

    def get_or_create_redditor(name):
        """Return the stored redditor *name*, fetching it when absent."""
        try:
            return RedditUser.objects.get(name=name)
        except ObjectDoesNotExist:
            return create_redditor(name)

    def get_or_create_subreddit(name):
        """Return the stored subreddit *name*, fetching it when absent."""
        try:
            return SubReddit.objects.get(name=name)
        except ObjectDoesNotExist:
            subreddit = SubReddit(
                **self.__search_engine.subreddit_info(name))
            subreddit.save()
            return subreddit

    if self.__seed or seed_params.get('seed_id'):
        # Resume: restore the whole crawl state from the stored seed.
        self.__seed = self.__seed or Seed.objects.get(
            id=seed_params.get('seed_id'))
        self.__redditors = self.__seed.redditors
        self.__r_idx = self.__seed.r_idx
        self.__comments = self.__seed.comments
        self.__submissions = self.__seed.submissions
        self.__subreddit = set(self.__seed.subreddits)
        domain = self.__seed.domain
    else:
        submission = None
        if submission_id:
            try:
                submission = Submission.objects.get(id=submission_id)
            except ObjectDoesNotExist:
                submission = self.__search_engine.retrive_submission_by_id(
                    submission_id)
        if not submission:
            submission = self.__search_engine.most_commented_submissions(
                subreddit=seed_params.get('subreddit'),
                before=seed_params.get('before'),
                after=seed_params.get('after'),
                limit=1)[0]

        # Anything that is not already a stored Submission is raw search
        # engine data: reuse the stored row if there is one, otherwise
        # resolve its foreign keys and persist it.
        if not isinstance(submission, Submission):
            try:
                submission = Submission.objects.get(id=submission['id'])
            except ObjectDoesNotExist:
                submission['subreddit'] = get_or_create_subreddit(
                    submission['subreddit'])
                submission['author'] = get_or_create_redditor(
                    submission['author'])
                submission = Submission(**submission)
                submission.save()

        submission_seed = submission.to_dict()
        seed = Seed(seed=submission, domain=domain)
        seed.save()
        self.__seed = seed

        # if submission belongs to any subreddit
        # ('u_' prefixed names are skipped instead of recording None).
        if submission_seed['subreddit'][:2] != 'u_':
            self.__subreddit.add(submission_seed['subreddit'])

        redditor = submission_seed['author']
        self.__redditors = [redditor.name]
        print(redditor.name)
        # NOTE(review): self.__r_idx is not set on this path; presumably
        # it is initialised elsewhere on the instance -- confirm.

    attempts = 0
    while self.__r_idx < len(self.__redditors):
        print(len(self.__redditors))
        redditor_name = self.__redditors[self.__r_idx]
        try:
            redditor = get_or_create_redditor(redditor_name)
            submissions = self.__search_engine.retrive_redditor_submissions(
                redditor_name, domain)
            for submission in submissions:
                submission['author'] = redditor
                submission['subreddit'] = get_or_create_subreddit(
                    submission['subreddit'])
                # Record every subreddit touched, not only the ones that
                # had to be created just now.
                self.__subreddit.add(submission['subreddit'].name)
            submission_bulk = [
                Submission(**submission) for submission in submissions
            ]

            comments = []
            print(time.process_time() - started)
            for position, submission in enumerate(submissions, start=1):
                print("%d/%d" % (position, len(submissions)))
                submission_comments = (
                    self.__search_engine.retrive_submission_comments(
                        submission['id']))
                for comment in submission_comments:
                    try:
                        comment['author'] = RedditUser.objects.get(
                            name=comment['author'])
                    except ObjectDoesNotExist:
                        try:
                            comment['author'] = create_redditor(
                                comment['author'])
                        except Exception:
                            # Best effort: the comment keeps its raw
                            # author value and is filtered out below.
                            logger.debug(
                                "Could not resolve comment author %r",
                                comment['author'], exc_info=True)
                    comment['submission'] = Submission(
                        id=comment['submission'])
                comments += submission_comments

            # Keep only comments whose author was resolved to a stored
            # user; this also drops authors that are None.
            comments = [
                comment for comment in comments
                if isinstance(comment['author'], RedditUser)
            ]
            comment_bulk = [Comment(**comment) for comment in comments]
        except Exception:
            if attempts < max_attempts:
                attempts += 1
            else:
                logger.warning(
                    "Skipping redditor %r after %d failed attempts",
                    redditor_name, attempts + 1, exc_info=True)
                attempts = 0
                self.__r_idx += 1
            continue

        # Each redditor gets a full retry budget.
        attempts = 0

        Submission.objects.bulk_create(submission_bulk,
                                       ignore_conflicts=True)
        self.__submissions += [
            submission['id'] for submission in submissions
        ]
        Comment.objects.bulk_create(comment_bulk, ignore_conflicts=True)
        self.__comments += [comment['id'] for comment in comments]

        # Queue newly discovered authors once each, in first-seen order.
        known = set(self.__redditors)
        author_names = dict.fromkeys(
            comment['author'].name for comment in comments)
        self.__redditors += [
            name for name in author_names if name not in known
        ]

        # Persist progress after every redditor so the crawl can resume.
        self.__seed.submissions = self.__submissions
        self.__seed.comments = self.__comments
        self.__seed.redditors = self.__redditors
        self.__seed.subreddits = list(self.__subreddit)
        self.__seed.r_idx = self.__r_idx
        self.__seed.save()

        # '>=' so the limit still applies when the redditor sitting at
        # exactly that index was skipped because of errors.
        if max_interactions and self.__r_idx >= max_interactions - 1:
            break
        self.__r_idx += 1