def handle(self, *args, **options):
    """Import every cleaned MP3 from the import directory as a new ``Talk``.

    For each ``*.mp3`` file found (processed in sorted path order):

    * a title is derived from the file name (extension removed, ``-`` and
      ``_`` turned into spaces, prefixed with ``"Imported - "``);
    * files whose derived title already belongs to a ``Talk`` are skipped;
    * the file is copied into the public MP3 directory under a freshly
      generated ``tmi-archive-<token>.mp3`` name;
    * a ``Talk`` row pointing at that copy is saved and its title and id
      are printed.

    ``args`` and ``options`` are accepted but not used.
    """
    source_pattern = '/var/www/vhosts/tmi-archive.com/mp3/import2/clean/*.mp3'
    new_file_dir = '/var/www/vhosts/tmi-archive.com/mp3.tmi-archive.com'

    for file in sorted(glob.glob(source_pattern)):
        original_name = file.split('/')[-1]

        # The title doubles as the "already imported" key below, so its
        # derivation must stay stable between runs.
        title = (
            original_name
            .replace('.mp3', '')
            .replace('-', ' ')
            .replace('_', ' ')
        )
        title = f'Imported - {title}'

        # exists() lets the database stop at the first match instead of
        # counting every matching row.
        if Talk.objects.filter(title=title).exists():
            continue

        # 12-character base32 token taken from a random UUID.
        token = base64.b32encode(
            uuid.uuid4().bytes).decode("utf-8").rstrip("=\n")[:12]
        new_file_name = f"tmi-archive-{token}.mp3"
        shutil.copy(file, new_file_dir + '/' + new_file_name)

        talk = Talk(
            title=title,
            updated_by_id=1,
            created_by_id=1,
            original_file_name=original_name,
            audio_filename=new_file_name,
            audio_cleaned=new_file_name,
            audio_original=new_file_name,
        )
        # NOTE(review): presumably stops the model from overwriting the
        # created_by / updated_by ids set above -- confirm against Talk.save().
        talk.auto_add_user_data = False
        talk.save()
        print(title, talk.id)
def handle(self, *args, **options):
    """Create ``Talk`` records from the scraped articles JSON dump.

    Reads ``./_other/scraper/articles-selenium.json`` into a DataFrame and,
    for every row from the fourth one onwards:

    * skips the row when a ``Talk`` with the same title already exists;
    * stores the inner HTML of the article's ``entry-content`` div as the
      description, with the PowerPress player and link elements removed;
    * saves the ``Talk``;
    * if the row has an audio link, attaches the matching local files from
      the ``mp3`` and ``mp3-cleaned`` folders (when present) to
      ``audio_original`` and ``audio_cleaned`` respectively.

    ``args`` and ``options`` are accepted but not used.
    """
    df = pd.read_json('./_other/scraper/articles-selenium.json')
    audio_path = '/home/script/_tmp/dhamma'

    # NOTE(review): the first three rows are skipped deliberately; the reason
    # is not visible in this file -- confirm before changing.
    for _, row in df.iloc[3:].iterrows():
        title = row.title

        # exists() lets the database stop at the first match instead of
        # counting every matching row.
        if Talk.objects.filter(title=title).exists():
            continue
        print(title)

        entry = BeautifulSoup(row.article, "html.parser").find(
            'div', {'class': 'entry-content'})
        if entry is None:
            # No article body found: store an empty description.
            content = ''
        else:
            # Drop the embedded audio player and its download links.
            for player in entry.find_all("div", {'class': 'powerpress_player'}):
                player.decompose()
            for links in entry.find_all("p", {'class': 'powerpress_links'}):
                links.decompose()
            # Inner HTML only; unlike stripping the wrapper tag textually,
            # this also works when the div carries extra attributes.
            content = entry.decode_contents()

        talk = Talk(
            title=title,
            description=content,
            updated_by_id=1,
            created_by_id=1,
        )
        # NOTE(review): presumably stops the model from overwriting the
        # created_by / updated_by ids set above -- confirm against Talk.save().
        talk.auto_add_user_data = False
        talk.save()

        audio_link = row.audio_link
        # Missing values can come out of pandas as None or NaN; neither is a
        # usable link, and an empty link would resolve to the folder itself.
        if not isinstance(audio_link, str) or not audio_link:
            continue

        from django.core.files import File

        file_name = audio_link.replace('?_=1', '').split('/')[-1]
        targets = (
            ('mp3', 'audio_original'),
            ('mp3-cleaned', 'audio_cleaned'),
        )
        for folder, field_name in targets:
            local_path = '{}/{}/{}'.format(audio_path, folder, file_name)
            if not os.path.isfile(local_path):
                continue
            # Close the handle as soon as the field has stored the content.
            with open(local_path, 'rb') as handle_:
                getattr(talk, field_name).save('new', File(handle_))