def download_subtitle(self, subtitle):
    """Download the subtitle archive and store its content on *subtitle*.

    Fetches the archive at ``subtitle.subtitle_url``, opens it as a RAR or
    ZIP, selects the best subtitle file inside (skipping forced, hidden,
    non-subtitle and Castilian-Spanish entries) and assigns the fixed-up
    bytes to ``subtitle.content``.

    :raises ValueError: if the payload is not a RAR/ZIP archive, or no
        acceptable subtitle file exists inside it.
    """
    logger.info('Downloading archive %s', subtitle)
    r = self.session.get(subtitle.subtitle_url,
                         headers={'Referer': MAIN_SUBDIVX_URL + subtitle.subtitle_id},
                         timeout=10, verify=True)
    r.raise_for_status()

    # open the archive from the in-memory payload
    archive_stream = io.BytesIO(r.content)
    if is_rarfile(archive_stream):
        logger.debug('Identified rar archive')
        content = RarFile(archive_stream)
    elif is_zipfile(archive_stream):
        logger.debug('Identified zip archive')
        content = ZipFile(archive_stream)
    else:
        raise ValueError('Not a valid archive')

    content_list = content.namelist()
    # Mis-encoded spellings of "España" used to tag Castilian (non-Latino)
    # subtitles inside the archive file names.
    NON_LATINO_REFERENCES_IN_FILENAME = ['Espa§a', 'espa§a']

    sub = None
    if len(content_list) == 1:
        sub = fix_line_ending(content.read(content_list[0]))
    else:
        for name in content_list:
            logger.debug('name archive %s', name)
            # discard the FORZADOS (forced-only) files
            if name.endswith('FORZADO.srt'):
                logger.debug('name.endswith(FORZADO.srt): %s', name)
                continue
            # discard hidden files
            if os.path.split(name)[-1].startswith('.'):
                logger.debug(
                    'os.path.split(name)[-1].startswith(.): %s', name)
                continue
            # discard non-subtitle files
            if not name.lower().endswith(MY_SUBTITLE_EXTENSIONS):
                logger.debug(
                    'not name.lower().endswith(SUBTITLE_EXTENSIONS): %s', name)
                continue
            # discard España subtitle files
            if any(word in name for word in NON_LATINO_REFERENCES_IN_FILENAME):
                logger.debug('discard España subtitle files')
                continue
            # Last acceptable entry wins, matching the original behaviour.
            logger.debug('sub selected: %s', name)
            sub = fix_line_ending(content.read(name))

    # BUG FIX: previously a bare NameError escaped when every entry was
    # filtered out; raise a meaningful error instead.
    if sub is None:
        raise ValueError('No valid subtitle found in archive')
    subtitle.content = sub
def upload_comic(series_id, file=None, file_url=None):
    """
    Given a series id and a file or a file url, upload the comic pages to
    s3 and create a new Comic instance in the given series.

    Returns the created Comic, or None when neither argument is given or
    the file is not a recognised CBR/CBZ archive.
    """
    # We need at least one of the arguments.
    if file is None and file_url is None:
        return None
    # If a file url is provided, download it to memory and get its file name.
    if file_url:
        req = requests.get(file_url, stream=True)
        d = req.headers['content-disposition']
        file_name = re.findall("filename=(.+)", d)
        file_name = file_name[0][:-1][1:]  # Remove double quotes.
        file = io.BytesIO(req.content)
        # BUG FIX: closing(req) only created an (unused) context manager
        # and never released the connection; close the response explicitly.
        req.close()
    # Otherwise simply take its file name.
    else:
        file_name = file.name
    # Determine whether it's a CBR or a CBZ and create a RarFile or a ZipFile.
    if file_name.endswith('.{}'.format(CBR)):
        cb_file = RarFile(file)
        cb_type = CBR
    elif file_name.endswith('.{}'.format(CBZ)):
        cb_file = ZipFile(file)
        cb_type = CBZ
    else:
        return None
    # Go through the CBZ/CBR pages and upload all of them to s3.
    # BUG FIX: the loop variable used to shadow ``file_name``, so the Comic
    # title ended up being the last page's name instead of the archive's.
    page_urls = []
    for page_name in cb_file.namelist():
        if not page_name.lower().endswith('.jpg') and \
                not page_name.lower().endswith('.png'):
            continue
        image_url = upload(ContentFile(cb_file.read(page_name)),
                           name=page_name, prefix=False, bucket_name=False,
                           key=None, secret=None, host=None, expires=0,
                           query_auth=False, force_http=True, policy=None)
        page_urls.append(image_url)
    # Create a comic, titled after the archive name minus its extension.
    title = file_name[:-len('.{}'.format(cb_type))]
    return Comic.objects.create(title=title,
                                file_type=cb_type,
                                pages='|'.join(page_urls),
                                series_id=series_id)
def download_subtitle(self, subtitle):
    """Download the archive behind *subtitle* and extract its content.

    Selects the subtitle file inside the archive matching the episode,
    season, format and (when requested) release group of *subtitle*.

    :raises ProviderError: if the archive type is unknown or no matching
        subtitle file is found inside it.
    """
    r = self.session.get(subtitle.download_link, timeout=10)
    r.raise_for_status()

    # open the archive
    archive_stream = io.BytesIO(r.content)
    if is_rarfile(archive_stream):
        logger.debug('Archive identified as rar')
        archive = RarFile(archive_stream)
    elif is_zipfile(archive_stream):
        logger.debug('Archive identified as zip')
        archive = ZipFile(archive_stream)
    else:
        raise ProviderError('Unidentified archive type')

    # extract subtitle's content (endswith accepts a tuple of extensions)
    subs_in_archive = [name for name in archive.namelist()
                       if name.endswith((".srt", ".sub", ".ssa", ".ass"))]

    # select the correct subtitle file
    matching_sub = None
    if len(subs_in_archive) == 1:
        matching_sub = subs_in_archive[0]
    else:
        for sub_name in subs_in_archive:
            guess = guessit(sub_name)
            # consider subtitle valid if:
            # - episode and season match
            # - format matches (if it was matched before)
            # - release group matches (and we asked for one and it was
            #   matched, or it was not matched)
            # BUG FIX: use .get()/membership tests -- guessit may fail to
            # detect episode/season/format/release_group in a file name,
            # and plain indexing raised KeyError.
            if guess.get("episode") == subtitle.episode and \
                    guess.get("season") == subtitle.season:
                format_matches = "format" not in subtitle.matches or \
                    ("format" in subtitle.matches and "format" in guess and
                     guess["format"].lower() in subtitle.releases.lower())
                release_group_matches = True
                if subtitle.asked_for_release_group:
                    release_group_matches = "release_group" not in subtitle.matches or \
                        ("release_group" in subtitle.matches and
                         "release_group" in guess and
                         guess["release_group"].lower() ==
                         subtitle.asked_for_release_group.lower())
                if release_group_matches and format_matches:
                    matching_sub = sub_name
                    break

    if not matching_sub:
        raise ProviderError("None of expected subtitle found in archive")
    subtitle.content = fix_line_ending(archive.read(matching_sub))
def download_subtitle(self, subtitle):
    # Fetch the subtitle payload; it may be raw text or a RAR/ZIP archive.
    logger.info("Download subtitle %r", subtitle.link)
    r = self.session.get(subtitle.link, params=None, timeout=10)
    r.raise_for_status()

    # check if it's rar or zip
    # Sniff the magic bytes: RAR archives start with "Rar!", ZIPs with "PK".
    # NOTE(review): Python 2 style code -- under Python 3, r.content is
    # bytes and these str comparisons would never match; confirm the
    # target interpreter before porting.
    if r.content[:4] == "Rar!":
        compressed = RarFile(StringIO(r.content))
        # Only the first archive member is extracted.
        content = compressed.read(compressed.namelist()[0])
    elif r.content[:4] == "PK":
        compressed = ZipFile(StringIO(r.content))
        content = compressed.read(compressed.namelist()[0])
    else:
        # Not an archive: assume the body is the subtitle itself.
        content = r.content

    # some ppl just put wrong file inside
    # NOTE(review): mimetypes.guess_type() expects a filename/URL, not the
    # file's CONTENT -- passing the content here is dubious; verify this
    # check actually rejects non-subtitle payloads as intended.
    if mimetypes.guess_type(content)[0] != "text/plain":
        raise ProviderError("Wrong subtitle inside compressed file")

    subtitle.content = fix_line_ending(content)
async def fetch_file(session, url):
    """Download a RAR archive from *url*, write its first ``.xml`` member
    into the compendium directory under ROOT, and return the response text.

    On a non-200 status the coroutine yields control once and returns None.
    """
    print(url)
    async with session.get(url, timeout=60 * 60) as response:
        # Bail out early on any non-OK status.
        if response.status != 200:
            return await asyncio.sleep(0)
        payload = await response.content.read()
        archive = RarFile(BytesIO(payload))
        # First .xml entry in the archive (IndexError if none, as before).
        xml_entries = [entry for entry in archive.namelist()
                       if entry.endswith('.xml')]
        xml_name = xml_entries[0]
        xml_bytes = archive.read(xml_name)
        target_path = os.path.join(ROOT, 'compendium', xml_name)
        with open(target_path, 'wb') as handler:
            handler.write(xml_bytes)
        return await response.text()
def extrair_rar(rf: RarFile, caminho: str):
    """
    Extract CSV files from a .rar archive.

    Parameters
    ----------
    rf: RarFile
        contents of the compressed archive.
    caminho: str
        path to the directory where the extracted files are saved.
    """
    # Last path component is the year; it is only used for progress output.
    ano = caminho.split('/')[-1]
    for f in rf.infolist():
        # Only extract the compressed CSV files.
        if f.filename.endswith('csv'):
            filename = f.filename.split('/')[-1]
            print("\033[94m>> Baixando {}/{}\033[0m".format(ano, filename))
            with open(caminho + '/' + filename, "wb") as of:
                of.write(rf.read(f.filename))
def handle(self, *args, **options): update_flag = True try: latest_update = FiasUpdateInfo.objects.latest("create_ts") except FiasUpdateInfo.DoesNotExist: latest_update = None fias_soap = Client(settings.FIAS_URL) latest_soap = fias_soap.service.GetLastDownloadFileInfo() version = latest_soap.VersionId if latest_update: if int(version) <= int(latest_update.version): update_flag = False if update_flag: xurl = latest_soap.FiasDeltaXmlUrl delta_file = urllib2.urlopen(xurl) input_file = StringIO(delta_file.read()) new_update = FiasUpdateInfo(version=version) new_update.textversion = latest_soap.TextVersion.encode("utf8") new_update.delta_url = latest_soap.FiasDeltaXmlUrl new_update.delta_file.save("fias_update_%s.rar" % version, ContentFile(input_file.getvalue()), save=False) new_update.save() # unpack, get xml, write to DB rar_file = RarFile(new_update.delta_file.path) update_file_addr = None update_file_house = None for packed_file in rar_file.namelist(): if packed_file.find("_ADDROBJ_") >= 0: update_file_addr = packed_file if packed_file.find("_HOUSE_") >= 0: update_file_house = packed_file # AddressObj if not update_file_addr: xml_string_addr = rar_file.read(update_file_addr) xml_tree_addr = etree.fromstring(xml_string_addr) update_items_addr = xml_tree_addr.getchildren() if update_items_addr and update_items_addr[0].keys(): fields_addr = update_items_addr[0].keys() update_length_addr = len(update_items_addr) for counter_addr, update_item_addr in enumerate(update_items_addr): new_addrobj = AddressObj() for field_addr in fields_addr: setattr(new_addrobj, field_addr, update_item_addr.get(field_addr)) new_addrobj.save() print u"%s Address objects left" % (update_length_addr - counter_addr) else: print u"Wrong format of Address update file" else: print u"AddressObj file not found in the update" # House if update_file_house: xml_string_house = rar_file.read(update_file_house) xml_tree_house = etree.fromstring(xml_string_house) update_items_house = 
xml_tree_house.getchildren() if update_items_house and update_items_house[0].keys(): fields_house = update_items_house[0].keys() update_length_house = len(update_items_house) for counter_house, update_item_house in enumerate(update_items_house): new_house = House() for field_house in fields_house: setattr(new_house, field_house, update_item_house.get(field_house)) new_house.save() print u"%s House objects left" % (update_length_house - counter_house) else: print u"Wrong format of House update file" else: print u"House file not found in the update" print u"Updated successfully" else: print u"No new updates found"
from rarfile import RarFile from rarfile import BadRarFile from rarfile import PasswordRequired rar=RarFile("/home/ljd/1.rar") filename=rar.namelist()[0] for x in xrange(100000): try: rar.setpassword(str(x)) rar.read(filename) print x exit() except BadRarFile,PasswordRequired: pass
from rarfile import RarFile from rarfile import BadRarFile from rarfile import PasswordRequired rar = RarFile("/home/ljd/1.rar") filename = rar.namelist()[0] for x in xrange(100000): try: rar.setpassword(str(x)) rar.read(filename) print x exit() except BadRarFile, PasswordRequired: pass