Ejemplo n.º 1
0
    def download_subtitle(self, subtitle):
        """Download the archive for ``subtitle`` and store one member as its content.

        The archive is fetched from ``subtitle.subtitle_url`` and opened as a
        rar or zip file. When it holds exactly one member, that member is used
        as-is. Otherwise every member is filtered (forced-subtitle files,
        hidden files, files without a subtitle extension and files whose name
        contains one of the non-Latin-American markers are skipped) and the
        LAST member that survives the filters is used.

        :param subtitle: object providing ``subtitle_url`` and ``subtitle_id``;
            its ``content`` attribute is set on success.
        :raises ValueError: if the payload is neither a rar nor a zip archive,
            or if no member of the archive is an acceptable subtitle file.
        """
        logger.info('Downloading archive %s', subtitle)
        r = self.session.get(subtitle.subtitle_url, headers={'Referer': MAIN_SUBDIVX_URL+subtitle.subtitle_id},
                             timeout=10, verify=True)
        r.raise_for_status()

        # open the archive
        archive_stream = io.BytesIO(r.content)
        if is_rarfile(archive_stream):
            logger.debug('Identified rar archive')
            content = RarFile(archive_stream)
        elif is_zipfile(archive_stream):
            logger.debug('Identified zip archive')
            content = ZipFile(archive_stream)
        else:
            raise ValueError('Not a valid archive')

        content_list = content.namelist()
        # NOTE(review): the '§' presumably mirrors how 'ñ' shows up in
        # mis-encoded archive member names -- confirm before "fixing" it.
        NON_LATINO_REFERENCES_IN_FILENAME = ['Espa§a', 'espa§a']

        # Stays None when nothing is selected, so the failure is reported
        # explicitly instead of surfacing as an UnboundLocalError.
        sub = None
        if len(content_list) == 1:
            sub = fix_line_ending(content.read(content_list[0]))
        else:
            for name in content_list:
                logger.debug('name archive %s', name)
                # discard the forced-subtitles ("FORZADO") file
                if name.endswith('FORZADO.srt'):
                    logger.debug('name.endswith(FORZADO.srt): %s', name)
                    continue

                # discard hidden files
                if os.path.split(name)[-1].startswith('.'):
                    logger.debug(
                        'os.path.split(name)[-1].startswith(.): %s', name)
                    continue

                # discard non-subtitle files
                if not name.lower().endswith(MY_SUBTITLE_EXTENSIONS):
                    logger.debug(
                        'not name.lower().endswith(SUBTITLE_EXTENSIONS): %s', name)
                    continue

                # discard subtitle files flagged as Spain ("España") variants
                if any(word in name for word in NON_LATINO_REFERENCES_IN_FILENAME):
                    logger.debug('discard España subtitle files')
                    continue

                # No break on purpose: as before, a later acceptable member
                # overrides an earlier one.
                logger.debug('sub selected: %s', name)
                sub = fix_line_ending(content.read(name))

        if sub is None:
            raise ValueError('No suitable subtitle file found in archive')
        subtitle.content = sub
Ejemplo n.º 2
0
def upload_comic(series_id, file=None, file_url=None):
    """
    Given a series id and a file or a file url, upload the comic pages
    to s3 and create a new Comic instance in the given series.

    ``file`` must expose a ``name`` attribute; ``file_url`` must answer with a
    ``content-disposition`` header carrying a quoted ``filename=`` value.

    Returns the created Comic, or None when neither ``file`` nor ``file_url``
    is given or when the file name ends in neither the CBR nor the CBZ
    extension.
    """
    # We need at least one of the arguments.
    if file is None and file_url is None:
        return None

    # If a file url is provided, download it to memory and get its file name.
    if file_url:
        # The response is streamed, so it must be closed explicitly once the
        # body has been copied into memory.
        with closing(requests.get(file_url, stream=True)) as req:
            d = req.headers['content-disposition']
            file_name = re.findall("filename=(.+)", d)
            file_name = file_name[0][:-1][1:]  # Remove double quotes.
            file = io.BytesIO(req.content)
    # Otherwise simply take its file name.
    else:
        file_name = file.name

    # Determine whether it's a CBR or a CBZ and create a RarFile or a ZipFile.
    if file_name.endswith('.{}'.format(CBR)):
        cb_file = RarFile(file)
        cb_type = CBR
    elif file_name.endswith('.{}'.format(CBZ)):
        cb_file = ZipFile(file)
        cb_type = CBZ
    else:
        return None

    # Go through the CBZ/CBR pages and upload all of them to s3.
    # The loop variable is deliberately NOT called ``file_name``: the comic's
    # own file name is still needed below for the title.
    page_urls = []
    for page_name in cb_file.namelist():
        if not page_name.lower().endswith(('.jpg', '.png')):
            continue
        image_url = upload(ContentFile(cb_file.read(page_name)),
                           name=page_name,
                           prefix=False,
                           bucket_name=False,
                           key=None,
                           secret=None,
                           host=None,
                           expires=0,
                           query_auth=False,
                           force_http=True,
                           policy=None)
        page_urls.append(image_url)

    # The title is the file name without the '.<type>' suffix that was matched
    # above, so CBR files are stripped the same way as CBZ files.
    title = file_name[:-(len(cb_type) + 1)]

    # Create a comic.
    return Comic.objects.create(title=title,
                                file_type=cb_type,
                                pages='|'.join(page_urls),
                                series_id=series_id)
Ejemplo n.º 3
0
    def download_subtitle(self, subtitle):
        """Download the archive for ``subtitle`` and store the matching member.

        The archive at ``subtitle.download_link`` is opened as rar or zip and
        its members ending in .srt/.sub/.ssa/.ass are collected. A single
        candidate is used directly; with several candidates, each name is
        parsed with ``guessit`` and the first one whose episode, season,
        format and release group agree with ``subtitle`` is used.

        :param subtitle: object providing ``download_link``, ``episode``,
            ``season``, ``matches``, ``releases`` and
            ``asked_for_release_group``; its ``content`` is set on success.
        :raises ProviderError: if the payload is not a rar/zip archive or no
            member of the archive matches.
        """
        r = self.session.get(subtitle.download_link, timeout=10)
        r.raise_for_status()

        # open the archive
        archive_stream = io.BytesIO(r.content)
        if is_rarfile(archive_stream):
            logger.debug('Archive identified as rar')
            archive = RarFile(archive_stream)
        elif is_zipfile(archive_stream):
            logger.debug('Archive identified as zip')
            archive = ZipFile(archive_stream)
        else:
            raise ProviderError('Unidentified archive type')

        # collect the subtitle files contained in the archive
        subs_in_archive = [name for name in archive.namelist()
                           if name.endswith((".srt", ".sub", ".ssa", ".ass"))]

        # select the correct subtitle file
        matching_sub = None
        if len(subs_in_archive) == 1:
            matching_sub = subs_in_archive[0]
        else:
            for sub_name in subs_in_archive:
                guess = guessit(sub_name)

                # A member whose name yields no episode/season cannot be
                # matched; skip it instead of failing with KeyError.
                if "episode" not in guess or "season" not in guess:
                    continue
                if guess["episode"] != subtitle.episode or guess["season"] != subtitle.season:
                    continue

                # format matches (only checked if it was matched before)
                format_matches = True
                if "format" in subtitle.matches:
                    guessed_format = guess.get("format")
                    format_matches = (guessed_format is not None and
                                      guessed_format.lower() in subtitle.releases.lower())

                # release group matches (only checked if we asked for one and
                # it was matched before)
                release_group_matches = True
                if subtitle.asked_for_release_group and "release_group" in subtitle.matches:
                    guessed_group = guess.get("release_group")
                    release_group_matches = (guessed_group is not None and
                                             guessed_group.lower() ==
                                             subtitle.asked_for_release_group.lower())

                if release_group_matches and format_matches:
                    matching_sub = sub_name
                    break

        if not matching_sub:
            raise ProviderError("None of expected subtitle found in archive")
        subtitle.content = fix_line_ending(archive.read(matching_sub))
Ejemplo n.º 4
0
    def download_subtitle(self, subtitle):
        """Download ``subtitle.link`` and store its text in ``subtitle.content``.

        The payload may be a rar archive, a zip archive or the bare subtitle
        file; for archives the first member is taken.

        :param subtitle: object providing ``link``; its ``content`` is set on
            success.
        :raises ProviderError: if the extracted data fails the plain-text check.
        """
        logger.info("Download subtitle %r", subtitle.link)

        r = self.session.get(subtitle.link, params=None, timeout=10)
        r.raise_for_status()

        # Check the magic bytes to see if it's rar or zip. The zip signature
        # is tested with startswith(): comparing a 4-byte slice against the
        # 2-byte "PK" marker could never be true.
        if r.content.startswith(b"Rar!"):
            compressed = RarFile(StringIO(r.content))
            content = compressed.read(compressed.namelist()[0])
        elif r.content.startswith(b"PK"):
            compressed = ZipFile(StringIO(r.content))
            content = compressed.read(compressed.namelist()[0])
        else:
            content = r.content

        # some ppl just put wrong file inside
        # NOTE(review): mimetypes.guess_type() guesses from a file name or URL,
        # but it is handed the file *content* here -- confirm this check does
        # what is intended.
        if mimetypes.guess_type(content)[0] != "text/plain":
            raise ProviderError("Wrong subtitle inside compressed file")

        subtitle.content = fix_line_ending(content)
Ejemplo n.º 5
0
async def fetch_file(session, url):
    """Fetch a rar archive from ``url`` and save its first ``.xml`` member.

    The member is written under ``ROOT/compendium`` using its archive name.
    On a non-200 response nothing is saved and the result of
    ``asyncio.sleep(0)`` is returned; otherwise the result of
    ``response.text()`` is returned.
    """
    print(url)
    async with session.get(url, timeout=60 * 60) as response:
        if response.status == 200:
            payload = await response.content.read()
            archive = RarFile(BytesIO(payload))

            # Only the first member with an .xml suffix is of interest.
            xml_members = [
                member for member in archive.namelist()
                if member.endswith('.xml')
            ]
            xml_filename = xml_members[0]
            xml_bytes = archive.read(xml_filename)

            target_path = os.path.join(ROOT, 'compendium', xml_filename)
            with open(target_path, 'wb') as handler:
                handler.write(xml_bytes)

            return await response.text()

        return await asyncio.sleep(0)
Ejemplo n.º 6
0
def extrair_rar(rf: RarFile, caminho: str):
    """Extract the CSV members of a .rar archive into a folder.

    Parameters
    ----------
    rf: RarFile
        the opened archive to read members from.
    caminho: str
        path of the folder the files are written to; its last
        '/'-separated component is used as a label in the progress message.
    """
    ano = caminho.rpartition('/')[2]
    # Member count; computed here but not used below.
    n_arquivos = len(rf.infolist())
    for membro in rf.infolist():
        nome_membro = membro.filename
        # Keep only members whose name ends in 'csv'.
        if not nome_membro.endswith('csv'):
            continue
        # Drop any directory part stored in the archive.
        nome_base = nome_membro.rpartition('/')[2]
        print("\033[94m>> Baixando {}/{}\033[0m".format(ano, nome_base))
        destino = caminho + '/' + nome_base
        with open(destino, "wb") as saida:
            saida.write(rf.read(nome_membro))
Ejemplo n.º 7
0
    def handle(self, *args, **options):
        """Download and import the latest FIAS delta update, if it is new.

        The newest version reported by the SOAP service at
        ``settings.FIAS_URL`` is compared with the most recent
        ``FiasUpdateInfo`` row. When it is newer (or no row exists), the delta
        rar is downloaded, stored on a new ``FiasUpdateInfo``, and the
        ``_ADDROBJ_`` and ``_HOUSE_`` XML members are loaded into
        ``AddressObj`` and ``House`` rows, one row per child element with the
        element's attributes copied onto the model instance.
        """
        update_flag = True

        try:
            latest_update = FiasUpdateInfo.objects.latest("create_ts")
        except FiasUpdateInfo.DoesNotExist:
            latest_update = None

        fias_soap = Client(settings.FIAS_URL)

        latest_soap = fias_soap.service.GetLastDownloadFileInfo()

        version = latest_soap.VersionId

        # Nothing to do when the stored version is already up to date.
        if latest_update:
            if int(version) <= int(latest_update.version):
                update_flag = False

        if update_flag:
            xurl = latest_soap.FiasDeltaXmlUrl
            delta_file = urllib2.urlopen(xurl)
            input_file = StringIO(delta_file.read())
            new_update = FiasUpdateInfo(version=version)
            new_update.textversion = latest_soap.TextVersion.encode("utf8")
            new_update.delta_url = latest_soap.FiasDeltaXmlUrl
            new_update.delta_file.save("fias_update_%s.rar" % version, ContentFile(input_file.getvalue()), save=False)

            new_update.save()

            # unpack, get xml, write to DB

            rar_file = RarFile(new_update.delta_file.path)

            # Locate the two members of interest; if several names match, the
            # last one wins.
            update_file_addr = None
            update_file_house = None
            for packed_file in rar_file.namelist():
                if packed_file.find("_ADDROBJ_") >= 0:
                    update_file_addr = packed_file
                if packed_file.find("_HOUSE_") >= 0:
                    update_file_house = packed_file

            # AddressObj
            # Process the member only when it WAS found (same polarity as the
            # House section below).
            if update_file_addr:
                xml_string_addr = rar_file.read(update_file_addr)

                xml_tree_addr = etree.fromstring(xml_string_addr)
                update_items_addr = xml_tree_addr.getchildren()
                if update_items_addr and update_items_addr[0].keys():
                    # The attribute names of the first element define the
                    # fields copied for every element.
                    fields_addr = update_items_addr[0].keys()
                    update_length_addr = len(update_items_addr)

                    for counter_addr, update_item_addr in enumerate(update_items_addr):
                        new_addrobj = AddressObj()
                        for field_addr in fields_addr:
                            setattr(new_addrobj, field_addr, update_item_addr.get(field_addr))

                        new_addrobj.save()
                        print(u"%s Address objects left" % (update_length_addr - counter_addr))
                else:
                    print(u"Wrong format of Address update file")
            else:
                print(u"AddressObj file not found in the update")

            # House
            if update_file_house:
                xml_string_house = rar_file.read(update_file_house)

                xml_tree_house = etree.fromstring(xml_string_house)
                update_items_house = xml_tree_house.getchildren()
                if update_items_house and update_items_house[0].keys():
                    fields_house = update_items_house[0].keys()
                    update_length_house = len(update_items_house)

                    for counter_house, update_item_house in enumerate(update_items_house):
                        new_house = House()
                        for field_house in fields_house:
                            setattr(new_house, field_house, update_item_house.get(field_house))

                        new_house.save()
                        print(u"%s House objects left" % (update_length_house - counter_house))
                else:
                    print(u"Wrong format of House update file")
            else:
                print(u"House file not found in the update")

            print(u"Updated successfully")

        else:
            print(u"No new updates found")
Ejemplo n.º 8
0
Archivo: rar.py Proyecto: lujinda/pylot
from rarfile import RarFile
from rarfile import BadRarFile
from rarfile import PasswordRequired
# Try every numeric password from 0 to 99999 against the first member of the
# archive; print the first one that lets the member be read, then stop.
rar = RarFile("/home/ljd/1.rar")
filename = rar.namelist()[0]
for x in xrange(100000):
    try:
        rar.setpassword(str(x))
        rar.read(filename)
    except (BadRarFile, PasswordRequired):
        # Both exception types must be listed in a tuple: the bare
        # "except A, B" form catches only A and rebinds the name B.
        pass
    else:
        print(x)
        exit()
    
Ejemplo n.º 9
0
from rarfile import RarFile
from rarfile import BadRarFile
from rarfile import PasswordRequired
# Try every numeric password from 0 to 99999 against the first member of the
# archive; print the first one that lets the member be read, then stop.
rar = RarFile("/home/ljd/1.rar")
filename = rar.namelist()[0]
for x in xrange(100000):
    try:
        rar.setpassword(str(x))
        rar.read(filename)
    except (BadRarFile, PasswordRequired):
        # Both exception types must be listed in a tuple: the bare
        # "except A, B" form catches only A and rebinds the name B.
        pass
    else:
        print(x)
        exit()