Beispiel #1
0
    def test_replace_metadata(self):
        """Repacking a zip with parsed metadata keeps every member (payload
        bytes and timestamps) intact except metadata.json, which is
        regenerated from the version's metadata."""
        old_zip_file = get_test_zipfile('LotsOfFiles')

        # Parsing consumes the stream; rewind so it can be re-read as the
        # version's source below.
        metadata = models.parse_zipfile_metadata(old_zip_file)
        old_zip_file.seek(0)

        extension = models.Extension.objects.create_from_metadata(metadata, creator=self.user)
        version = models.ExtensionVersion(extension=extension,
                                          source=File(old_zip_file))

        version.parse_metadata_json(metadata)

        new_zip = version.get_zipfile('r')

        old_zip = ZipFile(File(old_zip_file), 'r')
        # Same member count, and metadata.json matches the re-serialized form.
        self.assertEqual(len(old_zip.infolist()), len(new_zip.infolist()))
        self.assertEqual(new_zip.read("metadata.json"),
                         version.make_metadata_json_string())

        # Every other member must be carried over byte-for-byte with its
        # original timestamp.
        for old_info in old_zip.infolist():
            if old_info.filename == "metadata.json":
                continue

            new_info = new_zip.getinfo(old_info.filename)
            self.assertEqual(old_zip.read(old_info), new_zip.read(new_info))
            self.assertEqual(old_info.date_time, new_info.date_time)

        old_zip.close()
        new_zip.close()
Beispiel #2
0
class RemoteZipFile(object):
    """A potentially remote ZIP file.

    HTTP URLs are opened through httpopen() (a file-like object exposing
    .size and .prefetch()); anything else is treated as a local path.
    require() streams selected members to stdout (fd 1) using a simple
    length-prefixed framing.
    """

    def __init__(self, name_or_url):
        if name_or_url.startswith("http:"):
            self._f = httpopen(name_or_url)
            self._use_read_buffer = True
            # Prefetch the last MB: the central directory sits at the end
            # of a zip, so this usually captures the whole index.
            self._f.prefetch([(self._f.size - 1024 * 1024, self._f.size)])
        else:
            # Bug fix: ZipFile needs a binary stream; the original opened
            # local files in (default) text mode, which fails on Python 3.
            self._f = open(name_or_url, 'rb')
            self._use_read_buffer = False

        self._zf = ZipFile(self._f)
        if self._use_read_buffer:
            # Members sorted by on-disk offset, used to compute byte ranges.
            self._sinfo = sorted((i.header_offset, i) for i in self._zf.infolist())
            self._dict = dict((i.filename, i) for i in self._zf.infolist())

    def keys(self):
        """Return the member names in archive order."""
        return self._zf.namelist()

    def require(self, required):
        """Stream each member named in `required` to fd 1.

        For remote files, first coalesce the members' byte ranges
        (merging holes smaller than MIN_HOLE_SIZE) and prefetch them in
        one pass.  Raises AssertionError if a requested name is missing.
        """
        if self._use_read_buffer:

            def get_block_range(block_id):
                # A member's data extends to the next member's header (or
                # to end-of-file for the last one).
                s = self._sinfo[block_id][1].header_offset
                if block_id != len(self._sinfo) - 1:
                    e = self._sinfo[block_id + 1][1].header_offset
                else:
                    e = self._f.size - 1
                return (s, e)

            blocks = [j for j, (_, i) in enumerate(self._sinfo) if i.filename in required]
            read_blocks = []
            for i in blocks:
                if not read_blocks:
                    read_blocks.append(get_block_range(i))
                else:
                    # Merge ranges separated by holes smaller than
                    # MIN_HOLE_SIZE: cheaper to read through than to seek.
                    start, end = read_blocks[-1]
                    b_start, b_end = get_block_range(i)
                    if b_start > end + MIN_HOLE_SIZE:
                        read_blocks.append((b_start, b_end))
                    else:
                        read_blocks[-1] = (start, b_end)
            self._f.prefetch(read_blocks)

        rset = set(required)
        for i in self._zf.infolist():
            if i.filename in rset:
                rset.remove(i.filename)
                x = self._zf.open(i)
                # Frame: <name length><name><payload length><payload>.
                # NOTE(review): on Python 3 the bare filename write would
                # need encoding to bytes — confirm the target runtime.
                write(1, struct.pack("i", len(i.filename)))
                write(1, i.filename)
                write(1, struct.pack("i", i.file_size))
                write(1, x.read())
        assert not rset, rset
Beispiel #3
0
class ExtractFile(object):
    """Extract a zip archive into a work directory, optionally skipping
    members that match a filter and reporting extraction progress.

    NOTE: Python 2 code (print statements).
    """

    def __init__(self, fp, work_path, regex_util=None, show_info=False, progress=None, eu_text=None):
        # fp: path (or file object) of the zip archive to extract.
        self.fp = fp
        # work_path: directory that receives the extracted members.
        self.work_path = work_path
        self.zf = ZipFile(self.fp, mode="r")
        # regex_util: optional matcher; members whose basename matches are skipped.
        self.regex_util = regex_util
        # progress: optional object with start/update/finish_extract hooks.
        self.progress = progress
        # Total uncompressed size, used for progress reporting.
        self.uncompress_size = sum((f.file_size for f in self.zf.infolist()))
        self.show_info = show_info
        # eu_text: optional collector of ${key} regexes harvested from
        # important.properties files (see extract()).
        self.eu_text = eu_text

    def clean_work_path(self):
        # Recreate the work directory from scratch.
        if os.path.exists(self.work_path):
            shutil.rmtree(self.work_path)
        os.mkdir(self.work_path)

    def extract(self):
        """Extract all non-filtered members, feeding progress callbacks and
        harvesting ${key} regexes from any important.properties file."""
        members = self.zf.infolist()
        if self.show_info:
            self.start_extract()
        total = 0
        for zip_info in members:
            total += zip_info.file_size
            if not self.regex_util or not self.regex_util.do_match(spit_filename(zip_info.filename, True)):
                # Guard against destination paths longer than 255 chars.
                if len(zip_info.filename) + len(self.work_path) + 1 < 255:
                    self.zf.extract(zip_info.filename, self.work_path)
                    if self.eu_text and spit_filename(zip_info.filename, True) == "important.properties":
                        file_path = os.path.join(self.work_path, zip_info.filename)
                        try:
                            cf = MyConfigParser(file_path, file_error=True)
                            # Build ${key} placeholder regexes from the
                            # properties keys.
                            regrex = ["\$\{" + key + "\}" for key in cf.keys()]
                            self.eu_text.add_regex(regrex)
                        except Exception as exe:
                            print exe
                            print self.work_path + "/" + zip_info.filename
                    if self.show_info:
                        self.update_extract(total)
                else:
                    print "path len > 255   ", self.work_path, zip_info.filename
            else:
                pass
        if self.show_info:
            self.finish_extract()
        self.zf.close()

    def start_extract(self):
        # Notify the progress reporter that extraction is starting.
        if self.progress:
            self.progress.start_extract(fp=self.fp, uncompress_size=self.uncompress_size)

    def finish_extract(self):
        # Notify the progress reporter that extraction finished.
        if self.progress:
            self.progress.finish_extract(fp=self.fp)

    def update_extract(self, extract_size):
        # Report cumulative uncompressed bytes processed so far.
        if self.progress:
            self.progress.update_extract(extract_size=extract_size)
    def _build_trek_ressources(self, trek, language, force):
        """Build the per-trek media zip for `language`.

        Collects picture URLs for the trek, its information desks and one
        picture per POI, writes them into trek-<id>.zip.new, and only
        replaces the existing archive when the content actually changed
        (or `force` is set).
        """
        logger.info("Build %s ressources file for trek '%s'..." % (language, trek.properties.name))

        output_folder = os.path.join(settings.INPUT_DATA_ROOT, language, 'api/trek')
        if not os.path.exists(output_folder):
            logger.info("Create folder %s" % output_folder)
            os.makedirs(output_folder)
        zipfilename = os.path.join(output_folder, 'trek-%u.zip' % trek.id)
        # Write to a '.new' sibling first so the swap below is safe.
        zipfile = ZipFile(zipfilename + '.new', 'w')

        media = set()
        # NOTE(review): missing_media is never populated below, so the
        # warning branch further down is currently dead code — confirm intent.
        missing_media = set()

        trek_dest = 'trek/{trek.pk}'.format(trek=trek)
        # All pictures
        for picture in trek.properties.pictures:
            media.add((picture['url'], trek_dest))
        # Information desks picture
        for desk in trek.properties.information_desks:
            if desk['photo_url']:
                media.add((desk['photo_url'], trek_dest))
        # Only one picture per POI
        for poi in trek.pois.all():
            poi_dest = 'poi/{poi.pk}'.format(poi=poi)
            if poi.properties.pictures:
                media.add((poi.properties.pictures[0]['url'], poi_dest))

        if missing_media:
            logger.warning('Missing media: ' + ', '.join(missing_media))

        for url, dest in media:
            url = unquote(url).lstrip('/')
            fullpath = os.path.join(settings.INPUT_DATA_ROOT, url)
            arcname = os.path.join(dest, os.path.basename(url))
            zipfile.write(fullpath, arcname)

        # Compare (name, CRC) pairs against the previous archive to decide
        # whether anything actually changed.
        try:
            oldzipfile = ZipFile(zipfilename, 'r')
        except IOError:
            uptodate = False
        else:
            old = set([(zi.filename, zi.CRC) for zi in oldzipfile.infolist()])
            new = set([(zi.filename, zi.CRC) for zi in zipfile.infolist()])
            uptodate = (old == new) and not force
            oldzipfile.close()

        zipfile.close()
        if uptodate:
            os.unlink(zipfilename + '.new')
            logger.info('%s was up to date.' % zipfilename)
        else:
            os.rename(zipfilename + '.new', zipfilename)
            logger.info('%s done.' % zipfilename)
Beispiel #5
0
    def read_words_from_anki_pkg(self, anki_pkg_file_path):
        """Reads words from an apkg file into a set

            arguments:
                anki_pkg_file_path(str): path to the anki pkg file

            returns:
                (set) of words

            raises:
                ValueError: if the path does not exist, the archive is
                    corrupt, or it contains no collection database
        """
        anki_deck_db_url = ''

        # Extract the collection database from the .apkg file.
        if not os.path.exists(anki_pkg_file_path):
            raise ValueError('Path to apkg file is not valid')

        try:
            # An .apkg is a zip archive; the deck data lives in a bundled
            # SQLite database named collection.anki2.  The context manager
            # closes the archive on all paths (the original leaked it).
            with ZipFile(anki_pkg_file_path, 'r') as zf:
                for info in zf.infolist():
                    if info.filename == 'collection.anki2':
                        zf.extract(info.filename)
                        anki_deck_db_url = "sqlite:///{0}".format(info.filename)
        except BadZipfile:
            raise ValueError('apkg file is corrupt or not valid')

        if len(anki_deck_db_url) == 0:
            raise ValueError('No collection database file found in apkg')

        return Anki().distill_words(anki_deck_db_url)
def do(zip_file_name, include):
    """Rewrite `zip_file_name` in place, keeping only the members whose
    names match one of the fnmatch patterns in `include`.

    The filtered archive is written to '_<name>' first; the original is
    removed and the new file renamed over it only after the copy finished
    successfully, so a mid-way failure cannot destroy the original
    (the previous version deleted it even on error, and its `finally`
    could hit unbound `zout`/`zin` if opening failed).
    """
    print('zip_file_name:', zip_file_name)
    print('Include files:', include)

    # The filtered ("modified") zip.
    out_zip_file_name = '_' + zip_file_name

    print('open {} and {} zip arhives'.format(zip_file_name, out_zip_file_name))
    zin = ZipFile(zip_file_name, 'r')
    try:
        zout = ZipFile(out_zip_file_name, 'w')
        try:
            print('start fill {} zip arhive'.format(out_zip_file_name))

            for item in zin.infolist():
                buffer = zin.read(item.filename)

                if any(fnmatch.fnmatch(item.filename, pattern) for pattern in include):
                    # Re-using the original ZipInfo keeps timestamps and
                    # compression metadata of kept members.
                    zout.writestr(item, buffer)
                else:
                    print('Delete', item.filename)

            print('finish fill {} zip arhive'.format(out_zip_file_name))
        finally:
            zout.close()
    finally:
        zin.close()

    # Remove the original only after the new archive was fully written.
    print('remove original {} zip file'.format(zip_file_name))
    os.remove(zip_file_name)

    # Rename the filtered zip to the original name.
    print('rename {} zip file as original {}'.format(out_zip_file_name, zip_file_name))
    os.rename(out_zip_file_name, zip_file_name)
Beispiel #7
0
class ApkParser:
    """Thin wrapper around an APK (zip) file exposing manifest attributes
    and the signing certificate."""

    def __init__(self, file):
        self._file = ZipFile(file)

    def getManifest(self):
        """Parse the binary AndroidManifest.xml into a DOM object."""
        raw = self._file.read('AndroidManifest.xml')
        return AXML(raw).get_xml_obj()

    def getPackageName(self):
        manifest = self.getManifest()
        return manifest.documentElement.getAttribute('package')

    def getVersionCode(self):
        manifest = self.getManifest()
        return int(manifest.documentElement.getAttribute('android:versionCode'))

    def getVersionName(self):
        manifest = self.getManifest()
        return manifest.documentElement.getAttribute('android:versionName')

    def getMinSdkVersion(self):
        uses_sdk = self.getManifest().documentElement.getElementsByTagName('uses-sdk')[0]
        return int(uses_sdk.getAttribute('android:minSdkVersion'))

    def _getCerts(self):
        """Yield DER-encoded certificates found in META-INF/*.RSA entries."""
        for entry in self._file.infolist():
            name = entry.filename
            if not (name.startswith('META-INF/') and name.endswith('.RSA')):
                continue
            signed_data = ContentInfo.load(self._file.read(entry))
            for cert in signed_data['content']['certificates']:
                yield cert.dump()

    def getCert(self):
        """Return the single signing certificate; raise if there is not
        exactly one."""
        certs = list(self._getCerts())
        if len(certs) != 1:
            raise Exception('Cannot read certificate')
        return certs[0]
Beispiel #8
0
def getTranslations(type, localesDir, defaultLocale, projectName, key):
  """Download all Crowdin translations for `projectName` and sync them
  into `localesDir`, then delete stale locale files.

  type: 'chrome' or 'gecko' -- controls file filtering, locale-name
    mapping and the on-disk output format.
  NOTE: Python 2 code (urllib2, iteritems).
  """
  # Ask Crowdin to (re)build the export before downloading it.
  result = urllib2.urlopen('http://api.crowdin.net/api/project/%s/export?key=%s' % (projectName, key)).read()
  if result.find('<success') < 0:
    raise Exception('Server indicated that the operation was not successful\n' + result)

  result = urllib2.urlopen('http://api.crowdin.net/api/project/%s/download/all.zip?key=%s' % (projectName, key)).read()
  zip = ZipFile(StringIO(result))
  dirs = {}
  for info in zip.infolist():
    if not info.filename.endswith('.json'):
      continue

    dir, file = os.path.split(info.filename)
    # Skip the default locale and anything that isn't a plain locale dir.
    if not re.match(r'^[\w\-]+$', dir) or dir == defaultLocale:
      continue
    if type == 'chrome' and file.count('.') == 1:
      origFile = file
    else:
      origFile = re.sub(r'\.json$', '', file)
    if type == 'gecko' and not origFile.endswith('.dtd') and not origFile.endswith('.properties'):
      continue

    # Map Crowdin locale names back to product locale names.
    # NOTE(review): the loop variable below shadows the `key` parameter
    # (harmless since the API key is no longer used past this point,
    # but fragile).
    mapping = langMappingChrome if type == 'chrome' else langMappingGecko
    for key, value in mapping.iteritems():
      if value == dir:
        dir = key
    if type == 'chrome':
      dir = dir.replace('-', '_')

    data = zip.open(info.filename).read()
    if data == '[]':
      continue

    # Track which files each locale received so stale ones can be removed.
    if not dir in dirs:
      dirs[dir] = set()
    dirs[dir].add(origFile)

    path = os.path.join(localesDir, dir, origFile)
    if not os.path.exists(os.path.dirname(path)):
      os.makedirs(os.path.dirname(path))
    if type == 'chrome' and origFile.endswith('.json'):
      postprocessChromeLocale(path, data)
    elif type == 'chrome':
      # Chrome locale holding a single message: unwrap and write it raw.
      data = json.loads(data)
      if origFile in data:
        fileHandle = codecs.open(path, 'wb', encoding='utf-8')
        fileHandle.write(data[origFile]['message'])
        fileHandle.close()
    else:
      fromJSON(path, data)

  # Remove any extra files
  for dir, files in dirs.iteritems():
    baseDir = os.path.join(localesDir, dir)
    if not os.path.exists(baseDir):
      continue
    for file in os.listdir(baseDir):
      path = os.path.join(baseDir, file)
      if os.path.isfile(path) and (file.endswith('.json') or file.endswith('.properties') or file.endswith('.dtd')) and not file in files:
        os.remove(path)
Beispiel #9
0
    def save_file_data(settle_date, data, temp_path, merchant_id, temp_prefix='unionpay_'):
        '''
        Save UnionPay settlement data to disk and return the balance files.

        @settle_date:   like 1216 for generate filename
        @data:          fileContent from request (zip archive bytes)
        @temp_path:     save data to a temp path
        @merchant_id:   used to filter which extracted files are accepted
        @temp_prefix:   prefix for the per-settlement directory name

        returns: list of paths of the accepted balance files
        '''
        # Timestamp makes the temporary zip name unique per invocation.
        timeRandomString = datetime.now().strftime("%Y%m%d%H%M%S")
        path = os.path.join(
            temp_path, "%s%s%s" % (temp_prefix, datetime.now().year, settle_date))

        if not os.path.exists(path):
            os.mkdir(path)

        # Dump the raw zip bytes to a temp file, extract it, then delete it.
        fileWholePath = "%s/SMT_%s.zip" % (path, timeRandomString)
        with open(fileWholePath, 'wb') as f:
            f.write(data)
        logger.debug("temp file <%s> created!" % fileWholePath)
        zfile = ZipFile(fileWholePath, 'r')
        zfile.extractall(path)
        files_list = zfile.infolist()
        logger.debug("file <%s> unziped!" % ','.join(zfile.namelist()))
        zfile.close()
        logger.debug("balance file <%s> saved!" % path)
        os.unlink(fileWholePath)
        logger.debug("temp file deleted")

        balance_files = []

        # Keep only the file types this merchant accepts.
        for item in files_list:
            if Signer.accept_filetype(item.filename, merchant_id):
                balance_files.append(os.path.join(path, item.filename))
        return balance_files
Beispiel #10
0
    def assertInZip(self, expected_files, filename, equal=True):
        """
        Check if the given `expected_files` exists in the Zip archive.

        `expected_files` maps member names to uncompressed sizes (with
        `equal=True` the mapping must match exactly; otherwise each
        expected name must merely be present).  `filename` may be a path
        or a readable binary stream.
        """
        new_filename = None
        try:
            # If a stream is provided, dump it a file. ZipFile doesn't read file from a stream.
            if not isinstance(filename, str):
                f = filename
                # mkstemp instead of the deprecated, race-prone mktemp:
                # the file is created atomically and we own the fd.
                fd, new_filename = tempfile.mkstemp(prefix='rdiffweb_test_archiver_', suffix='.zip')
                filename = new_filename
                with io.open(fd, 'wb') as out:
                    byte = f.read(4096)
                    while byte:
                        out.write(byte)
                        byte = f.read(4096)
                f.close()

            # Get data from zip.
            actual = {}
            a = ZipFile(filename)
            try:
                for m in a.infolist():
                    name = m.filename
                    if isinstance(name, bytes):
                        name = name.decode('utf8')
                    actual[name] = m.file_size
            finally:
                # Close the archive even if infolist/decoding fails.
                a.close()
            # Compare.
            if equal:
                self.assertEqual(expected_files, actual)
            else:
                for expected_file in expected_files:
                    self.assertIn(expected_file, actual)
        finally:
            if new_filename:
                os.remove(new_filename)
Beispiel #11
0
    def test(self):
        """
        Check server response, file name and  order hash and timestamp applying
        """

        order_hash = uuid.uuid4().hex
        # Live ePub fetched through the service; network access required.
        url = 'https://s3.eu-central-1.amazonaws.com/saxo-static/ebooks/line-vindernovelle-i-krimidysten.epub'

        response = client.get(reverse('add_mark')+'?url=%s&order_hash=%s' % (url, order_hash))
        origin_name = url.split('/')[-1]
        self.assertEqual(status.HTTP_200_OK, response.status_code)
        # The download must keep the original file name.
        self.assertIn('filename=%s' % origin_name, response.get('Content-Disposition'))
        zipped_file = ZipFile(StringIO(response.content), 'r')

        file_to_check = 'META-INF/container.xml'
        # check only date because can be next hour after file received
        timestamp = datetime.now().strftime('%Y-%m-%d')

        # The watermark (order hash + date) must be embedded inside the
        # container manifest of the ePub (a zip archive).
        self.assertIn(file_to_check, zipped_file.namelist())
        for item in zipped_file.infolist():
            if item.filename == file_to_check:
                data = zipped_file.read(item.filename)

                self.assertIn(order_hash, data)
                self.assertIn(timestamp, data)

        zipped_file.close()
 def _unpack(self, filepath):
     """Determines the vim script's extension and unpacks it. Sets the
     files variable. Removes the archive file and temp dir.

     NOTE: Python 2 code (print statement).
     NOTE(review): '.bz2' is matched by the first tar branch, so the
     combined '.tar.bz2' branch below can never be reached for bz2 —
     confirm whether that first branch really handles .bz2 archives.
     """
     import os.path
     root, ext = os.path.splitext(filepath)
     if ext == u'.zip':
         from zipfile import ZipFile
         archive = ZipFile(filepath, 'r')
         # Record extracted file names, skipping directory entries.
         self.files = [member.filename for member in archive.infolist() if
                 not member.filename[-1] == '/']
         archive.extractall('/home/chris/.vim')
     elif ext == u'.tar' or ext == u'.tgz' or ext == u'.bz2':
         import tarfile
         archive = tarfile.open(filepath)
         self.files = [member.name for member in archive.getmembers() if
                 member.isfile()]
         archive.extractall('/home/chris/.vim')
     elif (ext == u'.gz' or ext == u'.bz2') and (os.path.splitext(root)[1] ==
             u'.tar'):
         import tarfile
         archive = tarfile.open(filepath)
         self.files = [member.name for member in archive.getmembers() if
                 member.isfile()]
         archive.extractall('/home/chris/.vim')
     elif ext == u'.vba':
         # Vimball archive: let vim itself unpack it.
         vimExecute(':so %\n:q\n')
         self.files=[filepath]
     # Cleanup
     print "Deleting {0} and {1}".format(filepath, os.path.dirname(filepath))
     os.unlink(filepath)
     os.rmdir(os.path.dirname(filepath))
Beispiel #13
0
def test_pydist():
    """Make sure pydist.json exists and validates against our schema."""
    # XXX this test may need manual cleanup of older wheels

    import jsonschema

    def open_json(filename):
        # Decode explicitly so the test is independent of locale defaults.
        with open(filename, 'rb') as json_file:
            return json.loads(json_file.read().decode('utf-8'))

    pymeta_schema = open_json(resource_filename('wheel.test',
                                                'pydist-schema.json'))
    valid = 0
    # Walk each test dist's tree and validate the metadata.json embedded
    # in every wheel (.whl is a zip archive) that is found.
    for dist in ("simple.dist", "complex-dist"):
        basedir = pkg_resources.resource_filename('wheel.test', dist)
        for (dirname, subdirs, filenames) in os.walk(basedir):
            for filename in filenames:
                if filename.endswith('.whl'):
                    whl = ZipFile(os.path.join(dirname, filename))
                    for entry in whl.infolist():
                        if entry.filename.endswith('/metadata.json'):
                            pymeta = json.loads(whl.read(entry).decode('utf-8'))
                            jsonschema.validate(pymeta, pymeta_schema)
                            valid += 1
    # Guard against silently passing when no wheels were present at all.
    assert valid > 0, "No metadata.json found"
Beispiel #14
0
def zip_data(source, modified_files):
    """Return the bytes of a new zip built from `source` (zip file bytes),
    replacing member data according to `modified_files`.

    `modified_files` maps member name -> new data; a value of None drops
    the member entirely.  All other members are copied through unchanged,
    keeping their original ZipInfo metadata.
    """
    # BytesIO, not StringIO: ZipFile requires a binary stream (the
    # original StringIO-based version only worked on Python 2), and
    # info.extra must be bytes.
    from io import BytesIO

    file = BytesIO()
    outzip = ZipFile(file, 'w')
    zip = ZipFile(BytesIO(source))
    for info in zip.infolist():
        # Replace the data from the map
        if info.filename in modified_files:
            data = modified_files[info.filename]
            if data is None:
                continue
        else:
            data = zip.read(info.filename)

        # Section 17.4 says the mimetype file shall not include an extra
        # field.  So we remove it even if present in the source.
        if info.filename == 'mimetype':
            info.extra = b''

        # Ok
        outzip.writestr(info, data)

    # Ok
    outzip.close()
    content = file.getvalue()
    file.close()
    return content
Beispiel #15
0
def mass_upload(request, form):
    """Import every member of an uploaded zip archive as a ChorusFile.

    Each entry is copied into its own temporary file and saved through
    Django's storage with the metadata from the submitted form.
    """

    # seems like default storage can't read from a zip file
    # built on an uploaded file.  Try saving it first.
    import tempfile
    tmpfile = tempfile.TemporaryFile()
    tmpfile.write(request.FILES['file'].read())
    tmpfile.seek(0)

    zipfile = ZipFile(tmpfile, mode="r")
    for info in zipfile.infolist():
        # and it seems like we need a tmp file per file in the zip too
        tmpfile2 = tempfile.TemporaryFile()
        f = zipfile.open(info, "r")
        tmpfile2.write(f.read())
        f.close()
        tmpfile2.seek(0)
        django_file = DjangoFile(tmpfile2)
        # Django's File gets its size from the zip entry, not the stream.
        setattr(django_file, 'size', info.file_size)

        chorus_file = ChorusFile.objects.create(name=info.filename,
                                                user=request.user,
                                                comments=form.cleaned_data['comments'] or info.comment,
                                                timestamp=datetime.datetime.now(),
                                                purpose=form.cleaned_data['purpose'],
                                                season=form.cleaned_data['season'],
                                                voice=form.cleaned_data['voice'],
                                                size=info.file_size)
        chorus_file.save_uploaded_file(uploaded_file=django_file)
        chorus_file.save()
Beispiel #16
0
def unzip(filename, match_dir=False, destdir=None):
    """
    Extract all files from a zip archive
    filename: The path to the zip file
    match_dir: If True all files in the zip must be contained in a subdirectory
      named after the archive file with extension removed
    destdir: Extract the zip into this directory, default current directory

    return: If match_dir is True then returns the subdirectory (including
      destdir), otherwise returns destdir or '.'
    """
    if not destdir:
        destdir = '.'

    unzipped = '.'

    # Context manager closes the archive deterministically (the original
    # leaked the handle).
    with ZipFile(filename) as z:
        if match_dir:
            if not filename.endswith('.zip'):
                raise FileException('Expected .zip file extension', filename)
            unzipped = os.path.basename(filename)[:-4]
            check_extracted_paths(z.namelist(), unzipped)
        else:
            check_extracted_paths(z.namelist())

        # File permissions, see
        # http://stackoverflow.com/a/6297838
        # http://stackoverflow.com/a/3015466
        for info in z.infolist():
            log.debug('Extracting %s to %s', info.filename, destdir)
            z.extract(info, destdir)
            mode = info.external_attr >> 16 & 4095
            # Archives created on Windows carry no Unix mode bits
            # (external_attr is 0); skip chmod(path, 0), which would make
            # the extracted files inaccessible.
            if mode:
                os.chmod(os.path.join(destdir, info.filename), mode)

    return os.path.join(destdir, unzipped)
Beispiel #17
0
 def extractAll(self):
     """Extract the archive into self.unpackDir, using the optimizer
     database to skip files that are already up to date.

     Returns Extractor.RESULT_EXTRACTED on success or
     Extractor.RESULT_ERROR on I/O failure.
     """
     logger.info('extracting now.')
     # Open database and check existing files.
     optimizer = ExtractionOptimizer(self.optimizeFile)
     try:
         optimizer.scanDir(self.unpackDir)
         zipFile = ZipFile(self.zip, 'r')
         try:
             # Register new files.
             for zipInfo in zipFile.infolist():
                 if Extractor.__isFile(zipInfo):
                     fileInfo = FileInfo.fromZipInfo(zipInfo, 1)
                     optimizer.registerFile(fileInfo)
             # Update file storage.
             extractor = RawExtractor(self.unpackDir, zipFile)
             for op in optimizer.operations():
                 extractor.extract(op)
         except IOError:
             # I/O problems are reported via the result code, not raised;
             # both finally blocks still run to release resources.
             return Extractor.RESULT_ERROR
         finally:
             zipFile.close()

     finally:
         optimizer.close()
     logger.info('extract completed.')
     return Extractor.RESULT_EXTRACTED
Beispiel #18
0
def _download_biogrid_data(url):
    """Downloads zipped, tab-separated Biogrid data in .tab2 format.

    Parameters:
    -----------
    url : str
        URL of the BioGrid zip file.

    Returns
    -------
    csv.reader
        A csv.reader object for iterating over the rows (header has already
        been skipped).
    """
    # Bug fix: the original ignored its `url` parameter and fetched the
    # module-level `biogrid_file_url` instead.
    res = requests.get(url)
    if res.status_code != 200:
        raise Exception('Unable to download Biogrid data: status code %s'
                        % res.status_code)
    zip_bytes = BytesIO(res.content)
    zip_file = ZipFile(zip_bytes)
    zip_info_list = zip_file.infolist()
    # There should be only one file in this zip archive
    if len(zip_info_list) != 1:
        raise Exception('There should be exactly one file in BioGrid zip '
                        'archive: %s' % str(zip_info_list))
    unzipped_bytes = zip_file.read(zip_info_list[0]) # Unzip the file
    biogrid_str = StringIO(unzipped_bytes.decode('utf8')) # Make file-like obj
    csv_reader = csv.reader(biogrid_str, delimiter='\t') # Get csv reader
    next(csv_reader) # Skip the header
    return csv_reader
Beispiel #19
0
def springer():
    """Scan downloaded Springer JHEP zip packages and report which of the
    DOIs listed in /tmp/repo_diff_result2 ('only_in_old') they contain.

    Logs the resulting mapping {zip path: [DOIs]} as JSON.
    """
    DIR = 'JHEP/'
    # Metadata members inside each package that carry the article DOI.
    EXT = ('.xml.Meta', '.xml.scoap')
    BASE_DIR = '/eos/project/s/scoap3repo/BETA/harvesting/Springer/download/' + DIR
    zip_list = listdir(BASE_DIR)

    needed_dois = json.loads(open('/tmp/repo_diff_result2', 'r').read())['only_in_old']

    extracted_dois = {}
    for file in zip_list:
        full_path = BASE_DIR + file
        if isfile(full_path) and full_path.endswith('.zip'):
            try:
                zip = ZipFile(full_path)
                for zip_element in zip.infolist():
                    fn = zip_element.filename
                    if fn.endswith(EXT):
                        # Pull the DOI out of the article metadata XML.
                        xml = parseString(zip.read(zip_element))
                        doi = xml.getElementsByTagName('ArticleDOI')[0].firstChild.nodeValue
                        if doi in needed_dois:
                            if full_path not in extracted_dois:
                                extracted_dois[full_path] = []
                            extracted_dois[full_path].append(doi)
            except BadZipfile as e:
                # Corrupt package: log it and keep scanning the rest.
                error('file %s: %s' % (file, e))

    info('%s' % json.dumps(extracted_dois, indent=2))
Beispiel #20
0
def iterate_runs_points(runs):
    ''' Iterate over all the points, skipping share-alike sources.

    Yields Point(lon, lat, result, row) for every row of the first .csv
    member of each processed-result zip whose coordinates parse and lie
    within valid lon/lat bounds.
    '''
    for result in iterate_local_processed_files(runs, sort_on='source_path'):
        if result.run_state.share_alike == 'true':
            continue

        _L.info('Indexing points from {}'.format(result.source_base))
        _L.debug('filename: {}'.format(result.filename))
        _L.debug('run_state: {}'.format(result.run_state))
        _L.debug('code_version: {}'.format(result.code_version))
        with open(result.filename, 'rb') as file:
            result_zip = ZipFile(file)

            csv_infos = [zipinfo for zipinfo in result_zip.infolist()
                         if splitext(zipinfo.filename)[1] == '.csv']

            # NOTE(review): `break` stops the whole generator at the first
            # zip lacking a .csv member; `continue` (skip just this
            # result) may have been intended — confirm.
            if not csv_infos:
                break

            zipped_file = result_zip.open(csv_infos[0].filename)
            point_rows = DictReader(TextIOWrapper(zipped_file))

            for row in point_rows:
                try:
                    lat, lon = float(row['LAT']), float(row['LON'])
                except ValueError:
                    # Skip this point if the lat/lon don't parse
                    continue

                # Include this point if it's on Earth
                if -180 <= lon <= 180 and -90 <= lat <= 90:
                    yield Point(lon, lat, result, row)
Beispiel #21
0
    def preloadFont(cls, font, directory=DEFAULT_DIR):
        """
        Load font file into memory. This can be overriden with
        a superclass to create different font sources.

        Looks for <font>.flf then <font>.tlf under `directory`; the file
        may itself be a zip archive whose first member holds the font
        data.  Raises pyfiglet.FontNotFound when no file exists and
        pyfiglet.FontError when the file cannot be read.
        """

        fontPath = os.path.join(directory, font + ".flf")
        if not os.path.exists(fontPath):
            fontPath = os.path.join(directory, font + ".tlf")
            if not os.path.exists(fontPath):
                raise pyfiglet.FontNotFound("%s doesn't exist" % font)

        if is_zipfile(fontPath):
            z = None
            try:
                z = ZipFile(fontPath, "r")
                # read() accepts a ZipInfo directly; the previous
                # z.getinfo(z.infolist()[0].filename) round-trip was
                # redundant.
                data = z.read(z.infolist()[0])
                return data.decode("utf-8", "replace") if ST3 else data
            except Exception as e:
                raise pyfiglet.FontError("couldn't read %s: %s" % (fontPath, e))
            finally:
                # Close the archive on success and failure alike (the
                # original leaked it when decode raised).
                if z is not None:
                    z.close()
        else:
            try:
                with open(fontPath, "rb") as f:
                    data = f.read()
                return data.decode("utf-8", "replace") if ST3 else data
            except Exception as e:
                raise pyfiglet.FontError("couldn't open %s: %s" % (fontPath, e))
Beispiel #22
0
def parse_zipfile_metadata(uploaded_file):
    """
    Given a file, extract out the metadata.json, parse, and return it.

    Raises InvalidExtensionData if the upload is not a valid zip, is
    larger than 5 MB uncompressed, or has no parseable metadata.json.
    """
    try:
        zipfile = ZipFile(uploaded_file, 'r')
    except (BadZipfile, zlib.error):
        raise InvalidExtensionData("Invalid zip file")

    # Close the archive on every path (the original leaked the handle
    # whenever one of the errors below was raised).
    try:
        if zipfile.testzip() is not None:
            raise InvalidExtensionData("Invalid zip file")

        # Cap the uncompressed payload to guard against zip bombs.
        total_uncompressed = sum(i.file_size for i in zipfile.infolist())
        if total_uncompressed > 5*1024*1024: # 5 MB
            raise InvalidExtensionData("Zip file is too large")

        try:
            metadata = json.load(zipfile.open('metadata.json', 'r'))
        except KeyError:
            # no metadata.json in archive, raise error
            raise InvalidExtensionData("Missing metadata.json")
        except ValueError:
            # invalid JSON file, raise error
            raise InvalidExtensionData("Invalid JSON data")
    finally:
        zipfile.close()

    return metadata
Beispiel #23
0
def parseZip( fn ):
	"""Read the replay-related members of zip `fn` into memory and hand
	them to db.parseZipMembers().

	Members outside files_of_interest or larger than 5 MB are dropped; if
	any were dropped, the zip on disk is rewritten with only the kept
	members (the original is preserved as '<fn>.orig').
	"""
	date_time = ''
	members = dict()
	removemembers = False
	zipfile = ZipFile( fn )
	# The cached record listing is stale once a new zip is parsed.
	cache.invalidate(recordlist.output, 'list_output', )
	files_of_interest = ['infolog.txt','ext.txt','platform.txt','script.txt','settings.txt','unitsync.log','client.txt','information.txt','demo.sdf']
	
	for info in zipfile.infolist():
		if info.filename in files_of_interest and info.file_size < 5 * 1024 * 1024:
			members[info.filename] = zipfile.read( info.filename )
			# infolog's timestamp stands in for the whole upload's date.
			if info.filename == 'infolog.txt':
				date_time = info.date_time
		else:
			removemembers = True
	
	if removemembers:
		# Rebuild the zip with only the kept members, via a temp file.
		newzipfile = ZipFile (fn + '.new', 'w')
		tmpfilename = '/tmp/' + os.path.basename (fn) + '.tmp'
		for file in members.keys ():
			tmpfile = open (tmpfilename, 'w')
			tmpfile.write (zipfile.read (file))
			tmpfile.close ()
			newzipfile.write (tmpfilename, file)
			os.remove (tmpfilename)
		newzipfile.close ()
		zipfile.close ()
		os.rename (fn, fn + '.orig')
		os.rename (fn + '.new', fn)
	else:
		zipfile.close ()
	
	return db.parseZipMembers( fn, members, date_time )
Beispiel #24
0
def getTranslations(localesDir, defaultLocale, projectName, key):
    """Download translated strings from Crowdin and sync them into *localesDir*.

    Triggers a project export, downloads the "all" archive, writes every
    *.dtd.json / *.properties.json member through fromJSON(), and removes
    stale .dtd/.properties files for each locale seen in the download.
    (Python 2 code: urllib2, iteritems.)
    """
    export = urllib2.urlopen('http://api.crowdin.net/api/project/%s/export?key=%s' % (projectName, key)).read()
    if export.find('<success') < 0:
        raise Exception('Server indicated that the operation was not successful\n' + export)

    payload = urllib2.urlopen('http://api.crowdin.net/api/project/%s/download/all.zip?key=%s' % (projectName, key)).read()
    archive = ZipFile(StringIO(payload))
    seen = {}
    for entry in archive.infolist():
        if not (entry.filename.endswith('.dtd.json') or entry.filename.endswith('.properties.json')):
            continue

        locale, name = os.path.split(entry.filename)
        baseName = re.sub(r'\.json$', '', name)
        # Only well-formed locale directories other than the default.
        if not re.match(r'^[\w\-]+$', locale) or locale == defaultLocale:
            continue
        seen.setdefault(locale, set()).add(baseName)

        fromJSON(os.path.join(localesDir, locale, baseName),
                 archive.open(entry.filename).read())

    # Remove any extra files
    for locale, keepFiles in seen.iteritems():
        localeDir = os.path.join(localesDir, locale)
        if not os.path.exists(localeDir):
            continue
        for name in os.listdir(localeDir):
            filePath = os.path.join(localeDir, name)
            if os.path.isfile(filePath) and (name.endswith('.properties') or name.endswith('.dtd')) and name not in keepFiles:
                os.remove(filePath)
Beispiel #25
0
def update(restart=True, test=False):
  """Download the latest worker archive and overlay it onto this checkout.

  Parameters:
    restart: when True, restart the worker once files are in place.
    test:    when True, skip copying and return the extracted file list.

  Returns the extracted top-level file list when test is True.
  """
  worker_dir = os.path.dirname(os.path.realpath(__file__))
  update_dir = os.path.join(worker_dir, 'update')
  if not os.path.exists(update_dir):
    os.makedirs(update_dir)

  worker_zip = os.path.join(update_dir, 'wk.zip')
  with open(worker_zip, 'wb+') as f:
    f.write(requests.get(WORKER_URL).content)

  # BUG FIX: compute the archive prefix *before* closing it — the old
  # code called infolist() on an already-closed ZipFile.
  zip_file = ZipFile(worker_zip)
  try:
    zip_file.extractall(update_dir)
    prefix = os.path.commonprefix([n.filename for n in zip_file.infolist()])
  finally:
    zip_file.close()

  fishtest_src = os.path.join(update_dir, prefix)
  fishtest_dir = os.path.dirname(worker_dir) # fishtest_dir is assumed to be parent of worker_dir
  if not test:
    copy_tree(fishtest_src, fishtest_dir)
  else:
    file_list = os.listdir(fishtest_src)
  shutil.rmtree(update_dir)

  print("start_dir: " + start_dir)
  if restart:
    do_restart()

  if test:
    return file_list
Beispiel #26
0
def dump_files(zipfile: ZipFile):
    """Extract every member of *zipfile* into the configured dump folder.

    The folder comes from CONF['packfile_dump']; when unset this is a
    no-op. Existing contents of the folder are cleared first, but the
    folder itself is kept.
    """
    target = CONF['packfile_dump', '']
    if not target:
        return

    target = os.path.abspath(target)

    try:
        existing = os.listdir(target)
    except FileNotFoundError:
        return

    # Clear out the previous dump, but keep the directory itself.
    for entry in existing:
        full = os.path.join(target, entry)
        if not os.path.isdir(full):
            os.remove(full)
            continue
        try:
            shutil.rmtree(full)
        except OSError:
            # It's possible to fail here, if the window is open elsewhere.
            # If so, just skip removal and fill the folder.
            pass

    for member in zipfile.infolist():
        zipfile.extract(member, target)
Beispiel #27
0
def main(opts):
    """Extract each archive in opts.files into opts.dest, re-decoding
    member names with the codepage given in opts.codepage."""
    for archive_path in opts.files:
        archive = ZipFile(archive_path)
        for member in archive.infolist():
            member.filename = decode_filename(member.filename, opts.codepage)
            archive.extract(member, path=opts.dest)
Beispiel #28
0
    def openZip(cls, fo):
        """Return a file object for the single member of the FBZ zip *fo*.

        Raises NotAFBZException when the archive does not contain
        exactly one file.
        """
        archive = ZipFile(fo)
        members = archive.infolist()
        if len(members) != 1:
            raise NotAFBZException()
        return archive.open(members[0].filename)
Beispiel #29
0
 def extractAll(self):
     """Extract the archive at self.zip into self.unpackDir.

     Uses an ExtractionOptimizer keyed on self.optimizeFile to skip
     files that are already up to date, reporting progress through
     self.progress. Returns True when every operation succeeded.
     """
     call(self.progress, 'begin_extract')
     # Open database and check existing files.
     optimizer = ExtractionOptimizer(self.optimizeFile)
     optimizer.scanDir(self.unpackDir)
     # Open and read zip file.
     zipFile = ZipFile(self.zip, 'r')
     success = True
     try:
         # Register new files.
         for zipInfo in zipFile.infolist():
             if Extractor.__isFile(zipInfo):
                 fileInfo = FileInfo.fromZipInfo(zipInfo, 1)
                 optimizer.registerFile(fileInfo)
         # Update files.
         extractor = RawExtractor(self.unpackDir, zipFile)
         for op in optimizer.operations():
             # NOTE(review): a single failed op flips success to False but
             # extraction continues for the remaining operations.
             success &= extractor.extract(op)
             call(self.progress, 'do_extract', optimizer.currentIndex,
                     optimizer.maxSize)
         # Commit new fileset.
         if success:
             optimizer.commit()
     finally:
         zipFile.close()
     call(self.progress, 'end_extract')
     return success
Beispiel #30
0
def extract_subfiles(source_path, dest_path, verbose=False):
    """Mirror *source_path* into *dest_path*, expanding embedded archives.

    Files whose destination path ends in '.cod' are treated as zip
    archives and their members are extracted next to the destination
    file (with a path-traversal guard); everything else is copied
    verbatim. (Python 2 code: print statements, old except syntax.)
    """
    if os.path.isdir(source_path):
        for dirpath, dirnames, filenames in os.walk(source_path):
            # Recreate the directory structure under dest_path.
            relpath = os.path.relpath(dirpath, source_path)
            new_dir_path = os.path.join(dest_path, relpath)
            if not os.path.isdir(new_dir_path):
                os.mkdir(new_dir_path)
            for filename in filenames:
                try:
                    source_file_path = os.path.join(dirpath, filename)
                    relpath = os.path.relpath(source_file_path, source_path)
                    dest_file_path = os.path.join(dest_path, relpath)
                    print dest_file_path
                    if dest_file_path.endswith('.cod'):
                        zip = ZipFile(source_file_path)
                        for info in zip.infolist():
                            if verbose:
                                print '    %s (extracted)' % info.filename
                            dest_unzip_path = os.path.split(dest_file_path)[0] 
                            # Reject members whose resolved path escapes the
                            # extraction directory (zip-slip protection).
                            if not os.path.realpath(os.path.join(dest_unzip_path, info.filename)).startswith(os.path.realpath(dest_unzip_path)):
                                raise(Exception('Security exception: zip file %s attempted to extract to a non-local location' % info.filename))
                            zip.extract(info, path = dest_unzip_path)
                    else:
                        shutil.copyfile(source_file_path, dest_file_path)
                except Exception, e:
                    # NOTE(review): fragile — detects "not a zip" by matching
                    # the exception message string; relies on zipfile's exact
                    # wording.
                    if str(e) == 'File is not a zip file':
                        # this is a cod file or some other file
                        shutil.copyfile(source_file_path, dest_file_path)
                    else:
                        if verbose:
                            print >>sys.stderr, 'Error:',
                            print >>sys.stderr, str(e)
                        raise(e)
Beispiel #31
0
 def parse(self, response):
     """Handle a CKAN metadata response or a zipped data download.

     Metadata responses queue one request per JSON resource; data
     responses unpack the zip and persist each member via
     self.save_data_to_disk. Non-200 responses yield an error record.
     """
     if response.status != 200:
         yield {
             'success': False,
             'file_name': hashlib.md5(response.request.url.encode('utf-8')).hexdigest() + '.json',
             'url': response.request.url,
             'errors': {'http_code': response.status}
         }
         return

     if response.request.meta['type'] == 'meta':
         meta = json.loads(response.body_as_unicode())
         for resource in meta['result']['resources']:
             if resource['format'].upper() == 'JSON':
                 yield scrapy.Request(
                     url=resource['url'],
                     meta={'type': 'data'}
                 )
     else:
         archive = ZipFile(BytesIO(response.body))
         for member in archive.infolist():
             payload = archive.open(member.filename).read()
             yield self.save_data_to_disk(payload, member.filename, data_type='release_package', url=response.request.url)
Beispiel #32
0
def get_text_docs(corpus_zipfile: ZipFile) -> ImmutableDict[str, str]:
    """Collect raw text for every .ltf document inside the corpus archive.

    Resolves child doc IDs to parent doc IDs via docs/parent_children.tab
    and maps each parent ID to the raw text converted from its .ltf file.

    Raises RuntimeError when parent_children.tab is absent and
    FileNotFoundError when an .ltf member cannot be read.
    """
    print(f"Reading .ltf documents in {corpus_zipfile.filename}")

    root = get_root_dir_name(corpus_zipfile) or ""

    tab_path = _find_name_in_zip(
        corpus_zipfile, re.compile(f"{root}docs/parent_children.tab"))
    if not tab_path:
        raise RuntimeError("Archive lacks parent_children.tab")

    tab = _read_tab_file(
        CharSource.from_file_in_zip(corpus_zipfile, tab_path))
    parent_of = _create_child_to_parent_map(tab)

    collected = {}
    for inner_path in ZipPath(corpus_zipfile, at="data/ltf/").iterdir():
        # Each entry under data/ltf/ is itself a zip of .ltf files.
        inner_zip = ZipFile(io.BytesIO(inner_path.read_bytes()))

        for member in tqdm(
                inner_zip.infolist(),
                desc=f"Extracting {inner_path.name}",
                bar_format="{l_bar}{bar:20}{r_bar}",
        ):
            doc = ZipPath(inner_zip, at=member.filename)
            try:
                doceid = doc.name.split(".")[0]
                collected[parent_of[doceid]] = convert_ltf_to_raw_text(
                    doc.read_text(encoding="utf-8"))
            except AttributeError:
                raise FileNotFoundError(f"Could not read from {doc}.")

    return immutabledict(collected)
    def push_libs(self):
        """Push all .so libraries needed by the tests to the device.

        With --local-apk: extract every .so from the APK into a temp
        directory, szip-decompress it when an szip tool is found, and
        push it to self.remote_bin_dir. With --local-lib: push .so files
        from the lib directory and any nested "lib" sub-directory.
        (Python 2 code: uses ``print >> sys.stderr``.)
        """
        if self.options.local_apk:
            with mozfile.TemporaryDirectory() as tmpdir:
                apk_contents = ZipFile(self.options.local_apk)
                szip = os.path.join(self.options.local_bin, '..', 'host', 'bin', 'szip')
                if not os.path.exists(szip):
                    # Tinderbox builds must run szip from the test package
                    szip = os.path.join(self.options.local_bin, 'host', 'szip')
                if not os.path.exists(szip):
                    # If the test package doesn't contain szip, it means files
                    # are not szipped in the test package.
                    szip = None

                for info in apk_contents.infolist():
                    if info.filename.endswith(".so"):
                        print >> sys.stderr, "Pushing %s.." % info.filename
                        remote_file = posixpath.join(self.remote_bin_dir, os.path.basename(info.filename))
                        apk_contents.extract(info, tmpdir)
                        file = os.path.join(tmpdir, info.filename)
                        if szip:
                            # NOTE(review): output captured but unused —
                            # presumably only the exit status matters.
                            out = subprocess.check_output([szip, '-d', file], stderr=subprocess.STDOUT)
                        self.device.pushFile(os.path.join(tmpdir, info.filename), remote_file)
            return

        elif self.options.local_lib:
            for file in os.listdir(self.options.local_lib):
                if file.endswith(".so"):
                    print >> sys.stderr, "Pushing %s.." % file
                    remote_file = posixpath.join(self.remote_bin_dir, file)
                    self.device.pushFile(os.path.join(self.options.local_lib, file), remote_file)
            # Additional libraries may be found in a sub-directory such as "lib/armeabi-v7a"
            local_arm_lib = os.path.join(self.options.local_lib, "lib")
            if os.path.isdir(local_arm_lib):
                for root, dirs, files in os.walk(local_arm_lib):
                    for file in files:
                        if (file.endswith(".so")):
                            remote_file = posixpath.join(self.remote_bin_dir, file)
                            self.device.pushFile(os.path.join(root, file), remote_file)
    def push_libs(self):
        """Push all .so libraries needed by the tests to the device.

        With --local-apk: extract every .so from the APK into a temp
        directory, decompress it in place if it is xz-compressed, and
        push it to self.remote_bin_dir. With --local-lib: push .so files
        from the lib directory plus any "assets"/"lib" sub-directories.
        (Python 2 code: uses ``print >> sys.stderr``.)
        """
        if self.options.local_apk:
            with mozfile.TemporaryDirectory() as tmpdir:
                apk_contents = ZipFile(self.options.local_apk)

                for info in apk_contents.infolist():
                    if info.filename.endswith(".so"):
                        print >> sys.stderr, "Pushing %s.." % info.filename
                        remote_file = posixpath.join(self.remote_bin_dir, os.path.basename(info.filename))
                        apk_contents.extract(info, tmpdir)
                        local_file = os.path.join(tmpdir, info.filename)
                        with open(local_file) as f:
                            # Decompress xz-compressed file.
                            # (xz magic is \xfd7zXZ; byte 0 is skipped here.)
                            if f.read(5)[1:] == '7zXZ':
                                cmd = ['xz', '-df', '--suffix', '.so', local_file]
                                subprocess.check_output(cmd)
                                # xz strips the ".so" file suffix.
                                os.rename(local_file[:-3], local_file)
                        self.device.pushFile(local_file, remote_file)

        elif self.options.local_lib:
            for file in os.listdir(self.options.local_lib):
                if file.endswith(".so"):
                    print >> sys.stderr, "Pushing %s.." % file
                    remote_file = posixpath.join(self.remote_bin_dir, file)
                    local_file = os.path.join(self.options.local_lib, file)
                    self.device.pushFile(local_file, remote_file)
            # Additional libraries may be found in a sub-directory such as "lib/armeabi-v7a"
            for subdir in ["assets", "lib"]:
                local_arm_lib = os.path.join(self.options.local_lib, subdir)
                if os.path.isdir(local_arm_lib):
                    for root, dirs, files in os.walk(local_arm_lib):
                        for file in files:
                            if (file.endswith(".so")):
                                print >> sys.stderr, "Pushing %s.." % file
                                remote_file = posixpath.join(self.remote_bin_dir, file)
                                local_file = os.path.join(root, file)
                                self.device.pushFile(local_file, remote_file)
Beispiel #35
0
def do(zip_file_name, include):
    """Rewrite *zip_file_name* keeping only members matching one of the
    fnmatch patterns in *include*.

    The filtered archive is built as '_' + zip_file_name and then moved
    over the original. On failure the original archive is left intact
    (the old code deleted it unconditionally in a finally block, and
    could also hit a NameError on 'zout' if opening the archives failed).
    """
    print('zip_file_name:', zip_file_name)
    print('Include files:', include)

    # The filtered copy of the archive.
    out_zip_file_name = '_' + zip_file_name

    print('open {} and {} zip arhives'.format(zip_file_name,
                                              out_zip_file_name))
    zin = ZipFile(zip_file_name, 'r')
    try:
        zout = ZipFile(out_zip_file_name, 'w')
        try:
            print('start fill {} zip arhive'.format(out_zip_file_name))

            for item in zin.infolist():
                buffer = zin.read(item.filename)

                if any(fnmatch.fnmatch(item.filename, pattern)
                       for pattern in include):
                    zout.writestr(item, buffer)
                else:
                    print('Delete', item.filename)

            print('finish fill {} zip arhive'.format(out_zip_file_name))
        finally:
            zout.close()
    finally:
        zin.close()

    # Only replace the original once the new archive was written in full.
    print('remove original {} zip file'.format(zip_file_name))
    os.remove(zip_file_name)

    # Rename the filtered archive to the original name.
    print('rename {} zip file as original {}'.format(
        out_zip_file_name, zip_file_name))
    os.rename(out_zip_file_name, zip_file_name)
Beispiel #36
0
    def save_file_data(settle_date,
                       data,
                       temp_path,
                       merchant_id,
                       temp_prefix='unionpay_'):
        '''Persist a UnionPay settlement zip and return its balance files.

        @settle_date:   like 1216 for generate filename
        @data:          fileContent from request
        @temp_path:     save data to a temp path

        Writes *data* to a timestamped zip under a per-day directory,
        extracts it there, deletes the zip, and returns the paths of the
        members accepted by Signer.accept_filetype for *merchant_id*.
        '''
        stamp = datetime.now().strftime("%Y%m%d%H%M%S")
        target_dir = os.path.join(
            temp_path,
            "%s%s%s" % (temp_prefix, datetime.now().year, settle_date))

        if not os.path.exists(target_dir):
            os.mkdir(target_dir)

        archive_path = "%s/SMT_%s.zip" % (target_dir, stamp)
        with open(archive_path, 'wb') as fp:
            fp.write(data)
        logger.debug("temp file <%s> created!" % archive_path)

        bundle = ZipFile(archive_path, 'r')
        bundle.extractall(target_dir)
        entries = bundle.infolist()
        logger.debug("file <%s> unziped!" % ','.join(bundle.namelist()))
        bundle.close()
        logger.debug("balance file <%s> saved!" % target_dir)
        os.unlink(archive_path)
        logger.debug("temp file deleted")

        return [os.path.join(target_dir, entry.filename)
                for entry in entries
                if Signer.accept_filetype(entry.filename, merchant_id)]
Beispiel #37
0
def _extract_zip(archivefile: zipfile.ZipFile, name: str, rep: str) -> bool:
    """Extracts a zip file into directory *rep*.

    On success the original upload *name* is deleted and True is
    returned; on any extraction error the partial output is removed and
    False is returned so the caller can fall back to single-file upload.
    """
    mkdir(rep, mode=0o711)
    try:
        for member in archivefile.infolist():
            # zipfile member names are sanitized
            archivefile.extract(member, rep)
            member_location = joinpath(rep, member.filename)
            # python has no option to use umask while extracting, so…
            if isdir(member_location):
                chmod(member_location, 0o711)
            else:
                chmod(member_location, 0o644)
    # Narrowed from a bare `except:` which also swallowed
    # KeyboardInterrupt/SystemExit; the unused `import traceback` is gone.
    except Exception:  # extraction failed, remove leftover files
        log.info('Extraction of %s failed, falling back to single-file upload',
                 name, exc_info=True)
        rmtree(rep)
        return False
    else:
        remove(name)
        log.info('Successfully extracted zipfile %s into %s', name, rep)
        return True
def find_single_file_in_zip(zf: zipfile.ZipFile,
                            path: str) -> typing.Optional[zipfile.ZipInfo]:
    """Return the lone ZipInfo inside *zf* when its name matches the
    basename of the backslash-separated *path*.

    On any mismatch (empty archive, multiple members, wrong name) a
    warning is printed to stderr and None is returned.
    """
    members = zf.infolist()
    if len(members) != 1:
        if not members:
            print(
                f"Warning: ZIP data for path {path!r} doesn't contain any files.",
                file=sys.stderr)
        else:
            print(
                f"Warning: ZIP data for path {path!r} contains more than one file.",
                file=sys.stderr)
        return None

    info = members[0]
    expected_name = path.split("\\")[-1]
    if info.filename != expected_name:
        print(
            f"Warning: ZIP data for path {path!r} contains a differently named file {info.filename!r}.",
            file=sys.stderr)
        return None

    return info
Beispiel #39
0
    def close_zip(self, zipfile, name):
        """Finalize the freshly-built archive *zipfile* for output *name*.

        Compares its contents ((filename, CRC) pairs) with any existing
        archive of the same name under self.dst_root; when identical,
        the new file under self.tmp_root gets the old file's timestamps
        so downstream tooling sees it as unchanged.
        """
        oldzipfilename = os.path.join(self.dst_root, name)
        zipfilename = os.path.join(self.tmp_root, name)
        try:
            oldzipfile = ZipFile(oldzipfilename, 'r')
        except IOError:
            # No previous build output: treat everything as changed.
            uptodate = False
        else:
            # Equal (filename, CRC) sets mean identical content even if
            # member order or timestamps differ.
            old = set([(zi.filename, zi.CRC) for zi in oldzipfile.infolist()])
            new = set([(zi.filename, zi.CRC) for zi in zipfile.infolist()])
            uptodate = (old == new)
            oldzipfile.close()

        zipfile.close()
        if uptodate:
            # Carry the old archive's timestamps over to the new file.
            stat = os.stat(oldzipfilename)
            os.utime(zipfilename, (stat.st_atime, stat.st_mtime))

        if self.verbosity == '2':
            # ANSI escapes: back the cursor up and print a green status word.
            if uptodate:
                self.stdout.write(u"\x1b[3D\x1b[32munchanged\x1b[0m")
            else:
                self.stdout.write(u"\x1b[3D\x1b[32mzipped\x1b[0m")
Beispiel #40
0
    def clean(self, zip_filename):
        """Rewrite *zip_filename* without the member named
        self.remove_filename.

        Builds a temporary archive (suffix self.TEMP_EXTENSION) next to
        the original and swaps it in only when the target member was
        actually present. Returns True when a member was removed.
        """
        scratch_name = zip_filename + self.TEMP_EXTENSION
        dropped = False

        source = ZipFile(zip_filename)
        scratch = ZipFile(scratch_name, 'w')
        for member in source.infolist():
            # Directory entries are skipped entirely.
            if member.is_dir():
                continue
            if member.filename == self.remove_filename:
                dropped = True
            else:
                scratch.writestr(member.filename, source.read(member.filename))
        scratch.close()
        source.close()

        if dropped:
            os.remove(zip_filename)
            os.rename(scratch_name, zip_filename)
        else:
            os.remove(scratch_name)

        return dropped
Beispiel #41
0
    def _unzip_file(self, file_):
        results = []
        if not zipfile.is_zipfile(file_):
            return results

        try:
            zf = ZipFile(file_, 'r')
            filelist = zf.infolist()

            for fileentry in filelist:
                unzipped_file = zf.open(fileentry, pwd=self._PASSWORD).read()
                results.append({
                    'filename': fileentry.filename,
                    'data': unzipped_file
                })
        except Exception as e:
            zf.close()
            file_.close()
            raise

        zf.close()
        file_.close()
        return results
    def load(self) -> None:
        """Load every dictionary packed inside self.zip_or_dir_path.

        Groups zip members by the loader class registered for their
        filename prefix in DictionaryLoader, then runs each loader once
        over its files. Directories are rejected with a message.
        """
        if path.isdir(self.zip_or_dir_path):
            print(
                f'Directories are not supported by {self.__class__.__name__}')
            return None

        archive = ZipFile(self.zip_or_dir_path)
        grouped = dict()
        for member in archive.infolist():
            # Skip directory entries.
            if member.filename.endswith('/'):
                continue
            for prefix, LoaderClass in DictionaryLoader.dictionary_loader_by_file_prefix.items(
            ):
                if member.filename.startswith(prefix):
                    grouped.setdefault(LoaderClass, list()).append(member)
                    break
        for LoaderClass, members in grouped.items():
            LoaderClass(zip_file=archive, files=members).load_once()
Beispiel #43
0
    def _crc32(self, path):
        """Return the CRC32 checksum recorded in the archive for *path*.

        Lazily fills self._infodict (filename -> CRC) from every member
        of self.src on first use. (Python 2 code: uses long().)
        """
        # Cache already populated by an earlier call?
        if self._infodict:
            return self._infodict[path]

        try:
            archive = ZipFile(self.src)
        except BadZipfile:
            e = get_exception()
            if e.args[0].lower().startswith('bad magic number'):
                # Python2.4 can't handle zipfiles with > 64K files.  Try using
                # /usr/bin/unzip instead
                self._legacy_file_list()
            else:
                raise
        else:
            try:
                for item in archive.infolist():
                    self._infodict[item.filename] = long(item.CRC)
            except:
                # NOTE(review): bare except — any failure while listing is
                # reported as UnarchiveError, and the archive is closed only
                # on this failure path.
                archive.close()
                raise UnarchiveError('Unable to list files in the archive')

        return self._infodict[path]
Beispiel #44
0
async def download_and_extract_ppt(
        ppt_zip_link: str,
        index: int,
        download_basepath: Optional[Path] = None) -> None:
    """Download one hymn PPT zip (with on-disk caching) and extract it.

    A cached copy under *download_basepath* is reused when present; a
    sibling '<name>.missing' marker records URLs that returned 404/503
    so they are not retried.

    Parameters:
        ppt_zip_link: URL ending in 'hymn-NNN.zip'.
        index: hymn number; must match the NNN in the URL.
        download_basepath: cache directory; defaults to
            FLAGS.download_basedir.
    """
    if download_basepath is None:
        download_basepath = Path(FLAGS.download_basedir)

    log.info(f"processing {ppt_zip_link}")
    t = urlparse(ppt_zip_link)
    # BUG FIX: this literal was missing its f-prefix, so the assert
    # compared against the raw text "hymn-{index:03}.zip" and could
    # never pass for a real URL.
    assert t.path.endswith(f"hymn-{index:03}.zip")

    ppt_zip_path = download_basepath / Path(t.path).name
    if ppt_zip_path.exists():
        log.info(f"{ppt_zip_path} exists. use it as cache.")
        with ppt_zip_path.open("rb") as f:
            content = f.read()
    else:
        ppt_missing_path = download_basepath / f"{Path(t.path).name}.missing"
        if ppt_missing_path.exists():
            log.warn(f"{ppt_missing_path} exists. stop.")
            return
        status, content = await fetch(ppt_zip_link)
        if status == 200:
            with ppt_zip_path.open("wb") as out:
                out.write(content)
        elif status in (404, 503):
            # Remember permanent-looking failures so we never re-fetch them.
            log.warn(f"write {ppt_missing_path}. stop.")
            ppt_missing_path.open("wb").close()
            return
        else:
            log.error(f"status={status}")
            return

    log.info(f"extract {ppt_zip_path}")
    zf = ZipFile(BytesIO(content))
    infolist = zf.infolist()
    # Each hymn archive is expected to hold exactly one PPT file.
    assert len(infolist) == 1
Beispiel #45
0
    def handle(self, *args, **options):
        """Reload agreement data from the zip archive at options['path'].

        Wipes existing Agreement/Issuer rows, collects every PDF member
        name, rejects archives containing blank folders, then saves each
        agreement (optionally uploading to S3).
        """
        # maybe this should be replaced with a CLI options:
        do_upload = os.environ.get('AGREEMENTS_S3_UPLOAD_ENABLED', False)

        Agreement.objects.all().delete()
        Issuer.objects.all().delete()

        agreements_zip = ZipFile(options['path'])

        # Zip files default to IBM Code Page 437 encoding unless a specific
        # bit (0x800, the UTF-8 flag) is set. See Appendix D in the spec:
        # https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT
        # NOTE(review): cp437 decoding is applied when the UTF-8 bit IS set,
        # which looks inverted relative to the spec — confirm intent.
        all_pdfs = []
        for info in agreements_zip.infolist():
            if not info.filename.upper().endswith('.PDF'):
                continue
            if (info.flag_bits & 0x800) == 0:
                all_pdfs.append(info.filename)
            else:
                all_pdfs.append(force_str(info.filename, 'cp437'))

        blanks = empty_folder_test(agreements_zip, all_pdfs)
        if blanks:
            error_msg = ("Processing error: Blank folders were found "
                         "in the source zip file:\n{}".format(", ".join(
                             [folder for folder in blanks])))
            raise CommandError(error_msg)

        if options['verbosity'] >= 1:
            output_file = self.stdout
        else:
            output_file = open(os.devnull, 'a')

        for pdf_path in all_pdfs:
            _util.save_agreement(agreements_zip,
                                 pdf_path,
                                 output_file,
                                 upload=do_upload)
Beispiel #46
0
def process_zip_file(path_zip_file, filename, formato):
    """Import every Excel workbook inside the zip at *path_zip_file*.

    Each .xls/.xlsx member is read with pandas (converters taken from
    set_formato_excel(formato)); rows with a FECHA value are saved via
    save_registers_in_database(). Per-file results accumulate in the
    module-level good_files/bad_files/duplicados lists and the
    total_registros_* counters.
    """
    global total_registros_procesados, total_registros_insertados, total_registros_excluidos, msg_error_column, good_files, bad_files, duplicados
    formato_excel = set_formato_excel(formato)

    archivo_zip = ZipFile(path_zip_file, 'r')
    content_of_zip = archivo_zip.infolist()
    good_files = []
    bad_files = []
    duplicados = []
    extension = (".xls", ".xlsx")
    for s in content_of_zip:
        # Duplicates are tracked per workbook.
        duplicados = []
        if s.filename.endswith(extension):
            print(s.filename)
            try:
                df = pd.read_excel(archivo_zip.open(s.filename, 'r'),
                                   converters=formato_excel)
                # Keep only rows that have a date, then zero-fill blanks.
                process_df = df[df.FECHA.notnull()]
                df_final = process_df.fillna(0)
                reg_procesados, reg_insertados, reg_excluidos = save_registers_in_database(
                    df_final, s.filename, formato, duplicados)
                # NOTE(review): status_indiv_file is read here but not listed
                # in the global statement above — presumably set elsewhere in
                # the module; verify.
                good_files.append({
                    'filename': s.filename,
                    'status': status_indiv_file,
                    'registros_procesados': reg_procesados,
                    'registros_insertados': reg_insertados,
                    'registros_excluidos': reg_excluidos,
                    'registros_duplicados_detalle': duplicados
                })
                total_registros_procesados += reg_procesados
                total_registros_insertados += reg_insertados
                total_registros_excluidos += reg_excluidos
            except AttributeError as e:
                # Typically raised by df.FECHA when the expected column is
                # missing; the message text is mined for the column name.
                indice = str(e).find('attribute')
                error = msg_error_column + str(e)[indice + 9:]
                bad_files.append({'file': s.filename, 'problema': error})
                save_file_upload_error(s.filename, error)
Beispiel #47
0
def load_swig():
    """On Windows, ensure swig is available by downloading and unpacking
    swigwin when it is not already on PATH.

    Verifies the download against SWIG_ZIP_HASH, extracts it into the
    current directory, and prepends the swigwin directory to PATH.
    No-op on non-Windows hosts or when swig is already installed.
    """
    if not is_windows:
        return

    if which('swig'):
        return

    if not os.path.exists(SWIG_LOCAL_FILENAME):
        print('[+] Downloading file from %s' % SIWG_ZIP_URL)
        response = urlopen(SIWG_ZIP_URL)
        data = response.read()

        with open(SWIG_LOCAL_FILENAME, 'wb') as fp:
            fp.write(data)
    else:
        print('[+] Use %s from local' % SWIG_LOCAL_FILENAME)
        with open(SWIG_LOCAL_FILENAME, 'rb') as fp:
            data = fp.read()

    print('[*] Check if file hash match %s' % SWIG_ZIP_HASH)
    assert sha256(data).hexdigest().lower() == SWIG_ZIP_HASH

    print('[*] Read zip file')
    zfile = ZipFile(BytesIO(data))
    pathname = zfile.infolist()[0].filename
    if os.path.exists(pathname):
        print('[+] Zip file already extracted')
    else:
        print('[+] Extracting files')
        zfile.extractall('.')

    path = os.getenv('PATH')
    swig_path = os.path.join(os.path.abspath('.'), 'swigwin-4.0.1')
    new_path = swig_path + os.path.pathsep + path
    # BUG FIX: os.putenv() does not update os.environ, so in-process
    # lookups (e.g. shutil.which) never saw the new PATH. Assigning to
    # os.environ updates both this process and any children.
    os.environ['PATH'] = new_path
    print('New $PATH:')
    pprint(new_path.split(os.path.pathsep))
Beispiel #48
0
    def _update_version1(self, filepath):
        """Migrate a "version 1" results archive at *filepath* in place.

        Rewrites stats.cfg (convergence_limit -> convergor expression),
        copies every other member unchanged into a new archive, stamps
        the archive comment with the current VERSION, and replaces the
        original file. Returns the (unchanged) filepath.
        """
        logging.debug('Updating from "version 1"')

        oldzip = ZipFile(filepath, 'r')
        newzip = ZipFile(filepath + ".new", 'w')

        # Update stats.cfg
        # NOTE(review): attribute-style access (config.stats.…) means this is
        # a project-specific ConfigParser, not the stdlib one — confirm.
        config = ConfigParser()
        config.read(oldzip.open('stats.cfg', 'r'))

        limit = config.stats.convergence_limit
        del config.stats.convergence_limit
        config.stats.convergor = '<CompositionConvergor(limit=%s)>' % limit

        fp = StringIO()
        config.write(fp)
        newzip.writestr('stats.cfg', fp.getvalue())

        # Add other files to new zip
        for zipinfo in oldzip.infolist():
            if zipinfo.filename == 'stats.cfg':
                continue

            data = oldzip.read(zipinfo)
            newzip.writestr(zipinfo, data)

        # Add version
        # NOTE(review): ZipFile.comment must be bytes on Python 3; a str
        # assignment suggests this is Python 2 code — verify.
        newzip.comment = 'version=%s' % VERSION

        oldzip.close()
        newzip.close()

        # Remove old zip and replace with new one
        os.remove(filepath)
        os.rename(filepath + ".new", filepath)

        return filepath
    def compare_site_backups(self, backup1, backup2):
        """  Compares 2 site backups

        Compares critical site backup files and ensures they are the same:
            - site database (``.sqlite``)
            - site floorplan (``.egf.gz``)

        Variables
            *backup1*
                - first site backup file
            *backup2*
                - second site backup file

        .. code:: robotframework

            *** Test Cases ***
            Sample
                Compare site backups   .//artifacts//site_backup1.zip   .//artifacts//site_backup2.zip
        """
        interesting = ['.sqlite', '.egf.gz']

        assert os.path.exists(backup1), ImportError(
            'Unable to find file {0}'.format(backup1))
        assert os.path.exists(backup2), ImportError(
            'Unable to find file {0}'.format(backup2))

        first = ZipFile(backup1)
        second = ZipFile(backup2)

        # Every critical member in the first backup must exist in the
        # second one with an identical size.
        for entry in first.infolist():
            if any(marker in entry.filename for marker in interesting):
                assert entry.file_size == second.getinfo(entry.filename).file_size, \
                    AssertionError('File {0} size does not match!'.format(entry.filename))
Beispiel #50
0
def remove_meta_inf(p_apk):
    """Return a copy of the APK at *p_apk* (a pathlib.Path) with the
    META-INF/ signature directory stripped.

    If the APK is unsigned it is returned unchanged; otherwise a
    sibling '<name>-unsigned.apk' is written and its Path returned.

    Raises Exception when *p_apk* is a plain string instead of a Path.
    """
    if isinstance(p_apk, str):
        # The code below needs Path.resolve(); reject plain strings.
        # (The original message claimed the opposite of the condition.)
        raise Exception("p_apk must be a Path object, not str")

    zip_in = ZipFile(p_apk)
    try:
        is_signed = any(name.startswith('META-INF/')
                        for name in zip_in.namelist())
        if not is_signed:
            # The old code leaked the ZipFile handle on this path.
            return p_apk

        unsigned_apk_path = str(p_apk.resolve())[:-4] + "-unsigned.apk"
        zip_out = ZipFile(unsigned_apk_path, 'w')
        try:
            for item in zip_in.infolist():
                if not item.filename.startswith('META-INF/'):
                    zip_out.writestr(item, zip_in.read(item.filename))
        finally:
            zip_out.close()
    finally:
        zip_in.close()

    return Path(unsigned_apk_path)
Beispiel #51
0
def clean_xml_in_zip(zip_name):
    """Strip language codes from all XML members of an Office zip file.

    Removes <w:lang .../> tags (MS Word) and lang="..." attributes
    (MS PowerPoint) from every .xml member, then rebuilds the archive
    in place, preserving its comment and all non-XML members.
    (Python 2 only: uses ur'' string literals.)
    """
    # construct a list (name, data) for all XML files in the ZIP
    xml_list = []
    xml_names = []
    zf = ZipFile(zip_name, 'r')
    zl = zf.infolist()
    word_re = re.compile(
        ur'<w:lang( w:[a-zA-Z]{1,16}="[a-zA-Z\-]{1,10}"){1,5}/>', re.UNICODE)
    ppt_re = re.compile(ur' lang="[a-zA-Z\-]{1,10}"', re.UNICODE)
    for x in zl:
        if x.filename[-4:] == '.xml':
            xml_names.append(x.filename)
            xml = zf.open(x, 'rU').read().decode('utf-8')
            # MS Word: completely remove tags with language codes
            xml = word_re.sub('', xml)
            # MS PowerPoint: strip language attribute from tags with language codes
            xml = ppt_re.sub('', xml)
            xml_list.append((x.filename, xml.encode('utf-8')))
    zf.close()
    # create a temporary file
    tmp_fd, tmp_name = mkstemp(dir=os.path.dirname(zip_name))
    os.close(tmp_fd)
    # create a copy of the archive without XML files
    with ZipFile(zip_name, 'r') as src_zip:
        with ZipFile(tmp_name, 'w') as dst_zip:
            dst_zip.comment = src_zip.comment  # preserve the comment (if any)
            for item in src_zip.infolist():
                if not (item.filename in xml_names):
                    dst_zip.writestr(item, src_zip.read(item.filename))
    # replace the original with the temporary archive
    os.remove(zip_name)
    os.rename(tmp_name, zip_name)
    # add XML files with their new data
    with ZipFile(zip_name, mode='a', compression=ZIP_DEFLATED) as zf:
        for x in xml_list:
            zf.writestr(x[0], x[1])
def extract_files(input_file, output_folder):
    '''Extract files embedded in given MS Office document.

    Members under an ``embeddings``/``media`` folder with a known
    extension are extracted into ``output_folder``; ``.bin`` objects are
    renamed to ``.pdf`` on the way out.  Returns 0 on success, 1 when
    ``input_file`` is not a ZIP container.
    '''
    # This code was taken from:
    # https://bitdrop.st0w.com/2010/07/23/python-extracting-a-file-from-a-zip-file-with-a-different-name/
    # It allows to extract a file and rename it at the same time
    if not zipfile.is_zipfile(input_file):
        print(bcolors.WARNING + "\n{} not a zipfile.".format(input_file))
        print("Please remove this file from watchfolder.")
        input("Press ENTER to continue:" + bcolors.ENDC)
        return 1

    filecount = 0  # Counter for the number of extracted files

    print("\nStart extraction of embedded files...")

    # Raw strings avoid invalid-escape warnings; compile once, outside
    # the loop.  The archive is context-managed so it cannot leak.
    embedded_re = re.compile(
        r"(embeddings|media).+(bin|jpg|jpeg|doc|docx|xls|xlsx|ppt|pptx|pdf)")
    with ZipFile(input_file) as zipdata:
        zipinfos = zipdata.infolist()
        for zipinfo in zipinfos:
            # Check if file in dedicated folder or of requested filetype
            if embedded_re.search(zipinfo.filename):
                # Rename files with extension .bin to .pdf
                zipinfo.filename = re.sub(r'\.bin$', '.pdf', zipinfo.filename)
                # Extract the file
                zipdata.extract(zipinfo, output_folder)
                filecount += 1
                print("\t=>{}".format(zipinfo.filename))

    if not filecount:
        print("\tNo embedded files found to extract.")

    print("Extracted {} of {} files.".format(filecount, len(zipinfos)))

    return 0
Beispiel #53
0
    def _parse(self, f, options={}):
        print "Parsing ZIP"
        return []
        information = []

        zipfile = ZipFile(f)

        # try:
        for x in zipfile.infolist():
            # Add File to index
            data = self.createData("strings", "FILE", filename=x.filename)
            data["CRC"] = x.CRC
            information.append(data)

            # Process File
            parser = P.instance().get_parser(x.filename)

            if parser is None:
                continue

            try:
                before = datetime.datetime.now()
                results = parser.parse(zipfile.open(x.filename),
                                       parent=self.filename_w)
                after = datetime.datetime.now()
                if (after - before) > datetime.timedelta(seconds=1):
                    print "Parsed %s in %s" % (x.filename, str(after - before))

                if results is not None:
                    information += results

            except Exception as e:
                print "Cannot process %s" % x.filename
                print e

        return information
Beispiel #54
0
def unzip(zfilename):
    '''Unzip a zip archive to the working directory.

    Place the archive contents in the working dir if every member lives
    in a subdirectory.  Else, place them in a subdirectory named after
    the archive (its name minus the extension).

    Arguments
    ---------
    zfilename : string, name of zip file. Must contain the .zip extension.
    '''

    assert type(zfilename) is str, "zfilename is not a string: %r" % zfilename
    assert zfilename[-3:] == 'zip', "zfilename is not a .zip: %r" % zfilename

    # Target directory for root-level members: archive name minus ".zip".
    zdir = zfilename.split('.')[-2]

    # Pull out the member names from the archive.  Re-raise open errors:
    # the original bare except only printed a message and then crashed
    # with NameError on the unbound file variable.
    try:
        with ZipFile(zfilename, "r") as archive:
            filez = [info.filename for info in archive.infolist()]
    except Exception:
        print("no such file or unable to open " + zfilename)
        raise

    # if all files are in subdirectories, unzip to current dir
    if all("/" in name for name in filez):
        with ZipFile(zfilename) as myzip:
            myzip.extractall(".")
            print("successfully unzipped to current directory")
    # else, at least one file located at root of archive; unzip to dir
    else:
        with ZipFile(zfilename) as myzip:
            myzip.extractall(zdir)
            print("successfully unzipped to " + zdir + "/")
Beispiel #55
0
    def getData():
        """Load the matching CSV member from each ZIP in ``zips``.

        For every archive, the first ``.csv`` member whose stem contains
        ``search`` (case-insensitive) is read; a parsed ``time`` column is
        added and ``column`` is coerced to float ("-9999" mapped to "0",
        decimal comma to point).  Returns (years, dfs) where each year is
        the archive's filename stem.
        """
        frames = []
        stems = []
        wanted = search.upper().strip()
        for archive_name in zips:
            archive = ZipFile(archive_name)
            matches = {
                member.filename: pd.read_csv(
                    archive.open(member.filename),
                    header=8,
                    sep=";",
                    quotechar='"',
                    encoding="ISO-8859-1",
                )
                for member in archive.infolist()
                if member.filename.upper().endswith(".CSV")
                and wanted in Path(member.filename.upper()).stem.strip()
            }
            stem = Path(archive_name).stem
            if not matches:
                continue
            _, frame = next(iter(matches.items()))
            # Two known header variants for the date column.
            try:
                frame["time"] = frame["Data"].apply(
                    lambda x: datetime.datetime.strptime(x, "%Y/%m/%d"))
            except KeyError:
                frame["time"] = frame["DATA (YYYY-MM-DD)"].apply(
                    lambda x: datetime.datetime.strptime(
                        str(datetime.datetime.strptime(x, "%Y-%m-%d"))[:-9],
                        "%Y-%m-%d"))

            # Sentinel/locale cleanup before the float conversion.
            frame[column] = frame[column].str.replace("-9999", "0")
            frame[column] = frame[column].str.replace(",", ".").astype(float)
            frames.append(frame)
            stems.append(stem)
        return stems, frames
def download(url, destination, compression=None):
    """Download ``url`` into the ``destination`` directory.

    compression:
        'zip' -- treat the payload as a ZIP archive and extract it;
        'gz'  -- gunzip the payload and write it under its stripped name;
        None  -- write the payload verbatim under its original name.
    """
    # Only swallow OS-level errors ("already exists"); anything else
    # should surface instead of being hidden by a bare except.
    try:
        os.makedirs(destination)
    except OSError:
        pass

    print('Downloading', url)
    data = urlopen(url).read()

    if compression == 'zip':
        # The payload is binary: BytesIO works on Python 2 and 3,
        # whereas StringIO(data) breaks on Python 3 bytes.
        from io import BytesIO
        with ZipFile(BytesIO(data)) as archive:
            for member in archive.infolist():
                archive.extract(member, path=destination)
    else:
        filename = os.path.split(url)[-1]
        if compression == 'gz':
            filename = os.path.splitext(filename)[0]
            # 16 + MAX_WBITS tells zlib to expect a gzip header.
            data = zlib.decompress(data, 16 + zlib.MAX_WBITS)

        with open(os.path.join(destination, filename), 'wb') as f:
            f.write(data)
Beispiel #57
0
def convert_doc(document_file):
    '''Given a filename or a file object of a ODT file (a zip file really)
    returns a converted file object (a BytesIO rewound to the start).'''

    # Context-manage the input archive so it is closed even on error
    # (the original leaked the handle).
    with ZipFile(document_file) as file_in:
        styles = etree.parse(file_in.open('styles.xml'))
        content = etree.parse(file_in.open('content.xml'))

        style_mapping = {}
        convert_styles(styles, style_mapping)
        convert_styles(content, style_mapping)
        convert_content(content, style_mapping)

        # build a new odt file in memory, copying every member except the
        # two documents rewritten above
        fp = BytesIO()
        with ZipFile(fp, mode='w',
                     compression=file_in.compression) as file_out:
            for zinfo in file_in.infolist():
                if zinfo.filename not in ('styles.xml', 'content.xml'):
                    file_out.writestr(zinfo, file_in.read(zinfo))
            file_out.writestr('styles.xml',
                              etree.tostring(styles, encoding='utf8'))
            file_out.writestr('content.xml',
                              etree.tostring(content, encoding='utf8'))
    fp.seek(0)  # hand back a readable stream, not one positioned at EOF
    return fp
Beispiel #58
0
    def assertInZip(self, expected_files, filename, equal=True):
        """
        Check if the given `expected_files` exists in the Zip archive.

        `expected_files` maps member name to expected uncompressed size.
        When `equal` is True the archive must contain exactly those
        members; otherwise they only need to be present.
        """
        new_filename = None
        try:
            # If a stream is provided, dump it to a file. ZipFile doesn't
            # read file from a stream.  mkstemp (unlike the race-prone
            # mktemp) creates the file atomically; the stream is closed
            # even if the copy fails.
            if not isinstance(filename, str):
                f = filename
                fd, new_filename = tempfile.mkstemp(
                    prefix='rdiffweb_test_archiver_', suffix='.zip')
                filename = new_filename
                try:
                    with io.open(fd, 'wb') as out:
                        byte = f.read(4096)
                        while byte:
                            out.write(byte)
                            byte = f.read(4096)
                finally:
                    f.close()

            # Get data from zip: member name -> file size.
            actual = {}
            with ZipFile(filename) as a:
                for m in a.infolist():
                    name = m.filename
                    if isinstance(name, bytes):
                        name = name.decode('utf8')
                    actual[name] = m.file_size
            # Compare.
            if equal:
                self.assertEqual(expected_files, actual)
            else:
                for expected_file in expected_files:
                    self.assertIn(expected_file, actual)
        finally:
            if new_filename:
                os.remove(new_filename)
Beispiel #59
0
    def open_file(self):
        '''
        Open a ROM chosen through a file dialog and start emulating it.
        '''
        rom_fn, _ = QFileDialog.getOpenFileName(
            self, "Open ROM", os.getcwd(),
            "Sega Genesis ROMs (*.bin *.gen *.zip)")

        if not rom_fn:
            return

        self.rom_fn = rom_fn

        if is_zipfile(rom_fn):
            # if the file is a ZIP, try to open the largest file inside
            # (context-managed; the original leaked the archive handle,
            # and the local name no longer shadows the zipfile module)
            with ZipFile(rom_fn, 'r') as zf:
                contents = [(f.file_size, f.filename) for f in zf.infolist()]
                contents.sort(reverse=True)
                rom = zf.read(contents[0][1])
        else:
            # ROM images are binary: read in 'rb' mode -- text mode 'r'
            # fails on Python 3 and corrupts data on Windows Python 2.
            with open(rom_fn, 'rb') as fh:
                rom = fh.read()
        md.set_rom(c_char_p(rom), len(rom))
        self.reset_emulation()
        self.activateWindow()
Beispiel #60
0
def test_pydist():
    """Make sure pydist.json exists and validates against our schema."""
    # XXX this test may need manual cleanup of older wheels

    import jsonschema

    def open_json(filename):
        # Close the handle deterministically instead of leaking it to GC.
        with open(filename, 'rb') as f:
            return json.loads(f.read().decode('utf-8'))

    pymeta_schema = open_json(resource_filename('wheel.test',
                                                'pydist-schema.json'))
    valid = 0
    for dist in ("simple.dist", "complex-dist"):
        basedir = pkg_resources.resource_filename('wheel.test', dist)
        for (dirname, subdirs, filenames) in os.walk(basedir):
            for filename in filenames:
                if filename.endswith('.whl'):
                    # Context-manage each wheel so its handle is released
                    # before the next iteration.
                    with ZipFile(os.path.join(dirname, filename)) as whl:
                        for entry in whl.infolist():
                            if entry.filename.endswith('/metadata.json'):
                                pymeta = json.loads(
                                    whl.read(entry).decode('utf-8'))
                                jsonschema.validate(pymeta, pymeta_schema)
                                valid += 1
    assert valid > 0, "No metadata.json found"