def get_data_as_png(obj):
    im = Image.fromarray(obj, mode='L')
    data = BytesIO()
    im.save(data, format="png")
    data.flush()
    data.seek(0)
    return data
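# Hypothetical usage sketch (not part of the original source), assuming NumPy and
# Pillow are imported: get_data_as_png() above turns an 8-bit greyscale array into
# an in-memory PNG stream that can then be read like a file.
import numpy as np

frame = np.zeros((4, 4), dtype=np.uint8)   # illustrative array, values are arbitrary
png_stream = get_data_as_png(frame)
print(len(png_stream.getvalue()))          # size of the generated PNG in bytes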
def LeoCam_main():
    camera = PiCamera()
    camera.framerate = 32
    camera.resolution = (648, 486)
    camera.brightness = 55
    # give the sensor time to set its light levels
    sleep(1)
    while True:
        try:
            # timestamp
            now = str(datetime.now())
            stream = BytesIO()
            camera.capture(stream, format="jpeg")
            image_64_encode = base64.b64encode(stream.getvalue())
            # decode the base64 bytes so the data URI does not embed a b'...' repr
            data_for_js = "data:image/jpeg;base64,{}".format(image_64_encode.decode('ascii'))
            r = post("http://127.0.0.1:8080/" + now, data=data_for_js)
            if r.status_code != 200:
                print(r.status_code, r.reason)
                break
        finally:
            stream.flush()
def _run_step(self, step_num, step_type, input_path, output_path, working_dir, env, child_stdin=None): step = self._get_step(step_num) # if no mapper, just pass the data through (see #1141) if step_type == 'mapper' and not step.get('mapper'): copyfile(input_path, output_path) return # Passing local=False ensures the job uses proper names for file # options (see issue #851 on github) common_args = (['--step-num=%d' % step_num] + self._mr_job_extra_args(local=False)) if step_type == 'mapper': child_args = ( ['--mapper'] + [input_path] + common_args) elif step_type == 'reducer': child_args = ( ['--reducer'] + [input_path] + common_args) elif step_type == 'combiner': child_args = ['--combiner'] + common_args + ['-'] has_combiner = (step_type == 'mapper' and 'combiner' in step) try: # Use custom stdout if has_combiner: child_stdout = BytesIO() else: child_stdout = open(output_path, 'wb') with save_current_environment(): with save_cwd(): os.environ.update(env) os.chdir(working_dir) child_instance = self._mrjob_cls(args=child_args) child_instance.sandbox(stdin=child_stdin, stdout=child_stdout) child_instance.execute() if has_combiner: sorted_lines = sorted(child_stdout.getvalue().splitlines()) combiner_stdin = BytesIO(b'\n'.join(sorted_lines)) else: child_stdout.flush() finally: child_stdout.close() while len(self._counters) <= step_num: self._counters.append({}) parse_mr_job_stderr(child_instance.stderr.getvalue(), counters=self._counters[step_num]) if has_combiner: self._run_step(step_num, 'combiner', None, output_path, working_dir, env, child_stdin=combiner_stdin) combiner_stdin.close()
def test_encoder_8bits(self):
    spec1d = self.spec1d
    json_1d = dumps(self.spec1d, cls=pngSpecEncoder)
    parsed_json = loads(json_1d)
    assert parsed_json['bits'] == 8
    assert parsed_json['format'] == 'png'
    assert parsed_json['nd'] == 1
    ts.assert_allclose(parsed_json['x_domain'], spec1d.uc[0].ppm_limits(),
                       1e-7, 0, 'x_domain is incorrect')
    ts.assert_allclose(parsed_json['y_domain'],
                       [spec1d.real.min(), spec1d.real.max()],
                       1e-7, 0, 'y_domain is incorrect')

    # Decode the base64 png and get pixel data
    decoded_png = parsed_json['data'].decode('base64')
    img_data = BytesIO()
    img_data.write(decoded_png)
    img_data.flush()
    img_data.seek(0)
    img = Image.open(img_data)
    img_arr = asarray(list(img.getdata()))

    # Check image shape
    ts.assert_equal(img_arr.shape, spec1d.shape,
                    'image shape does not match the spectrum')

    # Check data resolution
    y_domain = asarray(parsed_json['y_domain'])
    resolution = y_domain.ptp() / (2**8 - 1)
    scaled = (img_arr * resolution) + y_domain[0]
    ts.assert_equal(sum(spec1d - scaled > resolution), 0,
                    'Some scaled points deviate from the spectrum by more than the accepted resolution')
def _read_manifest_from_stdin(self, read_fileobj=None, chunk_size=None):
    '''
    Attempts to read xml provided to stdin and convert it to a
    downloadmanifest obj.

    :returns downloadmanifest obj.
    '''
    chunk_size = chunk_size or stages._chunk_size
    read_fileobj = read_fileobj or sys.stdin
    self.log.debug('Reading Manifest from stdin')
    fileobj = BytesIO()
    while True:
        chunk = read_fileobj.read(chunk_size)
        if not chunk:
            break
        self.log.debug('Chunk:' + str(chunk))
        fileobj.write(chunk)
    fileobj.flush()
    fileobj.seek(0)
    with fileobj:
        manifest = DownloadManifest._read_from_fileobj(
            manifest_fileobj=fileobj,
            xsd=self.args.xsd,
            key_filename=self.args.privatekey,
            sig_key_filename=self.args.cloudcert)
    return manifest
def export_to_csv(table, filename_or_fobj=None, encoding='utf-8',
                  dialect=unicodecsv.excel, *args, **kwargs):
    '''Export a `rows.Table` to a CSV file

    If a file-like object is provided it MUST be in binary mode, like in
    `open(filename, mode='wb')`.
    If no filename/fobj is provided, the function returns a string with the
    CSV contents.
    '''
    # TODO: will work only if table.fields is OrderedDict
    # TODO: should use fobj? What about creating a method like json.dumps?

    if filename_or_fobj is not None:
        _, fobj = get_filename_and_fobj(filename_or_fobj, mode='wb')
    else:
        fobj = BytesIO()

    writer = unicodecsv.writer(fobj, encoding=encoding, dialect=dialect)
    for row in serialize(table, *args, **kwargs):
        writer.writerow(row)

    if filename_or_fobj is not None:
        fobj.flush()
        return fobj
    else:
        fobj.seek(0)
        result = fobj.read()
        fobj.close()
        return result
def pack(self, files, jad_properties=None):
    jad_properties = jad_properties or {}

    # pack files into jar
    buffer = BytesIO(self.jar)
    with ZipFile(buffer, 'a', ZIP_DEFLATED) as zipper:
        for path, f in files.items():
            zipper.writestr(path, convert_XML_To_J2ME(f, path, self.use_j2me_endpoint))
    buffer.flush()
    jar = buffer.getvalue()
    buffer.close()

    # update and sign jad
    signed = False
    if self.jad:
        jad = JadDict.from_jad(self.jad, use_j2me_endpoint=self.use_j2me_endpoint)
        jad.update({
            'MIDlet-Jar-Size': len(jar),
        })
        jad.update(jad_properties)

        if hasattr(settings, 'JAR_SIGN'):
            jad = sign_jar(jad, jar, use_j2me_endpoint=self.use_j2me_endpoint)
            signed = True
        else:
            jad = jad.render()
    else:
        jad = None

    return JadJar(jad, jar, self.version, self.build_number,
                  signed=signed, use_j2me_endpoint=self.use_j2me_endpoint)
def get_compressed_file(self):
    in_memory_zip = BytesIO()
    zf = zipfile.ZipFile(in_memory_zip, "w", zipfile.ZIP_DEFLATED)
    zf.writestr(self.get_filename(), self.get_content().getvalue())
    zf.close()
    in_memory_zip.flush()
    return in_memory_zip
def dumpIO_source(object, **kwds):
    """write object source to a buffer (instead of dill.dump)
    Loads with dill.temp.loadIO_source.  Returns the buffer object.

    >>> f = lambda x: x**2
    >>> pyfile = dill.temp.dumpIO_source(f, alias='_f')
    >>> _f = dill.temp.loadIO_source(pyfile)
    >>> _f(4)
    16

    Optional kwds:
        If 'alias' is specified, the object will be renamed to the given string.
    """
    from .source import importable, getname
    if PY3:
        from io import BytesIO as StringIO
    else:
        from StringIO import StringIO
    alias = kwds.pop('alias', '')  #XXX: include an alias so a name is known
    name = str(alias) or getname(object)
    name = "\n#NAME: %s\n" % name
    #XXX: assumes kwds['dir'] is writable and on $PYTHONPATH
    file = StringIO()
    file.write(b(''.join([importable(object, alias=alias), name])))
    file.flush()
    return file
class Logger(object):
    '''class printing to stdout and to log'''

    def __init__(self):
        self._tmp_sysout = sys.stdout
        self.terminal = sys.stdout
        self.log = BytesIO()  # write everything in bytes to the stream

    def write(self, message):
        self.terminal.write(message)
        if not isinstance(message, bytes):
            message = message.encode('utf8')
        self.log.write(message)

    def flush(self):
        # this flush method is needed for python 3 compatibility.
        # In the example here they pass:
        # https://stackoverflow.com/questions/14906764/how-to-redirect-stdout-to-both-file-and-console-with-scripting
        # here we forward the flush:
        self.terminal.flush()
        self.log.flush()

    def close(self, fileout=None):
        '''closes the streams, restores sys.stdout and writes the content
        to fileout, if provided'''
        sys.stdout = self._tmp_sysout
        if fileout:
            with open(fileout, 'wb') as opn:
                opn.write(self.log.getvalue())
        try:
            self.log.close()
        except:
            pass
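# Hypothetical usage sketch (not from the original source): install the Logger
# above as sys.stdout so output is echoed to the terminal and captured in the
# in-memory BytesIO log, then written to a file on close. 'run.log' is an
# illustrative filename.
logger = Logger()
sys.stdout = logger
print('hello')                   # reaches the real terminal and the BytesIO log
logger.close(fileout='run.log')  # restores sys.stdout and dumps the captured bytes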
def render(self, context, instance, placeholder):
    def sequence():
        n = 0
        while True:
            yield n
            n += 1

    context = super(YoutubeSliderPlugin, self).render(context, instance, placeholder)
    slides = []
    YoutubeVideo = namedtuple('YoutubeVideo', 'pos, video_id, video_thumb')
    pos = sequence()
    for slide in instance.slides.all().order_by('order'):
        if slide.is_playlist:
            response = requests.get(slide.playlist_link)
            if response.status_code == 200:
                xml = BytesIO(response.content)
                xml.flush()
                xml.seek(0)
                for entry in ET.parse(xml).getroot().findall(u"{http://www.w3.org/2005/Atom}entry"):
                    video_id = slide.video_id(
                        entry.findall(u"{http://www.w3.org/2005/Atom}link")[0].attrib.get('href', 'X'))
                    slides.append(YoutubeVideo(pos.next(), video_id, slide.video_thumb(video_id)))
        else:
            slides.append(YoutubeVideo(pos.next(), slide.video_id, slide.video_thumb))
    context.update({
        'slides': slides,
        'description': instance.description.strip()
    })
    return context
def read_from_url(cls, manifest_url, chunk_size=None, xsd=None,
                  key_filename=None, sig_key_filename=None):
    '''
    Reads xml at the provided 'manifest_url' and attempts to convert and
    validate the resulting DownloadManifest.

    :param manifest_url: URL to download the 'download manifest xml'
    :param chunk_size: # of bytes to read/write per read()/write()
    :param xsd: local file path to xsd used to validate xml
    :param key_filename: optional path to key used to decrypt manifest
                         fields/values.
    :returns DownloadManifest obj.
    '''
    manifest_url = str(manifest_url)
    chunk_size = chunk_size or stages._chunk_size
    fileobj = BytesIO()
    r = requests.get(manifest_url, stream=True)
    r.raise_for_status()
    for chunk in r.iter_content(chunk_size):
        fileobj.write(chunk)
    fileobj.flush()
    fileobj.seek(0)
    return cls._read_from_fileobj(manifest_fileobj=fileobj, xsd=xsd,
                                  key_filename=key_filename,
                                  sig_key_filename=sig_key_filename)
def download_images_as_zip(request, images):
    """
    Sends many images to the client.
    This method does NOT check permissions!
    """
    # FIXME: this shouldn't be done in-memory
    # FIXME: this should be done asynchronously
    response = HttpResponse(content_type='application/zip')
    #
    # From https://code.djangoproject.com/wiki/CookBookDynamicZip
    #
    response['Content-Disposition'] = 'filename=all.zip'
    # now add them to a zip file
    # note the zip only exists in memory as you add to it
    zip_buffer = BytesIO()
    zip_file = zipfile.ZipFile(zip_buffer, "w", zipfile.ZIP_DEFLATED)
    for an_image in images:
        zip_file.writestr(an_image.original_filename or an_image.thumbnail_original_filename,
                          an_image.image.read())
    zip_file.close()
    zip_buffer.flush()
    # the important detail -- we return the content of the buffer
    ret_zip = zip_buffer.getvalue()
    zip_buffer.close()
    response.write(ret_zip)
    return response
def test_writer_flush_error(self):
    s = BytesIO()
    s = tcp.Writer(s)
    o = mock.MagicMock()
    o.flush = mock.MagicMock(side_effect=socket.error)
    s.o = o
    with pytest.raises(exceptions.TcpDisconnect):
        s.flush()
def test_wrap(self):
    s = BytesIO(b"foobar\nfoobar")
    s.flush()
    s = tcp.Reader(s)
    assert s.readline() == b"foobar\n"
    assert s.readline() == b"foobar"

    # Test __getattr__
    assert s.isatty
def get_as_dds(self): output = BytesIO() if not hasattr(self, '_height'): return output.write(b'DDS ') # magic output.write(struct.pack("<I", 124)) # size output.write(struct.pack("<I", 0)) output.write(struct.pack("<I", self._height)) output.write(struct.pack("<I", self._width)) output.write(struct.pack("<IIIIIIIIIIIIII", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)) output.write(struct.pack("<I", 32)) # size if self._type == b'\x41\x14': output.write(struct.pack("<I", 0x41)) output.write(struct.pack("<I", 0)) output.write(struct.pack("<I", 0x10)) output.write(struct.pack("<I", 0x7c00)) output.write(struct.pack("<I", 0x03E0)) output.write(struct.pack("<I", 0x1F)) output.write(struct.pack("<I", 0x8000)) elif self._type == b'\x40\x14': output.write(struct.pack("<I", 0x41)) output.write(struct.pack("<I", 0)) output.write(struct.pack("<I", 0x10)) output.write(struct.pack("<I", 0x0F00)) output.write(struct.pack("<I", 0xF0)) output.write(struct.pack("<I", 0x0F)) output.write(struct.pack("<I", 0xF000)) elif self._type == b'\x50\x14': output.write(struct.pack("<I", 0x41)) output.write(struct.pack("<I", 0)) output.write(struct.pack("<I", 0x20)) output.write(struct.pack("<I", 0xFF0000)) output.write(struct.pack("<I", 0xFF00)) output.write(struct.pack("<I", 0xFF)) output.write(struct.pack("<I", 0xFF000000)) elif self._type == b'\x20\x34': output.write(struct.pack("<I", 0x04)) output.write(b'DXT1') output.write(struct.pack("<IIIII", 0, 0, 0, 0, 0)) elif self._type == b'\x31\x34': output.write(struct.pack("<I", 0x04)) output.write(b'DXT5') output.write(struct.pack("<IIIII", 0, 0, 0, 0, 0)) #else: #logging.error('Unknown type: %s', self._type) output.write(struct.pack("<I", 0x0010)) output.write(struct.pack("<IIII", 0, 0, 0, 0)) output.write(self._data) output.flush() output.seek(0) return output
def _copyIconSignal(self):
    pngFile = BytesIO()
    self.graph.saveGraph(pngFile, fileFormat='png')
    pngFile.flush()
    pngFile.seek(0)
    pngData = pngFile.read()
    pngFile.close()
    image = qt.QImage.fromData(pngData, 'png')
    qt.QApplication.clipboard().setImage(image)
def decrypt(self, b_vaulttext, b_password, key_length=32):
    """ Decrypt the given data and return it
    :arg b_data: A byte string containing the encrypted data
    :arg b_password: A byte string containing the encryption password
    :arg key_length: Length of the key
    :returns: A byte string containing the decrypted data
    """

    display.deprecated(u'The VaultAES format is insecure and has been'
                       ' deprecated since Ansible-1.5. Use vault rekey FILENAME to'
                       ' switch to the newer VaultAES256 format', version='2.3')
    # http://stackoverflow.com/a/14989032

    b_ciphertext = unhexlify(b_vaulttext)

    in_file = BytesIO(b_ciphertext)
    in_file.seek(0)
    out_file = BytesIO()

    bs = AES.block_size
    b_tmpsalt = in_file.read(bs)
    b_salt = b_tmpsalt[len(b'Salted__'):]
    b_key, b_iv = self._aes_derive_key_and_iv(b_password, b_salt, key_length, bs)
    cipher = AES.new(b_key, AES.MODE_CBC, b_iv)
    b_next_chunk = b''
    finished = False

    while not finished:
        b_chunk, b_next_chunk = b_next_chunk, cipher.decrypt(in_file.read(1024 * bs))
        if len(b_next_chunk) == 0:
            if PY3:
                padding_length = b_chunk[-1]
            else:
                padding_length = ord(b_chunk[-1])

            b_chunk = b_chunk[:-padding_length]
            finished = True

        out_file.write(b_chunk)
        out_file.flush()

    # reset the stream pointer to the beginning
    out_file.seek(0)
    b_out_data = out_file.read()
    out_file.close()

    # split out sha and verify decryption
    b_split_data = b_out_data.split(b"\n", 1)
    b_this_sha = b_split_data[0]
    b_plaintext = b_split_data[1]
    b_test_sha = to_bytes(sha256(b_plaintext).hexdigest())

    if b_this_sha != b_test_sha:
        raise AnsibleError("Decryption failed")

    return b_plaintext
def alpino_parse_tokenized_file_socket(tokenized_file_path: Path, target_dir_path: Path, host='127.0.0.1', base_port_number=42424, n_instances: int=4): """ Warning: produces corrupt output for half of the sentences. """ chosen_port = randrange(base_port_number, base_port_number + n_instances) parsed_sentences_files_dir_path, parsed_sentence_file_path_prefix = \ prepare_parsing(tokenized_file_path=tokenized_file_path, target_dir_path=target_dir_path) if parsed_sentences_files_dir_path is not None: with tokenized_file_path.open(mode='rb') as tokenized_file: parsed_sentence_buffer = BytesIO() sentence_index = 0 for sentence in tokenized_file.readlines(): sentence_index += 1 with socket(AF_INET, SOCK_STREAM) as alpino_socket: # alpino_socket.settimeout() # TODO: set timeout equal # to Alpino timeout alpino_socket.connect_ex((host, chosen_port)) alpino_socket.sendall(sentence + b'\n\n') while True: parsed_sentence_xml_chunk = alpino_socket.recv( ALPINO_SOCKET_BUFFER_SIZE) if not parsed_sentence_xml_chunk: alpino_socket.sendall(b'\n\n') break else: parsed_sentence_buffer.write( parsed_sentence_xml_chunk) parsed_sentence = parsed_sentence_buffer.getvalue() parsed_sentence_buffer.truncate() parsed_sentence_buffer.flush() parsed_sentence_buffer.seek(0) parsed_sentence_file_path = \ parsed_sentence_file_path_prefix.with_suffix( '.{0:d}.xml'.format(sentence_index)) with parsed_sentence_file_path.open( mode='wb') as parsed_sentence_file: parsed_sentence_file.write(parsed_sentence) info( "Parsed tokenized file to '{" "parsed_sentence_file_path_prefix}'.*.xml . " "".format( parsed_sentence_file_path_prefix=parsed_sentence_file_path_prefix))
def check_simple_write_read(records, indent=" "): #print indent+"Checking we can write and then read back these records" for format in test_write_read_alignment_formats: if format not in possible_unknown_seq_formats \ and isinstance(records[0].seq, UnknownSeq) \ and len(records[0].seq) > 100: #Skipping for speed. Some of the unknown sequences are #rather long, and it seems a bit pointless to record them. continue print indent+"Checking can write/read as '%s' format" % format #Going to write to a handle... if format in SeqIO._BinaryFormats: handle = BytesIO() else: handle = StringIO() try: c = SeqIO.write(sequences=records, handle=handle, format=format) assert c == len(records) except (TypeError, ValueError), e: #This is often expected to happen, for example when we try and #write sequences of different lengths to an alignment file. if "len()" in str(e): #Python 2.4.3, #>>> len(None) #... #TypeError: len() of unsized object # #Python 2.5.2, #>>> len(None) #... #TypeError: object of type 'NoneType' has no len() print "Failed: Probably len() of None" else: print indent+"Failed: %s" % str(e) if records[0].seq.alphabet.letters is not None: assert format != t_format, \ "Should be able to re-write in the original format!" #Carry on to the next format: continue handle.flush() handle.seek(0) #Now ready to read back from the handle... try: records2 = list(SeqIO.parse(handle=handle, format=format)) except ValueError, e: #This is BAD. We can't read our own output. #I want to see the output when called from the test harness, #run_tests.py (which can be funny about new lines on Windows) handle.seek(0) raise ValueError("%s\n\n%s\n\n%s" % (str(e), repr(handle.read()), repr(records)))
def _plotAsPNG(plot):
    """Save a :class:`Plot` as PNG and return the payload.

    :param plot: The :class:`Plot` to save
    """
    pngFile = BytesIO()
    plot.saveGraph(pngFile, fileFormat='png')
    pngFile.flush()
    pngFile.seek(0)
    data = pngFile.read()
    pngFile.close()
    return data
class UniversalBytesIO:

    def __init__(self, container=None, charset=None):
        self.charset = charset or settings.DEFAULT_CHARSET
        self._container = BytesIO() if container is None else container

    # These methods partially implement the file-like object interface.
    # See https://docs.python.org/3/library/io.html#io.IOBase
    def close(self):
        self._container.close()

    def write(self, content):
        self._container.write(self.make_bytes(content))

    def flush(self):
        self._container.flush()

    def tell(self):
        return self._container.tell()

    def readable(self):
        return False

    def seekable(self):
        return False

    def writable(self):
        return True

    def writelines(self, lines):
        for line in lines:
            self.write(line)

    def make_bytes(self, value):
        """Turn a value into a bytestring encoded in the output charset."""
        if isinstance(value, bytes):
            return bytes(value)
        if isinstance(value, str):
            return bytes(value.encode(self.charset))

        # Handle non-string types
        return force_bytes(value, self.charset)

    def get_string_value(self):
        return self._container.getvalue().decode(self.charset)

    def getvalue(self):
        return self._container.getvalue()

    if sys.version_info[0:2] < (3, 5):
        def seek(self, *args, **kwargs):
            pass
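# Hypothetical usage sketch (not from the original source): passing an explicit
# charset avoids the Django settings lookup; str and bytes writes are both
# normalised to bytes by make_bytes().
buf = UniversalBytesIO(charset='utf-8')
buf.write('héllo ')
buf.write(b'world')
print(buf.get_string_value())  # -> 'héllo world'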
def test_write_and_read(self):
    handle = BytesIO()
    sequence = Seq(self.nucleotides)
    record = SeqRecord(sequence)
    n = SeqIO.write(record, handle, 'nib')
    self.assertEqual(n, 1)
    handle.flush()
    handle.seek(0)
    record = SeqIO.read(handle, 'nib')
    handle.close()
    sequence = record.seq
    self.assertEqual(str(sequence), self.nucleotides)
def copyToClipboard(self):
    """ Copy the plot to the clipboard """
    pngFile = BytesIO()
    self.saveGraph(pngFile, fileFormat='png')
    pngFile.flush()
    pngFile.seek(0)
    pngData = pngFile.read()
    pngFile.close()
    image = qt.QImage.fromData(pngData, 'png')
    qt.QApplication.clipboard().setImage(image)
def export_to_csv( table, filename_or_fobj=None, encoding="utf-8", dialect=unicodecsv.excel, batch_size=100, callback=None, *args, **kwargs ): """Export a `rows.Table` to a CSV file. If a file-like object is provided it MUST be in binary mode, like in `open(filename, mode='wb')`. If not filename/fobj is provided, the function returns a string with CSV contents. """ # TODO: will work only if table.fields is OrderedDict # TODO: should use fobj? What about creating a method like json.dumps? if filename_or_fobj is not None: _, fobj = get_filename_and_fobj(filename_or_fobj, mode="wb") else: fobj = BytesIO() # TODO: may use `io.BufferedWriter` instead of `ipartition` so user can # choose the real size (in Bytes) when to flush to the file system, instead # number of rows writer = unicodecsv.writer(fobj, encoding=encoding, dialect=dialect) if callback is None: for batch in ipartition(serialize(table, *args, **kwargs), batch_size): writer.writerows(batch) else: serialized = serialize(table, *args, **kwargs) writer.writerow(next(serialized)) # First, write the header total = 0 for batch in ipartition(serialized, batch_size): writer.writerows(batch) total += len(batch) callback(total) if filename_or_fobj is not None: fobj.flush() return fobj else: fobj.seek(0) result = fobj.read() fobj.close() return result
def testPNMsbit(self):
    """Test that PNM files can generate an sBIT chunk."""
    s = BytesIO()
    s.write(strtobytes('P6 8 1 1\n'))
    for pixel in range(8):
        s.write(struct.pack('<I', (0x4081 * pixel) & 0x10101)[:3])
    s.flush()
    s.seek(0)
    o = BytesIO()
    _redirect_io(s, o, lambda: png.pnm2png.main(['testPNMsbit']))
    r = png.Reader(bytes=o.getvalue())
    sbit = r.chunk('sBIT')[1]
    self.assertEqual(sbit, strtobytes('\x01\x01\x01'))
def create_match_sheets(matches):
    output_pdf = PdfFileWriter()

    for match in matches:
        match_sheet_base_layer = _get_match_sheet_base_layer()
        match_sheet_base_layer.mergePage(_draw_match_sheet(match))
        output_pdf.addPage(match_sheet_base_layer)

    output_stream = BytesIO()
    output_pdf.write(output_stream)
    output_stream.flush()
    output_stream.seek(0)

    return output_stream
def testPGMin(self):
    """Test that the command line tool can read PGM files."""
    s = BytesIO()
    s.write(strtobytes('P5 2 2 3\n'))
    s.write(strtobytes('\x00\x01\x02\x03'))
    s.flush()
    s.seek(0)
    o = BytesIO()
    _redirect_io(s, o, lambda: png.pnm2png.main(['testPGMin']))
    r = png.Reader(bytes=o.getvalue())
    r.read()
    self.assertEqual(r.greyscale, True)
    self.assertEqual(r.bitdepth, 2)
def decrypt(self, data, password, key_length=32): """ Read encrypted data from in_file and write decrypted to out_file """ # http://stackoverflow.com/a/14989032 data = b''.join(data.split(b'\n')) data = unhexlify(data) in_file = BytesIO(data) in_file.seek(0) out_file = BytesIO() bs = AES.block_size tmpsalt = in_file.read(bs) salt = tmpsalt[len('Salted__'):] key, iv = self.aes_derive_key_and_iv(password, salt, key_length, bs) cipher = AES.new(key, AES.MODE_CBC, iv) next_chunk = b'' finished = False while not finished: chunk, next_chunk = next_chunk, cipher.decrypt(in_file.read(1024 * bs)) if len(next_chunk) == 0: if PY2: padding_length = ord(chunk[-1]) else: padding_length = chunk[-1] chunk = chunk[:-padding_length] finished = True out_file.write(chunk) out_file.flush() # reset the stream pointer to the beginning out_file.seek(0) out_data = out_file.read() out_file.close() new_data = to_unicode(out_data) # split out sha and verify decryption split_data = new_data.split("\n") this_sha = split_data[0] this_data = '\n'.join(split_data[1:]) test_sha = sha256(to_bytes(this_data)).hexdigest() if this_sha != test_sha: raise errors.AnsibleError("Decryption failed") return this_data
def zip_response(files, filename):
    response = HttpResponse(content_type='application/zip')
    response['Content-Disposition'] = 'filename=%s' % filename
    buffer = BytesIO()
    zip = zipfile.ZipFile(buffer, "w", zipfile.ZIP_DEFLATED)
    for name, bytes in files:
        zip.writestr(name, bytes)
    zip.close()
    buffer.flush()
    ret_zip = buffer.getvalue()
    buffer.close()
    response.write(ret_zip)
    return response
def download_extern_document(self, model, id, filename=None, **kwargs): invoice = request.env[model].search([('id', '=', id)]) reponds = BytesIO() archive = zipfile.ZipFile(reponds, 'w', zipfile.ZIP_DEFLATED) checklist = request.env['intern.document'].search([('name', '=', "Checklist")], limit=1) if checklist: stream = BytesIO(checklist[0].attachment.decode("base64")) tpl = DocxTemplate(stream) tempFile = NamedTemporaryFile(delete=False) tpl.render({}) tpl.save(tempFile) tempFile.flush() tempFile.close() archive.write(tempFile.name, 'Checklist.docx') os.unlink(tempFile.name) doc1_13_1 = invoice.create_1_13_1() archive.write(doc1_13_1.name, u'1-13号 HOANG HUNG JAPAN 訓連センター.docx') os.unlink(doc1_13_1.name) doc1_13_2 = invoice.create_1_13_2() archive.write(doc1_13_2.name, u'1-13号HOANG HUNG 会社.docx') os.unlink(doc1_13_2.name) master = invoice.create_master() archive.write(master.name, 'Master.docx') os.unlink(master.name) doc1_29 = invoice.create_doc_1_29() archive.write(doc1_29.name, '1_29.docx') os.unlink(doc1_29.name) doc_list_send = invoice.create_list_of_sent_en() archive.write(doc_list_send.name, u'推薦書 - ENG.docx') os.unlink(doc_list_send.name) doc_list_send_jp = invoice.create_list_of_sent_jp() archive.write(doc_list_send_jp.name, u'推薦書.docx') os.unlink(doc_list_send_jp.name) doc1_20 = invoice.create_doc_1_20() archive.write(doc1_20.name, '1_20.docx') os.unlink(doc1_20.name) for i, intern in enumerate(invoice.interns_pass): doc1_3 = invoice.create_doc_1_3(intern, i) archive.write(doc1_3.name, '1_3_%d_%s.docx' % ((i+1),intern_utils.name_with_underscore(intern.name))) os.unlink(doc1_3.name) doc1_10 = invoice.create_doc_1_10(intern) archive.write(doc1_10.name, '1_10_%d_%s.docx' % ((i+1),intern_utils.name_with_underscore(intern.name))) os.unlink(doc1_10.name) doc1_21 = invoice.create_doc_1_21(intern) archive.write(doc1_21.name, '1_21_%d_%s.docx' % ((i+1),intern_utils.name_with_underscore(intern.name))) os.unlink(doc1_21.name) doc1_28 = invoice.create_doc_1_28(intern,i) archive.write(doc1_28.name, '1_28_%d_%s.docx' % ((i+1),intern_utils.name_with_underscore(intern.name))) os.unlink(doc1_28.name) hdtn = invoice.create_hdtn(intern) archive.write(hdtn.name, 'hdtn_%d_%s.docx' % ((i+1),intern_utils.name_with_underscore(intern.name))) os.unlink(hdtn.name) hdtv = invoice.create_hdtv(intern) archive.write(hdtv.name, 'hdtv_%d_%s.docx' % ((i+1),intern_utils.name_with_underscore(intern.name))) os.unlink(hdtv.name) archive.close() reponds.flush() ret_zip = reponds.getvalue() reponds.close() return request.make_response(ret_zip, [('Content-Type', 'application/zip'), ('Content-Disposition', content_disposition(filename))])
def get(self, *args, **kwargs): assert self.serializer_class is not None, ( "'%s' should either include a `serializer_class` " % self.__class__.__name__ ) queryset = self.get_queryset() layer = self.get_serializer(queryset, many=True) # first = self.queryset.first() # geometry_type = first.geometry.geom_type # FIXME take it from self.geo_field geo_field = None for field in self.queryset.model._meta.fields: if isinstance(field, GeometryField) and field.name == self.geo_field: geo_field = field crs = from_epsg(geo_field.srid) if self.geometry_type: geometry_type = self.geometry_type else: geometry_type = geo_field.geom_type serializer_fields = OrderedDict(layer.child.fields) properties = serializer_fields.copy() properties.pop(self.geo_field) for field_name, field_type in serializer_fields.items(): if isinstance(field_type, relations.ManyRelatedField): raise AttributeError("All Many to Many fields should be exclude from serializer. Field: " + field_name) if not (isinstance(field_type, rest_framework_gis.fields.GeometryField) or isinstance(field_type, spillway_fields.GeometryField)): properties[field_name] = self._get_fiona_type(field_type) schema = {"geometry": geometry_type, "properties": properties} temp_file = tempfile.NamedTemporaryFile(suffix='.shp', mode='w+b') temp_file.close() with fiona.open( temp_file.name, mode='w', driver='ESRI Shapefile', crs=crs, schema=schema, encoding='iso-8859-1', ) as shapefile: # encoding='utf-8', ) as shapefile: shapefile.writerecords(layer.data['features']) buffer = BytesIO() zip = zipfile.ZipFile(buffer, 'w', zipfile.ZIP_DEFLATED) file_ext = ['shp', 'shx', 'prj', 'dbf'] for item in file_ext: filename = '%s.%s' % (temp_file.name.replace('.shp', ''), item) zip.write(filename, arcname='%s.%s' % (self.file_name.replace('.shp', ''), item)) if self.readme: zip.writestr('README.txt', self.readme) zip.close() buffer.flush() zip_stream = buffer.getvalue() buffer.close() # Stick it all in a django HttpResponse response = HttpResponse() response['Content-Disposition'] = 'attachment; filename=%s.zip' % self.file_name response['Content-length'] = str(len(zip_stream)) response['Content-Type'] = 'application/zip' response.write(zip_stream) return response
def download_extern_document(self, id=False, **kwargs): reponds = BytesIO() archive = zipfile.ZipFile(reponds, 'w', zipfile.ZIP_DEFLATED) file_maps = {} document = request.env['report.doccument'].browse(int(id)) invoice = request.env['intern.invoice'] infor = document.enterprise list_interns = invoice.interns_pass_doc interns_pass = sorted(list_interns, key=lambda x: x.sequence_pass) doc_type = document.document data = document.enterprise.lienket.interns_pass_doc if doc_type == 'Doc1-3': for thuctapsinh in data: doc1_3 = invoice.create_Doc_1_3(infor, thuctapsinh.id) file_maps.update( {u'1-3_%s.docx' % thuctapsinh.name: doc1_3.name}) elif doc_type == 'Doc1-10': counter = 0 for thuctapsinh in data: counter += 1 doc1_10 = invoice.create_Doc_1_10(infor, thuctapsinh.id) file_maps.update( {u'1-10_%s.docx' % thuctapsinh.name: doc1_10.name}) elif doc_type == 'Doc1-13': doc1_13_1 = invoice.create_Doc_1_13_1(infor) file_maps.update( {u'1-13-1号 HOANG HUNG JAPAN 訓連センター.docx': doc1_13_1.name}) doc1_13_2 = invoice.create_Doc_1_13_2(infor) file_maps.update({u'1-13-2号HOANG HUNG 会社.docx': doc1_13_2.name}) elif doc_type == 'Doc1-20': doc1_20 = invoice.create_Doc_1_20(infor) file_maps.update({u'1-20.docx': doc1_20.name}) elif doc_type == 'Doc1-21': counter = 0 for thuctapsinh in data: counter += 1 doc1_21 = invoice.create_Doc_1_21(infor, thuctapsinh.id) file_maps.update( {u'1-21_%s.docx' % thuctapsinh.name: doc1_21.name}) elif doc_type == 'Doc1-27': doc1_27 = invoice.create_Doc_1_27(infor) file_maps.update({u'1-27.docx': doc1_27.name}) elif doc_type == 'Doc1-28': for thuctapsinh in data: doc1_28 = invoice.create_Doc_1_28(infor, thuctapsinh.id) file_maps.update( {u'1_28_%s.docx' % thuctapsinh.name: doc1_28.name}) elif doc_type == 'Doc1-29': doc1_29 = invoice.create_Doc_1_29(infor) file_maps.update({u'1_29.docx': doc1_29.name}) elif doc_type == 'DocCCDT': docCCDT = invoice.create_certification_end_train(infor) file_maps.update({u'DocCCDT.docx': docCCDT.name}) elif doc_type == 'HDPC': counter = 0 for thuctapsinh in data: counter += 1 dochdtn = invoice.create_hdtn(infor, thuctapsinh.id) file_maps.update( {u'HDPCTN_%s.docx' % thuctapsinh.name: dochdtn.name}) dochdtv = invoice.create_hdtv(infor, thuctapsinh.id) file_maps.update( {u'HDPCTV_%s.docx' % thuctapsinh.name: dochdtv.name}) elif doc_type == 'PROLETTER': doc_list_send = invoice.create_list_of_sent_en(infor) file_maps.update({u'推薦書 - ENG.docx': doc_list_send.name}) doc_list_send_jp = invoice.create_list_of_sent_jp(infor) file_maps.update({u'推薦書.docx': doc_list_send_jp.name}) elif doc_type == 'DSLD': DSLD = invoice.create_danh_sach_lao_dong(infor) file_maps.update({u'DSLD.docx': DSLD.name}) elif doc_type == 'CheckList': CheckList = invoice.create_check_list(infor) file_maps.update({u'CheckList.docx': CheckList.name}) elif doc_type == 'Doc4-8': Doc4_8 = invoice.create_48(infor) file_maps.update({u'Doc48.docx': Doc4_8.name}) elif doc_type == 'Master': Master = invoice.create_master(infor) file_maps.update({u'Master.docx': Master.name}) elif doc_type == 'Doc-ALL': for thuctapsinh in data: doc1_3 = invoice.create_Doc_1_3(infor, thuctapsinh.id) file_maps.update( {u'1-3_%s.docx' % thuctapsinh.name: doc1_3.name}) for thuctapsinh in data: doc1_10 = invoice.create_Doc_1_10(infor, thuctapsinh.id) file_maps.update( {u'1-10_%s.docx' % thuctapsinh.name: doc1_10.name}) doc1_13_1 = invoice.create_Doc_1_13_1(infor) file_maps.update( {u'1-13-1号 HOANG HUNG JAPAN 訓連センター.docx': doc1_13_1.name}) doc1_13_2 = invoice.create_Doc_1_13_2(infor) file_maps.update({u'1-13-2号HOANG HUNG 
会社.docx': doc1_13_2.name}) doc1_20 = invoice.create_Doc_1_20(infor) file_maps.update({u'1-20.docx': doc1_20.name}) for thuctapsinh in data: doc1_21 = invoice.create_Doc_1_21(infor, thuctapsinh.id) file_maps.update( {u'1-21_%s.docx' % thuctapsinh.name: doc1_21.name}) doc1_27 = invoice.create_Doc_1_27(infor) file_maps.update({u'1-27.docx': doc1_27.name}) for thuctapsinh in data: doc1_28 = invoice.create_Doc_1_28(infor, thuctapsinh.id) file_maps.update( {u'1_28_%s.docx' % thuctapsinh.name: doc1_28.name}) doc1_29 = invoice.create_Doc_1_29(infor) file_maps.update({u'1_29.docx': doc1_29.name}) docCCDT = invoice.create_certification_end_train(infor) file_maps.update({u'DocCCDT.docx': docCCDT.name}) doc_list_send = invoice.create_list_of_sent_en(infor) file_maps.update({u'推薦書 - ENG.docx': doc_list_send.name}) doc_list_send_jp = invoice.create_list_of_sent_jp(infor) file_maps.update({u'推薦書.docx': doc_list_send_jp.name}) for thuctapsinh in data: dochdtn = invoice.create_hdtn(infor, thuctapsinh.id) file_maps.update( {u'HDPCTN_%s.docx' % thuctapsinh.name: dochdtn.name}) dochdtv = invoice.create_hdtv(infor, thuctapsinh.id) file_maps.update( {u'HDPCTV_%s.docx' % thuctapsinh.name: dochdtv.name}) DSLD = invoice.create_danh_sach_lao_dong(infor) file_maps.update({u'DSLD.docx': DSLD.name}) CheckList = invoice.create_check_list(infor) file_maps.update({u'CheckList.docx': CheckList.name}) Doc4_8 = invoice.create_48(infor) file_maps.update({u'Doc48.docx': Doc4_8.name}) Master = invoice.create_master(infor) file_maps.update({u'Master.docx': Master.name}) for key in file_maps: archive.write(file_maps[key], key) os.unlink(file_maps[key]) archive.close() reponds.flush() ret_zip = reponds.getvalue() reponds.close() # --------------------------------------------------------------------------------------------------- if doc_type == 'Doc1-3': doc1_3 = 'Doc1_3.zip' return request.make_response( ret_zip, [('Content-Type', 'application/zip'), ('Content-Disposition', content_disposition(doc1_3))]) if doc_type == 'Doc1-10': doc1_10 = 'Doc1_10.zip' return request.make_response( ret_zip, [('Content-Type', 'application/zip'), ('Content-Disposition', content_disposition(doc1_10))]) if doc_type == 'Doc1-13': doc1_13_1 = 'Doc1_13_1.zip' return request.make_response( ret_zip, [('Content-Type', 'application/zip'), ('Content-Disposition', content_disposition(doc1_13_1))]) doc1_13_2 = 'Doc1_13_2.zip' return request.make_response( ret_zip, [('Content-Type', 'application/zip'), ('Content-Disposition', content_disposition(doc1_13_2))]) if doc_type == 'Doc1-20': doc1_20 = 'Doc1_20.zip' return request.make_response( ret_zip, [('Content-Type', 'application/zip'), ('Content-Disposition', content_disposition(doc1_20))]) if doc_type == 'Doc1-21': doc1_21 = 'Doc1_21.zip' return request.make_response( ret_zip, [('Content-Type', 'application/zip'), ('Content-Disposition', content_disposition(doc1_21))]) if doc_type == 'Doc1-27': doc1_27 = 'Doc1_27.zip' return request.make_response( ret_zip, [('Content-Type', 'application/zip'), ('Content-Disposition', content_disposition(doc1_27))]) if doc_type == 'Doc1-28': doc1_28 = 'Doc1_28.zip' return request.make_response( ret_zip, [('Content-Type', 'application/zip'), ('Content-Disposition', content_disposition(doc1_28))]) if doc_type == 'Doc1-29': doc1_29 = 'Doc1_29.zip' return request.make_response( ret_zip, [('Content-Type', 'application/zip'), ('Content-Disposition', content_disposition(doc1_29))]) if doc_type == 'DocCCDT': docCCDT = 'DocCCDT.zip' return request.make_response( ret_zip, 
[('Content-Type', 'application/zip'), ('Content-Disposition', content_disposition(docCCDT))]) if doc_type == 'HDPC': dochdtn = 'HDPC.zip' return request.make_response( ret_zip, [('Content-Type', 'application/zip'), ('Content-Disposition', content_disposition(dochdtn))]) if doc_type == 'PROLETTER': doc_list_send = 'PROLETTER.zip' return request.make_response( ret_zip, [('Content-Type', 'application/zip'), ('Content-Disposition', content_disposition(doc_list_send))]) if doc_type == 'DSLD': DSLD = 'DSLD.zip' return request.make_response( ret_zip, [('Content-Type', 'application/zip'), ('Content-Disposition', content_disposition(DSLD))]) if doc_type == 'CheckList': CheckList = 'CheckList.zip' return request.make_response( ret_zip, [('Content-Type', 'application/zip'), ('Content-Disposition', content_disposition(CheckList))]) if doc_type == 'Doc4-8': Doc4_8 = 'Doc4-8.zip' return request.make_response( ret_zip, [('Content-Type', 'application/zip'), ('Content-Disposition', content_disposition(Doc4_8))]) if doc_type == 'Master': Master = 'Master.zip' return request.make_response( ret_zip, [('Content-Type', 'application/zip'), ('Content-Disposition', content_disposition(Master))]) if doc_type == 'Doc-ALL': demo = 'Full.zip' return request.make_response( ret_zip, [('Content-Type', 'application/zip'), ('Content-Disposition', content_disposition(demo))])
class DataIO(BinFormat): """This class wraps a binary file or a string of bytes and provides both the file and bytes API. """ def __init__(self, f): if isinstance(f,bytes): self.f=BytesIO(f) else: self.f=f def __getitem__(self,i): stay = self.f.tell() sta = i.start or stay self.f.seek(sta,0) if i.stop is None: data = self.f.read() else: data = self.f.read(i.stop-sta) self.f.seek(stay,0) return data def read(self,size=-1): return self.f.read(size) def readline(self,size=-1): return self.f.readline(size) def readlines(self,size=-1): return self.f.readlines(size) def xreadlines(self,size=-1): return self.f.xreadlines(size) def write(self,s): return self.f.write(s) def writelines(self,l): return self.f.writelines(l) def seek(self,offset,whence=0): return self.f.seek(offset,whence) def tell(self): return self.f.tell() def flush(self): return self.f.flush() def fileno(self): return self.f.fileno() def isatty(self): return self.f.isatty() def next(self): return self.f.next() def truncate(self,size=0): return self.f.truncate(size) def close(self): return self.f.close() @property def closed(self): return self.f.closed @property def encoding(self): return self.f.encoding @property def errors(self): return self.f.errors @property def mode(self): return self.f.mode @property def name(self): try: return self.f.name except AttributeError: try: from builtins import bytes except ImportError: pass s = bytes(self.f.getvalue()) return '(sc-%s...)'%(''.join(["%02x"%x for x in s])[:8]) filename = name @property def newlines(self): return self.f.newlines @property def softspace(self): return self.f.softspace
def __init__(self, name):
    self.name = name
    self.name_backwards = name[::-1]


data = []
data.append(SimpleObject('pickle'))
data.append(SimpleObject('cPickle'))
data.append(SimpleObject('last'))

# Simulate a file with BytesIO
out_s = BytesIO()

# Write to the stream
for o in data:
    print('WRITING : %s (%s)' % (o.name, o.name_backwards))
    pickle.dump(o, out_s)
    out_s.flush()

# Set up a read-able stream
in_s = BytesIO(out_s.getvalue())

# Read the data
while True:
    try:
        o = pickle.load(in_s)
    except EOFError:
        break
    else:
        print('READ : %s (%s)' % (o.name, o.name_backwards))
def decrypt_aes(file_or_data, password=None, outfile=None, salt=None, mode=None, base64encode=False, chunksize=512 * 1024): r""" Flexible implementaiton of AES decryption Parameters ---------- file_or_data : {BufferObject, string, bytes} input data will be converted to bytes sequence for encryption password : {str, None} if None, a prompt will ask for inputing password outfile : {None, path, file} if None, return raw encrypted data salt : {None, string, bytes} salt for password Hashing mode : Cipher.AES.MODE_* chunksize : int encryption chunk, multiple of 16. """ try: from Crypto.Cipher import AES except ImportError as e: raise ImportError("Require 'pycrypto' to run this function") if mode is None: mode = AES.MODE_CBC if password is None: password = input("Your password: "******"Password length must be greater than 0" password = to_password(password, salt) # ====== read header ====== # infile, filesize, own_file = _data_to_iobuffer(file_or_data) origsize = struct.unpack('<Q', infile.read(struct.calcsize('Q')))[0] iv = infile.read(16) decryptor = AES.new(password, mode=AES.MODE_CBC, IV=iv) # ====== outfile ====== # close_file = False if isinstance(outfile, string_types) and os.path.exists( os.path.dirname(outfile)): outfile = open(str(outfile), 'wb') close_file = True elif hasattr(outfile, 'write') and hasattr(outfile, 'flush'): close_file = True else: outfile = BytesIO() # ====== decryption ====== # while True: chunk = infile.read(chunksize) if len(chunk) == 0: break chunk = decryptor.decrypt(chunk) if bool(base64encode): chunk = base64.decodebytes(chunk) outfile.write(chunk) outfile.truncate(origsize) # ====== clean and return ====== # if own_file: infile.close() outfile.flush() if close_file: outfile.close() else: outfile.seek(0) data = outfile.read() outfile.close() return data
def download_extern_document_specific(self, model, id, document,filename=None, **kwargs): invoice = request.env[model].search([('id', '=', id)]) reponds = BytesIO() archive = zipfile.ZipFile(reponds, 'w', zipfile.ZIP_DEFLATED) if document == 'Doc1-3': for i, intern in enumerate(invoice.interns_pass): doc1_3 = invoice.create_doc_1_3(intern, i) archive.write(doc1_3.name, '1_3_%d_%s.docx' % ((i + 1), intern_utils.name_with_underscore(intern.name))) os.unlink(doc1_3.name) elif document == 'Doc1-10': for i, intern in enumerate(invoice.interns_pass): doc1_10 = invoice.create_doc_1_10(intern) archive.write(doc1_10.name, '1_10_%d_%s.docx' % ((i + 1), intern_utils.name_with_underscore(intern.name))) os.unlink(doc1_10.name) elif document == 'Doc1-13': doc1_13_1 = invoice.create_1_13_1() archive.write(doc1_13_1.name, u'1-13号 HOANG HUNG JAPAN 訓連センター.docx') os.unlink(doc1_13_1.name) doc1_13_2 = invoice.create_1_13_2() archive.write(doc1_13_2.name, u'1-13号HOANG HUNG 会社.docx') os.unlink(doc1_13_2.name) elif document == 'Doc1-20': doc1_20 = invoice.create_doc_1_20() archive.write(doc1_20.name, '1_20.docx') os.unlink(doc1_20.name) elif document == 'Doc1-21': for i, intern in enumerate(invoice.interns_pass): doc1_21 = invoice.create_doc_1_21(intern) archive.write(doc1_21.name, '1_21_%d_%s.docx' % ((i + 1), intern_utils.name_with_underscore(intern.name))) os.unlink(doc1_21.name) elif document == 'Doc1-28': for i, intern in enumerate(invoice.interns_pass): doc1_28 = invoice.create_doc_1_28(intern,i) archive.write(doc1_28.name, '1_28_%d_%s.docx' % ((i + 1), intern_utils.name_with_underscore(intern.name))) os.unlink(doc1_28.name) elif document == 'Doc1-29': doc1_29 = invoice.create_doc_1_29() archive.write(doc1_29.name, '1_29.docx') os.unlink(doc1_29.name) elif document == 'HDPC': for i, intern in enumerate(invoice.interns_pass): hdtn = invoice.create_hdtn(intern) archive.write(hdtn.name, 'hdtn_%d_%s.docx' % ((i + 1), intern_utils.name_with_underscore(intern.name))) os.unlink(hdtn.name) hdtv = invoice.create_hdtv(intern) archive.write(hdtv.name, 'hdtv_%d_%s.docx' % ((i + 1), intern_utils.name_with_underscore(intern.name))) os.unlink(hdtv.name) elif document == 'PROLETTER': doc_list_send = invoice.create_list_of_sent_en() archive.write(doc_list_send.name, u'推薦書 - ENG.docx') os.unlink(doc_list_send.name) doc_list_send_jp = invoice.create_list_of_sent_jp() archive.write(doc_list_send_jp.name, u'推薦書.docx') os.unlink(doc_list_send_jp.name) archive.close() reponds.flush() ret_zip = reponds.getvalue() reponds.close() return request.make_response(ret_zip, [('Content-Type', 'application/zip'), ('Content-Disposition', content_disposition(filename))])
def parallel_summarize_master(sc, streams, subjects, mode, master_file, parallelism="all", localtime=False): """ Builds a set of dictionaries that get serialized and passed to Spark jobs to summarize data for and experiment. Args: sc (SparkContext): The SparkContext object to be used to run summarization jobs streams (List): Set of all streams to add to the data frame. subjects (List): Set of subjects to pull data for. mode (str): Start from scratch or add streams/subjects to existing data frame. master_file (str): Name of master data file that will be created. parallelism (str): One of the available parallelization schemes localtime (bool): Whether or not to shift marker and label streams to subjects' home time zone. """ #sc.setLogLevel(0) master_log = [] job_list = [] df_list = [] all_stream_metadata = {} #Extract streams names from stream list of dictionaries stream_names = [x["name"] for x in streams] #master_file = "master_summary_dataframe.pkl" if (os.path.isfile(master_file)): master_df = pickle.load(open(master_file, "rb")) print( "Found existing master summary data frame for %d subjects and %d features" % (master_df.shape[0], master_df.shape[1])) master_log.append( make_log( "", "", "", "INFO", "found existing master summary data frame for {} subjects and {} features" .format(master_df.shape[0], master_df.shape[1]))) existing_streams = list(master_df.columns) existing_subjects = list(master_df.index.levels[0]) #master file exists, so consider an incremental mode if (mode == "scratch" or mode == "test"): #Re-compute the master summary dataframe from scratch for the, #throwing out all olf data print("Mode=scratch: Re-computing from scratch") master_log.append( make_log("", "", "", "INFO", "mode=scratch: re-computing from scratch")) elif (mode == "replace_streams"): #Re-compute the summaries for the input streams #Will also compute from scratch for any new streams #.Only runs on existing subjects print( "Mode=replace_streams: Replacing old stream computations for existing users" ) master_log.append( make_log( "", "", "", "INFO", "mode=replace_streams: replacing old stream computations for existing users" )) subjects = existing_subjects elif (mode == "increment_streams"): #Compute summaries for the input streams, #skipping streams that already exist. Only #operates of existing subjects #Drop computation of streams that already exist new_stream_names = list(set(stream_names) - set(existing_streams)) new_streams = [] for s in streams: if s["name"] in new_stream_names: new_streams.append(s) streams = new_streams subjects = existing_subjects if (len(streams) == 0): print( "All streams have already been computed. No incremental additions." ) master_log.append( make_log( "", "", "", "INFO", "all streams have already been computed: no incremental additions" )) exit() else: print("Incrementing streams: ", streams) master_log.append( make_log("", "", "", "INFO", "incrementing streams {}".format(streams))) elif (mode == "increment_subjects"): #Compute summaries for the input subjects, #skipping subjects that already exist #Only operates on existing streams subjects = list(set(subjects) - set(existing_subjects)) streams = existing_streams if (len(subjects) == 0): print( "All subjects have already been computed. No incremental additions." 
) master_log.append( make_log( "", "", "", "INFO", "all streams have already been computed: no incremental additions" )) exit() else: print("Incrementing subjects: ", subjects) master_log.append( make_log("", "", "", "INFO", "incrementing subjects {}".format(subjects))) else: print("Error: Summarization mode is not defined") master_log.append( make_log("", "", "", "ERROR", "summarization mode is not defined")) exit() else: if mode not in ["test", "scratch"]: print( "Mode is not test or scratch, but master data file does not exist to increment or replace" ) if (mode == "test"): #Test mode. Use five good user. #5 streams only for debug purposes. streams = streams[:20] subjects = [ "622bf725-2471-4392-8f82-fcc9115a3745", "d3d33d63-101d-44fd-b6b9-4616a803225d", "c1f31960-dee7-45ea-ac13-a4fea1c9235c", "7b8358f3-c96a-4a17-87ab-9414866e18db", "8a3533aa-d6d4-450c-8232-79e4851b6e11" ] # build up dictionary, write to string, pass to write_..._for_subs...() out_list = [] for i in range(0, len(subjects)): job_dict = {} job_dict["subject"] = subjects[i] job_dict["streams"] = streams job_dict["localtime"] = localtime json_string = json.dumps(job_dict) if (parallelism == "single"): out_list.append(parallel_summarize_worker(json_string)) else: job_list.append(json_string) if (parallelism == "by-subject"): summ_rdd = sc.parallelize(job_list, len(job_list)) job = summ_rdd.map(parallel_summarize_worker) out_list = job.collect() df_list_data, meta_data_list, subject_logs = zip(*out_list) #Combine all meta data dictionaries into #one dictionary. Keys are stream/field ids #values are meta data elements all_meta_data = {} for m in meta_data_list: all_meta_data.update(m) # process logs -- append to master log, write to CSV, etc. master_log.extend( subject_logs ) # FIXME: this should already produce a flattened list, shouldn't need next line master_log = [item for sublist in master_log for item in sublist] # write master log to CSV if not os.path.isdir("run_logs"): os.makedirs("run_logs") with open( "run_logs/{}_{}.csv".format( "master_summarizer", datetime.datetime.now().strftime("%m-%d-%y_%H:%M")), "w") as f: writer = csv.writer(f) writer.writerows(master_log) #df_data=df_list_data df_data = pd.concat(df_list_data, axis=0, keys=subjects) df_data.index.levels[1].name = "Date" df_data.index.levels[0].name = "Participant" if (mode == "scratch" or mode == "test"): #Re-compute the master summary dataframe from scratch for the, #throwing out all olf data master_df = df_data elif (mode == "replace_streams"): #Re-compute the summaries for the input streams #Will also compute from scratch for any new streams. 
#Only runs on existing subjects #Drop existing streams stream_intersect = list( set(existing_streams).intersection(stream_names)) #Replace old streams and add new streams master_df = master_df.drop(labels=stream_intersect) master_df = pd.concat([master_df, df_data], axis=1) elif (mode == "increment_streams"): #Compute summaries for the input streams, #skipping streams that already exist master_df = pd.concat([master_df, df_data], axis=1) elif (mode == "increment_subjects"): #Compute summaries for the input subjects, #skipping subjects that already exist master_df = pd.concat([master_df, df_data], axis=0) #Write to disk #Add current timestamp to master file name fname, fext = os.path.splitext(master_file) timestr = time.strftime("%Y%m%d-%H%M%S") if (mode == "test"): master_file = fname + "-test-" + timestr + fext else: master_file = fname + "-" + timestr + fext pickle.dump({ "dataframe": master_df, "metadata": all_meta_data }, open(master_file, "wb"), protocol=2) try: #Write to minio object_name = "summary.dataframe" bucket_name = "master.summary" if not mC.bucket_exists(bucket_name): mC.make_bucket(bucket_name) bytes = BytesIO() # TODO: check with Ben on this change: df --> df_data pickle.dump(df_data, bytes, protocol=2) bytes.flush() bytes.seek(0) mC.put_object(bucket_name, object_name, bytes, len(bytes.getvalue())) bytes.close() except Exception as e: print(" ! Warning: Could not save to minio") print("-" * 50) print(traceback.format_exc()) print(e) print("-" * 50)
def _clone(x):
    out_buf = BytesIO()
    joblib.dump(x, out_buf)
    out_buf.flush()
    in_buf = BytesIO(out_buf.getvalue())
    return joblib.load(in_buf)
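# Minimal usage sketch (not from the original source): _clone() above round-trips
# any joblib-picklable object through an in-memory buffer, yielding a deep copy.
original = {'weights': [1, 2, 3]}
copy = _clone(original)
assert copy == original and copy is not original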
class AMQPWriter(object): """Convert higher-level AMQP types to bytestreams.""" def __init__(self, dest=None): """dest may be a file-type object (with a write() method). If None then a BytesIO is created, and the contents can be accessed with this class's getvalue() method.""" self.out = BytesIO() if dest is None else dest self.bits = [] self.bitcount = 0 def _flushbits(self): if self.bits: out = self.out for b in self.bits: out.write(pack('B', b)) self.bits = [] self.bitcount = 0 def close(self): """Pass through if possible to any file-like destinations.""" try: self.out.close() except AttributeError: pass def flush(self): """Pass through if possible to any file-like destinations.""" try: self.out.flush() except AttributeError: pass def getvalue(self): """Get what's been encoded so far if we're working with a BytesIO.""" self._flushbits() return self.out.getvalue() def write(self, s): """Write a plain Python string with no special encoding in Python 2.x, or bytes in Python 3.x""" self._flushbits() self.out.write(s) def write_bit(self, b): """Write a boolean value.""" b = 1 if b else 0 shift = self.bitcount % 8 if shift == 0: self.bits.append(0) self.bits[-1] |= (b << shift) self.bitcount += 1 def write_octet(self, n): """Write an integer as an unsigned 8-bit value.""" if n < 0 or n > 255: raise FrameSyntaxError('Octet {0!r} out of range 0..255'.format(n)) self._flushbits() self.out.write(pack('B', n)) def write_short(self, n): """Write an integer as an unsigned 16-bit value.""" if n < 0 or n > 65535: raise FrameSyntaxError( 'Octet {0!r} out of range 0..65535'.format(n)) self._flushbits() self.out.write(pack('>H', int(n))) def write_long(self, n): """Write an integer as an unsigned2 32-bit value.""" if n < 0 or n >= 4294967296: raise FrameSyntaxError( 'Octet {0!r} out of range 0..2**31-1'.format(n)) self._flushbits() self.out.write(pack('>I', n)) def write_longlong(self, n): """Write an integer as an unsigned 64-bit value.""" if n < 0 or n >= 18446744073709551616: raise FrameSyntaxError( 'Octet {0!r} out of range 0..2**64-1'.format(n)) self._flushbits() self.out.write(pack('>Q', n)) def write_shortstr(self, s): """Write a string up to 255 bytes long (after any encoding). If passed a unicode string, encode with UTF-8. """ self._flushbits() if isinstance(s, string): s = s.encode('utf-8') if len(s) > 255: raise FrameSyntaxError('Shortstring overflow ({0} > 255)'.format( len(s))) self.write_octet(len(s)) self.out.write(s) def write_longstr(self, s): """Write a string up to 2**32 bytes long after encoding. If passed a unicode string, encode as UTF-8. 
""" self._flushbits() if isinstance(s, string): s = s.encode('utf-8') self.write_long(len(s)) self.out.write(s) def write_table(self, d): """Write out a Python dictionary made of up string keys, and values that are strings, signed integers, Decimal, datetime.datetime, or sub-dictionaries following the same constraints.""" self._flushbits() table_data = AMQPWriter() for k, v in items(d): table_data.write_shortstr(k) table_data.write_item(v, k) table_data = table_data.getvalue() self.write_long(len(table_data)) self.out.write(table_data) def write_item(self, v, k=None): if isinstance(v, (string_t, bytes)): if isinstance(v, string): v = v.encode('utf-8') self.write(b'S') self.write_longstr(v) elif isinstance(v, bool): self.write(pack('>cB', b't', int(v))) elif isinstance(v, float): self.write(pack('>cd', b'd', v)) elif isinstance(v, int_types): self.write(pack('>ci', b'I', v)) elif isinstance(v, Decimal): self.write(b'D') sign, digits, exponent = v.as_tuple() v = 0 for d in digits: v = (v * 10) + d if sign: v = -v self.write_octet(-exponent) self.write(pack('>i', v)) elif isinstance(v, datetime): self.write(b'T') self.write_timestamp(v) elif isinstance(v, dict): self.write(b'F') self.write_table(v) elif isinstance(v, (list, tuple)): self.write(b'A') self.write_array(v) elif v is None: self.write(b'V') else: err = (ILLEGAL_TABLE_TYPE_WITH_KEY.format(type(v), k, v) if k else ILLEGAL_TABLE_TYPE.format(type(v), v)) raise FrameSyntaxError(err) def write_array(self, a): array_data = AMQPWriter() for v in a: array_data.write_item(v) array_data = array_data.getvalue() self.write_long(len(array_data)) self.out.write(array_data) def write_timestamp(self, v): """Write out a Python datetime.datetime object as a 64-bit integer representing seconds since the Unix epoch.""" self.out.write(pack('>Q', long_t(calendar.timegm(v.utctimetuple()))))
class ChunkChannel(object): """ Reader/writer for chunked data. .. note:: logs at DEBUG level """ max_chunk_size = 65535 def __init__(self, sock): self.socket = sock self.raw = BytesIO() self.output_buffer = [] self.output_size = 0 self._recv_buffer = b"" def write(self, b): """ Write some bytes, splitting into chunks if necessary. """ max_chunk_size = self.max_chunk_size output_buffer = self.output_buffer while b: size = len(b) future_size = self.output_size + size if future_size >= max_chunk_size: end = max_chunk_size - self.output_size output_buffer.append(b[:end]) self.output_size = max_chunk_size b = b[end:] self.flush() else: output_buffer.append(b) self.output_size = future_size b = b"" def flush(self, end_of_message=False): """ Flush everything written since the last chunk to the stream, followed by a zero-chunk if required. """ output_buffer = self.output_buffer if output_buffer: lines = [struct_pack(">H", self.output_size)] + output_buffer else: lines = [] if end_of_message: lines.append(b"\x00\x00") if lines: self.raw.writelines(lines) self.raw.flush() del output_buffer[:] self.output_size = 0 def send(self): """ Send all queued messages to the server. """ data = self.raw.getvalue() if __debug__: log_debug("C: %s", ":".join(map(hex2, data))) self.socket.sendall(data) self.raw.seek(self.raw.truncate(0)) def _recv(self, size): # If data is needed, keep reading until all bytes have been received remaining = size - len(self._recv_buffer) ready_to_read = None while remaining > 0: # Read up to the required amount remaining b = self.socket.recv(8192) if b: if __debug__: log_debug("S: %s", ":".join(map(hex2, b))) else: if ready_to_read is not None: raise ProtocolError("Server closed connection") remaining -= len(b) self._recv_buffer += b # If more is required, wait for available network data if remaining > 0: ready_to_read, _, _ = select((self.socket, ), (), (), 0) while not ready_to_read: ready_to_read, _, _ = select((self.socket, ), (), (), 0) # Split off the amount of data required and keep the rest in the buffer data, self._recv_buffer = self._recv_buffer[:size], self._recv_buffer[ size:] return data def chunk_reader(self): chunk_size = -1 while chunk_size != 0: chunk_header = self._recv(2) chunk_size, = struct_unpack_from(">H", chunk_header) if chunk_size > 0: data = self._recv(chunk_size) yield data def close(self): """ Shut down and close the connection. """ if __debug__: log_info("~~ [CLOSE]") socket = self.socket socket.shutdown(SHUT_RDWR) socket.close()
def createGeoJson(geoms, output=None, srs=4326, topo=False, fill=''): """Convert a set of geometries to a geoJSON object""" if(srs): srs = SRS.loadSRS(srs) # arrange geom, index, and data if isinstance(geoms, ogr.Geometry): # geoms is a single geometry finalGeoms = [geoms, ] data = None index = [0, ] elif isinstance(geoms, pd.Series): index = geoms.index finalGeoms = geoms.values data = None elif isinstance(geoms, pd.DataFrame): index = geoms.index finalGeoms = geoms.geom.values data = geoms.loc[:, geoms.columns != 'geom'] data["_index"] = index else: finalGeoms = list(geoms) data = None index = list(range(len(finalGeoms))) if len(finalGeoms) == 0: raise GeoKitVectorError("Empty geometry list given") # Transform? if not srs is None: finalGeoms = GEOM.transform(finalGeoms, toSRS=srs) # Make JSON object from io import BytesIO if not output is None and not isinstance(output, str): if not output.writable(): raise GeoKitVectorError("Output object is not writable") if topo: fo = BytesIO() else: fo = output elif isinstance(output, str) and not topo: fo = open(output, "wb") else: fo = BytesIO() fo.write( bytes('{"type":"FeatureCollection","features":[', encoding='utf-8')) for j, i, g in zip(range(len(index)), index, finalGeoms): fo.write(bytes('%s{"type":"Feature",' % ("" if j == 0 else ","), encoding='utf-8')) if data is None: fo.write( bytes('"properties":{"_index":%s},' % str(i), encoding='utf-8')) else: fo.write(bytes('"properties":%s,' % data.loc[i].fillna(fill).to_json(), encoding='utf-8')) fo.write(bytes('"geometry":%s}' % g.ExportToJson(), encoding='utf-8')) #fo.write(bytes('"geometry": {"type": "Point","coordinates": [125.6, 10.1] }}', encoding='utf-8')) fo.write(bytes("]}", encoding='utf-8')) fo.flush() # Put in the right format if topo: from topojson import conversion from io import TextIOWrapper fo.seek(0) topo = conversion.convert(TextIOWrapper( fo), object_name="primary") # automatically closes fo topo = str(topo).replace("'", '"') # Done! if output is None: if topo: return topo else: fo.seek(0) geojson = fo.read() fo.close() return geojson.decode('utf-8') elif isinstance(output, str): if topo: with open(output, "w") as fo: fo.write(topo) else: pass # we already wrote to the file! return output else: if topo: output.write(bytes(topo, encoding='utf-8')) else: pass # We already wrote to the file! return None
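A compact sketch of the same streaming idea: the FeatureCollection wrapper and each feature are written piecewise into a BytesIO so the full JSON never has to be assembled as one Python string. Plain dicts stand in for the ogr geometries, and features_to_geojson is an illustrative name only:

import json
from io import BytesIO

def features_to_geojson(geometries) -> str:
    fo = BytesIO()
    fo.write(b'{"type":"FeatureCollection","features":[')
    for j, geom in enumerate(geometries):
        feature = {"type": "Feature", "properties": {"_index": j}, "geometry": geom}
        fo.write(b"" if j == 0 else b",")
        fo.write(json.dumps(feature).encode("utf-8"))
    fo.write(b"]}")
    fo.flush()
    return fo.getvalue().decode("utf-8")

print(features_to_geojson([{"type": "Point", "coordinates": [125.6, 10.1]}]))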
class StreamIO(object): stream = None endian = None labels = {} # I/O functions read_func = None write_func = None # attributes can_seek = False can_tell = False def __init__(self, stream=None, endian: Endian = Endian.LITTLE): self.reset() self.set_stream(stream) self.set_endian(endian) self.set_io_funcs() # reset def reset(self) -> None: self.stream = None self.endian = None self.labels = {} self.read_func = None self.write_func = None self.can_seek = False self.can_tell = False # add with functionality def __enter__(self): return self def __exit__(self, exc_type, exc_val, exc_tb): self.close() # shortcuts def __int__(self) -> int: return self.tell() def __len__(self) -> int: return self.length() def __bytes__(self) -> bytes: return self.getvalue() def __iadd__(self, other: int) -> None: self.seek(self.tell() + other) def __isub__(self, other: int) -> None: self.seek(self.tell() - other) def __imul__(self, other: int) -> None: self.seek(self.tell() * other) def __ifloordiv__(self, other: int) -> None: self.seek(self.tell() // other) def __itruediv__(self, other: int) -> None: self.seek(self.tell() // other) def __getitem__(self, key: int | slice): if isinstance(key, slice): return self.read_bytes_at(key.start, key.stop - key.start) return self.read_byte_at(key) def __setitem__(self, key: int | slice, value: int | bytes | bytearray) -> int: if isinstance(key, slice): return self.write_bytes_at(key.start, value) if isinstance(value, bytes) or isinstance(value, bytearray): if len(value) > 1: return self.write_bytes_at(key, value) else: return self.write_byte_at(key, value[0]) else: return self.write_byte_at(key, value) # virtual file pointer @property def offset(self) -> int: return self.tell() @offset.setter def offset(self, value: int) -> None: self.seek(value) # utilities def set_stream(self, stream) -> None: """ Set stream to read/write from/to :param stream: The stream to interact with :return: None """ if stream is None: self.stream = BytesIO() elif type(stream) in [bytes, bytearray, memoryview]: self.stream = BytesIO(stream) elif type(stream) == str: if isfile(stream): self.stream = open(stream, "r+b") else: self.stream = open(stream, "wb") else: self.stream = stream self.can_seek = self.stream.seekable() self.can_tell = self.stream.seekable() def set_endian(self, endian: Endian) -> None: """ Set the endian you want to use for reading/writing data in the stream :param endian: LITTLE, BIG, NETWORK, or NATIVE :return: None """ endian = int(endian) endians = ["<", ">", "!", "@"] if endian in range(0, len(endians)): self.endian = endians[endian] def set_read_func(self, name: str) -> None: #, *param_types): """ Set the function name in the stream of the read function :param name: The name of the read function :return: None """ if hasattr(self.stream, name): self.read_func = getattr(self.stream, name) def set_write_func(self, name: str) -> None: #, *param_types): """ Set the function name in the stream of the write function :param name: The name of the write function :return: None """ if hasattr(self.stream, name): self.write_func = getattr(self.stream, name) def set_io_funcs(self, read_name: str = "read", write_name: str = "write") -> None: """ Set the read/write function names in the stream :param read_name: The name of the read function :param write_name: The name of the write function :return: None """ self.set_read_func(read_name) self.set_write_func(write_name) def tell(self) -> int: """ Tell the current position of the stream if supported :return: The position of the stream """ if 
self.can_tell: return self.stream.tell() raise NotImplementedError( "tell isn't implemented in the specified stream!") def seek(self, index: int, whence: int = SEEK_SET) -> int: """ Jump to a position in the stream if supported :param index: The offset to jump to :param whence: Index is interpreted relative to the position indicated by whence (SEEK_SET, SEEK_CUR, and SEEK_END in io library) :return: The new absolute position """ if self.can_seek: return self.stream.seek(index, whence) raise NotImplementedError( "seek isn't implemented in the specified stream!") def seek_start(self) -> int: """ Jump to the beginning of the stream if supported :return: The new absolute position """ return self.stream.seek(0) def seek_end(self) -> int: """ Jump to the end of the stream if supported :return: The new absolute position """ return self.stream.seek(0, SEEK_END) def length(self) -> int: """ Get the length of the stream if supported :return: The total length of the stream """ loc = self.tell() self.seek_end() size = self.tell() self.seek(loc) return size def getvalue(self) -> bytes | bytearray: """ Get the stream's output :return: The stream's data as bytes or bytearray """ return self.stream.getvalue() def getbuffer(self) -> bytes | bytearray: """ Get the stream's buffer :return: The stream's buffer as bytes or bytearray """ return self.stream.getbuffer() def flush(self) -> None: """ Write the data to the stream :return: None """ return self.stream.flush() def close(self) -> None: """ Close the stream :return: None """ self.stream.close() # labeling def get_labels(self) -> list: return list(self.labels.keys()) def label_exists(self, name: str) -> bool: return name in self.get_labels() def get_label(self, name: str) -> int: return self.labels[name] def set_label(self, name: str, offset: int = None, overwrite: bool = True) -> int: if not overwrite and self.label_exists(name): name += ("_" + rand_str(4)) if offset is not None and offset >= 0: loc = offset else: loc = self.tell() self.labels[name] = loc return loc def rename_label(self, old_name: str, new_name: str, overwrite: bool = True) -> bool: assert old_name != new_name, "Old and new label names shouldn't be the same" if self.label_exists(old_name): value = self.get_label(old_name) self.del_label(old_name) self.set_label(new_name, value, overwrite) return False def goto_label(self, name: str) -> int: return self.seek(self.labels[name]) def del_label(self, name: str) -> int: return self.labels.pop(name) # base I/O methods def read(self, num: int = None) -> bytes | bytearray: if num is None: return self.read_func() return self.read_func(num) def write(self, data: bytes | bytearray | int) -> int: if type(data) == int: data = bytes([data]) return self.write_func(data) def stream_unpack(self, fmt: str) -> tuple | list: fmt = f"{self.endian}{fmt}" return unpack(fmt, self.read(calcsize(fmt))) def stream_pack(self, fmt: str, *values) -> int: fmt = f"{self.endian}{fmt}" return self.write(pack(fmt, *values)) def stream_unpack_array(self, t: str, num: int) -> tuple | list: fmt = f"{self.endian}{num}{t}" return unpack(fmt, self.read(calcsize(fmt))) def stream_pack_array(self, t: str, *values) -> int: fmt = f"{self.endian}{len(values)}{t}" return self.write(pack(fmt, *values)) # bytes def read_sbyte(self) -> int: (val, ) = self.stream_unpack("b") return val def read_sbyte_at(self, offset: int, ret: bool = True) -> int: loc = self.tell() self.seek(offset) output = self.read_sbyte() if ret: self.seek(loc) return output def read_sbytes(self, num: int) -> 
Tuple[int] | List[int]: return self.stream_unpack_array("b", num) def read_sbytes_at(self, offset: int, num: int, ret: bool = True) -> Tuple[int] | List[int]: loc = self.tell() self.seek(offset) output = self.read_sbytes(num) if ret: self.seek(loc) return output def write_sbyte(self, value: int) -> int: return self.stream_pack("b", value) def write_sbyte_at(self, offset: int, value: int, ret: bool = True) -> int: loc = self.tell() self.seek(offset) output = self.write_sbyte(value) if ret: self.seek(loc) return output def write_sbytes(self, values: bytes | bytearray) -> int: return self.stream_pack_array("b", *values) def write_sbytes_at(self, offset: int, values: bytes | bytearray, ret: bool = True) -> int: loc = self.tell() self.seek(offset) output = self.write_sbytes(values) if ret: self.seek(loc) return output # bytes def read_byte(self) -> int: (val, ) = self.stream_unpack("B") return val read_ubyte = read_byte def read_byte_at(self, offset: int, ret: bool = True) -> int: loc = self.tell() self.seek(offset) output = self.read_byte() if ret: self.seek(loc) return output read_bytes = read read_ubytes = read def read_bytes_at(self, offset: int, num: int, ret: bool = True) -> bytes: loc = self.tell() self.seek(offset) output = self.read_bytes(num) if ret: self.seek(loc) return output read_ubytes_at = read_bytes_at def write_byte(self, value: int): return self.stream_pack("B", value) write_ubyte = write_byte def write_byte_at(self, offset: int, value: int, ret: bool = True) -> int: loc = self.tell() self.seek(offset) output = self.write_byte(value) if ret: self.seek(loc) return output write_bytes = write write_ubyte_at = write_byte_at def write_bytes_at(self, offset: int, values: bytes | bytearray, ret: bool = True) -> int: loc = self.tell() self.seek(offset) output = self.write_bytes(values) if ret: self.seek(loc) return output write_ubytes_at = write_bytes_at def load_from_buffer(self, data: bytes | bytearray) -> int: return self.write_bytes(data) # boolean def read_bool(self) -> bool: (val, ) = self.stream_unpack("?") return val def read_bool_array(self, num: int) -> Tuple[bool]: return self.stream_unpack_array("?", num) def write_bool(self, value: bool) -> int: return self.stream_pack("?", value) def write_bool_array(self, values: List[bool] | Tuple[bool]) -> int: return self.stream_pack_array("?", *values) # int16/short def read_int16(self) -> int: (val, ) = self.stream_unpack("h") return val read_short = read_int16 def read_int16_array(self, num: int) -> Tuple[int]: return self.stream_unpack_array("h", num) read_short_array = read_int16_array def write_int16(self, value: int) -> int: return self.stream_pack("h", value) write_short = write_int16 def write_int16_array(self, values: List[int] | Tuple[int]) -> int: return self.stream_pack_array("h", *values) write_short_array = write_int16_array # uint16/ushort def read_uint16(self) -> int: (val, ) = self.stream_unpack("H") return val read_ushort = read_uint16 def read_uint16_array(self, num: int) -> Tuple[int]: return self.stream_unpack_array("H", num) read_ushort_array = read_uint16_array def write_uint16(self, value: int) -> int: return self.stream_pack("H", value) write_ushort = write_uint16 def write_uint16_array(self, values: List[int] | Tuple[int]) -> int: return self.stream_pack_array("H", *values) write_ushort_array = write_uint16_array # int32/int/long def read_int32(self) -> int: (val, ) = self.stream_unpack("i") return val read_int = read_int32 read_long = read_int32 def read_int32_array(self, num: int) -> Tuple[int]: return 
self.stream_unpack_array("i", num) read_int_array = read_int32_array read_long_array = read_int32_array def write_int32(self, value: int) -> int: return self.stream_pack("i", value) write_int = write_int32 write_long = write_int32 def write_int32_array(self, values: List[int] | Tuple[int]) -> int: return self.stream_pack_array("i", *values) write_int_array = write_int32_array write_long_array = write_int32_array # uint32/uint/ulong def read_uint32(self) -> int: (val, ) = self.stream_unpack("I") return val read_uint = read_uint32 read_ulong = read_uint32 def read_uint32_array(self, num: int) -> Tuple[int]: return self.stream_unpack_array("I", num) read_uint_array = read_uint32_array read_ulong_array = read_uint32_array def write_uint32(self, value: int) -> int: return self.stream_pack("I", value) write_uint = write_uint32 write_ulong = write_uint32 def write_uint32_array(self, values: List[int] | Tuple[int]) -> int: return self.stream_pack_array("I", *values) write_uint_array = write_uint32_array write_ulong_array = write_uint32_array # int64/longlong def read_int64(self) -> int: return self.stream_unpack("q")[0] read_longlong = read_int64 def read_int64_array(self, num: int) -> Tuple[int]: return self.stream_unpack_array("q", num) read_longlong_array = read_int64_array def write_int64(self, value: int) -> int: return self.stream_pack("q", value) write_longlong = write_int64 def write_int64_array(self, values: List[int] | Tuple[int]) -> int: return self.stream_pack_array("q", *values) write_longlong_array = write_int64_array # uint64/ulonglong def read_uint64(self) -> int: (val, ) = self.stream_unpack("Q") return val read_ulonglong = read_uint64 def read_uint64_array(self, num: int) -> Tuple[int]: return self.stream_unpack_array("Q", num) read_ulonglong_array = read_uint64_array def write_uint64(self, value: int) -> int: return self.stream_pack("Q", value) write_ulonglong = write_uint64 def write_uint64_array(self, values: List[int] | Tuple[int]) -> int: return self.stream_pack_array("Q", *values) write_ulonglong_array = write_uint64_array # float32/single def read_float32(self) -> float: (val, ) = self.stream_unpack("f") return val read_single = read_float32 def read_float32_array(self, num: int) -> Tuple[float]: return self.stream_unpack_array("f", num) read_single_array = read_float32_array def write_float32(self, value: float) -> float: return self.stream_pack("f", value) write_single = write_float32 def write_float32_array(self, values: List[float] | Tuple[float]) -> int: return self.stream_pack_array("f", *values) write_single_array = write_float32_array # float64/double def read_float64(self) -> float: (val, ) = self.stream_unpack("d") return val read_double = read_float64 def read_float64_array(self, num: int) -> Tuple[float]: return self.stream_unpack_array("d", num) read_double_array = read_float64_array def write_float64(self, value: float) -> float: return self.stream_pack("d", value) write_double = write_float64 def write_float64_array(self, values: List[float] | Tuple[float]) -> int: return self.stream_pack_array("d", *values) write_double_array = write_float64_array # varint def read_varint(self) -> int: shift = 0 result = 0 while True: i = self.read_byte() result |= (i & 0x7f) << shift shift += 7 if not (i & 0x80): break return result def read_varint_array(self, num: int) -> Tuple[int]: return tuple([self.read_varint() for i in range(num)]) def write_varint(self, num: int) -> int: buff = b"" while True: towrite = num & 0x7f num >>= 7 if num: buff += bytes([(towrite | 0x80)]) 
else: buff += bytes([towrite]) break return self.write_bytes(buff) def write_varint_array(self, values: List[int] | Tuple[int]) -> int: return sum([self.write_varint(x) for x in values]) # strings def read_int7(self) -> int: index = 0 result = 0 while True: byte_value = self.read_byte() result |= (byte_value & 0x7F) << (7 * index) if byte_value & 0x80 == 0: break index += 1 return result def read_int7_array(self, num: int) -> Tuple[int]: return tuple([self.read_int7() for i in range(num)]) def write_int7(self, value: int) -> int: data = b"" num = value while num >= 0x80: data += bytes([((num | 0x80) & 0xFF)]) num >>= 7 data += bytes([num & 0xFF]) return self.write(data) def write_int7_array(self, values: List[int] | Tuple[int]) -> int: return sum([self.write_int7(x) for x in values]) def read_string(self, encoding: str = "UTF8") -> str: str_size = self.read_int7() if str_size <= 0: return "" return self.read(str_size).decode(encoding) def read_c_string(self, encoding: str = "UTF8") -> str: output = b"" while (tmp := self.read(1)) != b"\x00": output += tmp return output.rstrip(b"\x00").decode(encoding)
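StreamIO's read_varint/write_varint and the int7 helpers all use the usual 7-bits-per-byte layout with the high bit as a continuation flag. A self-contained round-trip sketch of that scheme over a BytesIO:

from io import BytesIO

def write_varint(out: BytesIO, num: int) -> int:
    written = 0
    while True:
        towrite = num & 0x7F
        num >>= 7
        if num:
            written += out.write(bytes([towrite | 0x80]))  # continuation bit set
        else:
            return written + out.write(bytes([towrite]))   # last byte, high bit clear

def read_varint(src: BytesIO) -> int:
    shift = result = 0
    while True:
        i = src.read(1)[0]
        result |= (i & 0x7F) << shift
        shift += 7
        if not i & 0x80:
            return result

values = [0, 1, 127, 128, 300, 2 ** 32]
buf = BytesIO()
for value in values:
    write_varint(buf, value)
buf.seek(0)
assert [read_varint(buf) for _ in values] == values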
def post(self): print('in------------upload') file1 = self.request.files['file1'][0] original_fname = file1['filename'] print(original_fname) extension = os.path.splitext(original_fname)[1] fname = ''.join(random.choice(string.ascii_lowercase + string.digits) for x in range(6)) final_filename= fname+extension output_file = open(os.path.join(options.storage_path, final_filename), 'wb') output_file.write(file1['body']) raw_file_name =options.storage_path + final_filename # if extension == 'wav': a, b, c = str(time.time()).partition('.') time_stamp = ''.join([a, b, c.zfill(2)]) memory_buffer = BytesIO() raw_file_obj='' # if extension == 'wav': # sound = AudioSegment.from_wav(raw_file_name) # can do same for mp3 and other formats # # raw_file_obj = sound._data # returns byte string # # raw_file_obj = open(raw, 'rb', os.O_NONBLOCK) print(extension, "2") xx="" index=0 acoustic_scores={} if extension=='.raw' or extension=='.wav' : # if extension == 'raw': print(extension) raw_file_obj = open(raw_file_name, 'rb', os.O_NONBLOCK) with contextlib.closing(wave.open(memory_buffer, 'wb')) as wave_obj: wave_obj.setnchannels(1) wave_obj.setframerate(16000) wave_obj.setsampwidth(2) # raw_file_obj.seek(-640000, 2) wave_obj.writeframes(raw_file_obj.read()) memory_buffer.flush() memory_buffer.seek(0) acoustic_scores = acoustic_identification4.dialect_estimation(memory_buffer) index = 1 # elif extension=='.wav' : # acoustic_scores = acoustic_identification4.dialect_estimationWav(raw_file_name) # # if extension == 'raw': # # index = 1 if index == 1: acoustic_weight = 1.0 # - lexical_weight # weighted_lexical = {dialect: value * lexical_weight for dialect, value in lexical_scores.items()} weighted_acoustic = {dialect: value * acoustic_weight for dialect, value in acoustic_scores.items()} # did_scores = {key: weighted_lexical[key] + weighted_acoustic[key] for key in [u'EGY', u'GLF', u'LAV', # u'MSA', u'NOR']} did_scores = {key: weighted_acoustic[key] for key in ['ALG', 'EGY', 'IRA', 'JOR', 'KSA', 'KUW', 'LEB', 'LIB', 'MAU', 'MOR', 'OMA', 'PAL', 'QAT', 'SUD', 'SYR', 'UAE', 'YEM']} # did_scores = {key: weighted_acoustic[key] for key in [u'ALG', u'EGY',u'IRA', u'JOR', u'KSA', u'KUW', u'LEB', u'LIB', u'MAU', u'MOR', u'OMA', u'PAL', u'QAT', u'SUD', u'SYR', u'UAE', u'YEM']} print(did_scores) json_list = list() # json_list.append(text) # json_list.append(utterance) json_dict = {'final_score': did_scores} # print(did_scores) # json_dict = {'lexical_score': lexical_scores, 'acoustic_score': acoustic_scores, 'final_score': did_scores} json_list.append(json_dict) text_file = os.path.join("uploads/", time_stamp + '.json') with open(text_file, mode='w') as json_obj: json.dump(json_list, json_obj) # return json_obj # event['result']['hypotheses'].append(did_scores) # return did_scores xx=json.dumps(json_list) print(xx) self.write(xx)
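The handler above wraps raw 16 kHz, 16-bit mono PCM in a WAV container entirely in memory before handing it to the classifier. A reduced sketch of just that wrapping step; the sample data here is silence and purely illustrative:

import contextlib
import wave
from io import BytesIO

raw_pcm = b"\x00\x00" * 16000  # one second of 16 kHz, 16-bit mono silence

memory_buffer = BytesIO()
with contextlib.closing(wave.open(memory_buffer, "wb")) as wave_obj:
    wave_obj.setnchannels(1)
    wave_obj.setframerate(16000)
    wave_obj.setsampwidth(2)
    wave_obj.writeframes(raw_pcm)

memory_buffer.seek(0)  # rewind before handing the buffer to downstream code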
def serialize_equal(serializable, target): output = BytesIO() serializable.serialize(output) output.flush() str_equal(str(output.getvalue(), "utf-8"), target) output.close()
class AMQPWriter(object): """ Convert higher-level AMQP types to bytestreams. """ def __init__(self, dest=None): """ dest may be a file-type object (with a write() method). If None then a BytesIO is created, and the contents can be accessed with this class's getvalue() method. """ if dest is None: self.out = BytesIO() else: self.out = dest self.bits = [] self.bitcount = 0 def _flushbits(self): if self.bits: for b in self.bits: self.out.write(pack('B', b)) self.bits = [] self.bitcount = 0 def close(self): """ Pass through if possible to any file-like destinations. """ if hasattr(self.out, 'close'): self.out.close() def flush(self): """ Pass through if possible to any file-like destinations. """ if hasattr(self.out, 'flush'): self.out.flush() def getvalue(self): """ Get what's been encoded so far if we're working with a BytesIO. """ self._flushbits() return self.out.getvalue() def write(self, s): """ Write a plain Python string with no special encoding in Python 2.x, or bytes in Python 3.x """ self._flushbits() self.out.write(s) def write_bit(self, b): """ Write a boolean value. """ if b: b = 1 else: b = 0 shift = self.bitcount % 8 if shift == 0: self.bits.append(0) self.bits[-1] |= (b << shift) self.bitcount += 1 def write_octet(self, n): """ Write an integer as an unsigned 8-bit value. """ if (n < 0) or (n > 255): raise ValueError('Octet out of range 0..255') self._flushbits() self.out.write(pack('B', n)) def write_short(self, n): """ Write an integer as an unsigned 16-bit value. """ if (n < 0) or (n > 65535): raise ValueError('Octet out of range 0..65535') self._flushbits() self.out.write(pack('>H', n)) def write_long(self, n): """ Write an integer as an unsigned2 32-bit value. """ if (n < 0) or (n >= (2**32)): raise ValueError('Octet out of range 0..2**31-1') self._flushbits() self.out.write(pack('>I', n)) def write_longlong(self, n): """ Write an integer as an unsigned 64-bit value. """ if (n < 0) or (n >= (2**64)): raise ValueError('Octet out of range 0..2**64-1') self._flushbits() self.out.write(pack('>Q', n)) def write_shortstr(self, s): """ Write a string up to 255 bytes long (after any encoding). If passed a unicode string, encode with UTF-8. """ self._flushbits() if isinstance(s, unicode): s = s.encode('utf-8') if len(s) > 255: raise ValueError('String too long') self.write_octet(len(s)) self.out.write(s) def write_longstr(self, s): """ Write a string up to 2**32 bytes long after encoding. If passed a unicode string, encode as UTF-8. """ self._flushbits() if isinstance(s, unicode): s = s.encode('utf-8') self.write_long(len(s)) self.out.write(s) def write_table(self, d): """ Write out a Python dictionary made of up string keys, and values that are strings, signed integers, Decimal, datetime.datetime, or sub-dictionaries following the same constraints. """ self._flushbits() table_data = AMQPWriter() for k, v in d.items(): table_data.write_shortstr(k) if isinstance(v, basestring): if isinstance(v, unicode): v = v.encode('utf-8') table_data.write(byte(83)) # 'S' table_data.write_longstr(v) elif isinstance(v, (int, long)): table_data.write(byte(73)) # 'I' table_data.write(pack('>i', v)) elif isinstance(v, Decimal): table_data.write(byte(68)) # 'D' sign, digits, exponent = v.as_tuple() v = 0 for d in digits: v = (v * 10) + d if sign: v = -v table_data.write_octet(-exponent) table_data.write(pack('>i', v)) elif isinstance(v, datetime): table_data.write(byte(84)) # 'T' table_data.write_timestamp(v) ## FIXME: timezone ? 
elif isinstance(v, dict): table_data.write(byte(70)) # 'F' table_data.write_table(v) else: raise ValueError('%s not serializable in AMQP' % repr(v)) table_data = table_data.getvalue() self.write_long(len(table_data)) self.out.write(table_data) def write_timestamp(self, v): """ Write out a Python datetime.datetime object as a 64-bit integer representing seconds since the Unix epoch. """ self.out.write(pack('>q', long(mktime(v.timetuple()))))
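Both AMQPWriter variants accumulate frames in a BytesIO and rely heavily on length-prefixed fields. A stripped-down sketch of the long-string framing they use, a 4-byte big-endian length followed by the UTF-8 bytes; write_longstr/read_longstr here are standalone functions, not either library's API:

import struct
from io import BytesIO

def write_longstr(out, s) -> None:
    if isinstance(s, str):
        s = s.encode("utf-8")
    out.write(struct.pack(">I", len(s)))  # unsigned 32-bit length prefix
    out.write(s)

def read_longstr(src) -> bytes:
    (length,) = struct.unpack(">I", src.read(4))
    return src.read(length)

buf = BytesIO()
write_longstr(buf, "héllo")
buf.seek(0)
assert read_longstr(buf) == "héllo".encode("utf-8")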
class NANDImage: mode = MODE_FILE stream = None file_size = 0 flash_size = 0 num_pages = 0 @property def file_offset(self) -> int: return self._file_offset @file_offset.setter def file_offset(self, offset: int) -> None: self._flash_offset = self.file_to_flash_offset(offset) self._file_offset = offset self.stream.seek(self._file_offset) @property def flash_offset(self) -> int: return self._flash_offset @flash_offset.setter def flash_offset(self, offset: int) -> None: self._file_offset = self.flash_to_file_offset(offset) self._flash_offset = offset self.stream.seek(self._file_offset) def __init__(self, filename_or_data: (str, bytes, bytearray), mode: int = MODE_FILE): self.reset() self.mode = mode if type(filename_or_data) == str: self.stream = open(filename_or_data, "r+b") elif type(filename_or_data) in [bytes, bytearray]: self.stream = BytesIO(filename_or_data) # seek to the end self.file_seek(0, SEEK_END) # get size with spare data self.file_size = self.file_tell() # get number of pages self.num_pages = self.file_size // 528 # get size without spare data self.flash_size = self.num_pages * 512 # seek back to the start self.file_seek(0) def __enter__(self): return self def __exit__(self, exc_type, exc_val, exc_tb) -> None: self.stream.flush() self.stream.close() def reset(self) -> None: self.mode = MODE_FILE self.stream = None self.file_size = 0 self.flash_size = 0 self.num_pages = 0 # stream primitives def seek(self, offset: int, whence: int = SEEK_SET) -> int: return getattr(self, ("file" if self.mode == MODE_FILE else "flash") + "_seek")(offset, whence) def tell(self) -> int: return getattr(self, ("file" if self.mode == MODE_FILE else "flash") + "_tell")() def read(self, num: int = 0) -> bytes: return getattr(self, ("file" if self.mode == MODE_FILE else "flash") + "_read")(num) def write(self, data: (bytes, bytearray)) -> int: return getattr(self, ("file" if self.mode == MODE_FILE else "flash") + "_write")(data) def flush(self) -> None: self.stream.flush() # extended I/O functions # offset translation functions def file_to_flash_offset(self, file_offset: int) -> int: """ Convert file offset to flash offset :param flash_offset: :return: """ if file_offset < 0: file_offset = self.flash_size + file_offset return ((file_offset // 528) * 512) + (file_offset % 528) def flash_to_file_offset(self, flash_offset: int) -> int: """ Convert flash offset to file offset :param file_offset: :return: """ if flash_offset < 0: flash_offset = self.file_size + flash_offset return ((flash_offset // 512) * 528) + (flash_offset % 512) def file_offset_to_page(self, offset: int) -> int: """ Get the page a file offset lands on :param offset: :return: """ return (offset // 528) + 1 def flash_offset_to_page(self, offset: int) -> int: """ Get the page a flash offset lands on :param offset: :return: """ return (offset // 512) + 1 def flash_calc_page_offset(self, offset: int) -> int: """ Calculates the start or end offset for page I/O :param offset: :return: """ return offset - ((offset // 512) * 512) def calc_page_offset(self, num: int) -> int: """ Calculates the start offset for a given page number :param num: The page number :return: """ return (num - 1) * 528 def calc_spare_offset(self, num: int) -> int: so = ((num - 1) * 528) - 16 return 512 if (num - 1) == 0 else so # file primitives def file_tell(self) -> int: return self.stream.tell() def file_seek(self, offset: int, whence: int = SEEK_SET) -> int: no = self.stream.seek(offset, whence) self.file_offset = no return no def file_read(self, num: int = 0) -> bytes: 
if num > 0: data = self.stream.read(num) else: data = self.stream.read() self.file_offset = self.file_tell() return data def file_write(self, data: (bytes, bytearray)) -> int: nw = self.stream.write(data) self.file_offset = self.file_tell() return nw # flash primitives def flash_tell(self) -> int: return self.file_to_flash_offset(self.file_tell()) def flash_seek(self, offset: int, whence: int = SEEK_SET) -> int: no = 0 # promise if whence == SEEK_SET: if offset >= 0: # no = self.file_seek(self.flash_to_file_offset(offset), SEEK_SET) self.flash_offset = offset no = self.flash_tell() elif offset < 0: no = self.file_seek( self.flash_to_file_offset(self.flash_size - offset), SEEK_SET) elif whence == SEEK_CUR: if offset >= 0: # no = self.file_seek(self.flash_to_file_offset(offset), SEEK_CUR) self.flash_offset += offset no = self.flash_tell() elif offset < 0: no = self.file_seek( self.flash_to_file_offset(self.flash_offset - offset), SEEK_CUR) elif whence == SEEK_END: if offset == 0: no = self.file_seek(0, SEEK_END) elif offset < 0: no = self.file_seek( self.flash_to_file_offset(self.flash_size - offset), SEEK_END) # self.file_offset = no return self.file_to_flash_offset(no) def flash_read(self, num: int = 0) -> bytes: strt_page = self.flash_offset_to_page(self.flash_offset) strt_offs = self.flash_calc_page_offset(self.flash_offset) stop_page = self.flash_offset_to_page(self.flash_offset + num) stop_offs = self.flash_calc_page_offset(self.flash_offset + num) #print("\nFlash Read:") #print(f"\tFlash Size: 0x{self.flash_size:04X}") #print(f"\tStart Page: {strt_page}") #print(f"\tStart Offset: {strt_offs}") #print(f"\tStop Page: {stop_page}") #print(f"\tStop Offset: {stop_offs}") with BytesIO() as bio: if strt_page == stop_page: # only one page bio.write(self.get_page(strt_page)[strt_offs:stop_offs]) for page_num in range(strt_page, stop_page): tmp_page = self.get_page(page_num) if page_num == strt_page: # first page bio.write(tmp_page[strt_offs:]) elif page_num == stop_page: # last page bio.write(tmp_page[:stop_offs]) else: # between first and last bio.write(tmp_page) data = bio.getvalue() # self.flash_offset = self.flash_tell() return data def flash_write(self, data: (bytes, bytearray)) -> int: strt_page = self.flash_offset_to_page(self.flash_offset) strt_offs = self.flash_calc_page_offset(self.flash_offset) stop_page = self.flash_offset_to_page(self.flash_offset + len(data)) stop_offs = self.flash_calc_page_offset(self.flash_offset + len(data)) # print("\nFlash Write:") # print(f"\tFlash Size: 0x{self.flash_size:04X}") # print(f"\tStart Page: {strt_page}") # print(f"\tStart Offset: {strt_offs}") # print(f"\tStop Page: {stop_page}") # print(f"\tStop Offset: {stop_offs}") nw = 0 with BytesIO(data) as bio: if strt_page == stop_page: # only one page chunk_size = stop_offs - strt_offs tmp_page = bytearray(self.get_page(strt_page)) pack_into(f"{chunk_size}s", tmp_page, strt_offs, bio.read(chunk_size)) for page_num in range(strt_page, stop_page): tmp_page = bytearray(self.get_page(page_num)) if page_num == strt_page: # first page chunk_size = 512 - strt_offs pack_into(f"{chunk_size}s", tmp_page, strt_offs, bio.read(512 - strt_offs)) elif page_num == stop_page: # last page chunk_size = 512 - stop_offs pack_into(f"{chunk_size}s", tmp_page, 0, bio.read(chunk_size)) else: # between first and last pack_into(f"512s", tmp_page, 0, bio.read(512)) nw += self.set_page(page_num, tmp_page) # self.flash_offset = self.flash_tell() return nw # page I/O def get_page(self, num: int) -> bytes: """ Get a page by page 
number :param num: :return: """ self.file_seek(self.calc_page_offset(num)) return self.file_read(512) def get_spare(self, num: int) -> bytes: """ Get a spare by page number :param num: :return: """ self.file_seek(self.calc_spare_offset(num)) return self.file_read(16) def set_page(self, num: int, data: (bytes, bytearray)) -> int: """ Set a page by page number :param num: :param data: :return: """ assert 1 <= num <= self.num_pages, "Page number out of range" assert len(data) == 512, "Invalid page size" spare_data = self.get_spare(num) self.file_seek(self.calc_page_offset(num)) (page_data, spare_data) = fix_page_ecc(data, spare_data) nw = self.file_write(page_data) nw += self.file_write(spare_data) return nw def set_spare(self, num: int, data: (bytes, bytearray)) -> int: """ Set a spare by spare number :param num: :param data: :return: """ assert 1 <= num <= self.num_pages, "Page number out of range" assert len(data) == 16, "Invalid spare size" self.file_seek(self.calc_spare_offset(num)) return self.file_write(data)
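NANDImage translates between "file" offsets, which count full 528-byte pages (512 data bytes plus a 16-byte spare), and "flash" offsets, which count only the data bytes. The conversion reduces to the arithmetic below, restated as standalone functions; round-tripping holds whenever the offset lands in the data portion of a page:

PAGE_DATA = 512   # user data bytes per page
PAGE_FULL = 528   # data plus the 16-byte spare area

def file_to_flash_offset(file_offset: int) -> int:
    return (file_offset // PAGE_FULL) * PAGE_DATA + (file_offset % PAGE_FULL)

def flash_to_file_offset(flash_offset: int) -> int:
    return (flash_offset // PAGE_DATA) * PAGE_FULL + (flash_offset % PAGE_DATA)

for flash_offset in (0, 511, 512, 1000, 4096):
    assert file_to_flash_offset(flash_to_file_offset(flash_offset)) == flash_offset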
class DataIO(object): """ This class simply wraps a binary file or a bytes string and implements both the file and bytes interface. It allows an input to be provided as files of bytes and manipulated indifferently as a file or a bytes object. """ def __init__(self, f): if isinstance(f, bytes): from io import BytesIO self.f = BytesIO(f) else: self.f = f self.view = dataView(dataio=self) def __getitem__(self, i): stay = self.f.tell() sta = i.start if sta is None: sta = stay self.f.seek(sta, 0) if i.stop is None: data = self.f.read() else: data = self.f.read(i.stop - sta) self.f.seek(stay, 0) return data def size(self): stay = self.f.tell() self.f.seek(0, 2) sz = self.f.tell() self.f.seek(stay, 0) return sz def read(self, size=-1): return self.f.read(size) def readline(self, size=-1): return self.f.readline(size) def readlines(self, size=-1): return self.f.readlines(size) def xreadlines(self, size=-1): return self.f.xreadlines(size) def write(self, s): return self.f.write(s) def writelines(self, l): return self.f.writelines(l) def seek(self, offset, whence=0): return self.f.seek(offset, whence) def tell(self): return self.f.tell() def flush(self): return self.f.flush() def fileno(self): return self.f.fileno() def isatty(self): return self.f.isatty() def next(self): return self.f.next() def truncate(self, size=0): return self.f.truncate(size) def close(self): return self.f.close() @property def closed(self): return self.f.closed @property def encoding(self): return self.f.encoding @property def errors(self): return self.f.errors @property def mode(self): return self.f.mode @property def name(self): try: return self.f.name except AttributeError: s = bytes(self.f.getvalue()) return "(sc-%s...)" % ("".join(["%02x" % x for x in s])[:8]) filename = name @property def newlines(self): return self.f.newlines @property def softspace(self): return self.f.softspace
def encrypt_aes(file_or_data, password=None, outfile=None, iv=None, salt=None, mode=None, base64encode=False, chunksize=512 * 1024): r""" Flexible implementation of AES encryption Parameters ---------- file_or_data : {BufferObject, string, bytes} input data will be converted to bytes sequence for encryption password : {str, None} if None, a prompt will ask for inputting a password outfile : {None, path, file} if None, return raw encrypted data iv : initial vector 16 bytes salt : {None, string, bytes} salt for password Hashing mode : Cipher.AES.MODE_* default `None` is converted to `Crypto.Cipher.AES.MODE_CBC` chunksize : int encryption chunk, multiple of 16. """ try: from Crypto.Cipher import AES except ImportError as e: raise ImportError("Require 'pycrypto' to run this function") if mode is None: mode = AES.MODE_CBC if password is None: password = input("Your password: ") assert len(password) > 0, "Password length must be greater than 0" password = to_password(password, salt=salt) # Initialization vector if iv is None: iv = os.urandom(16) encryptor = AES.new(password, mode, IV=iv) # ====== check read stream ====== # infile, filesize, own_file = _data_to_iobuffer(file_or_data) # ====== check out stream ====== # close_file = False if isinstance(outfile, string_types) and \ os.path.exists(os.path.dirname(outfile)): outfile = open(str(outfile), 'wb') close_file = True elif hasattr(outfile, 'write') and hasattr(outfile, 'flush'): close_file = True else: outfile = BytesIO() # ====== some header information ====== # outfile.write(struct.pack('<Q', filesize)) outfile.write(iv) while True: chunk = infile.read(chunksize) if bool(base64encode): chunk = base64.encodebytes(chunk) # EOF if len(chunk) == 0: break # doing padding to match chunk size elif len(chunk) % 16 != 0: chunk += b' ' * (16 - len(chunk) % 16) outfile.write(encryptor.encrypt(chunk)) # ====== clean and return ====== # if own_file: infile.close() outfile.flush() if close_file: outfile.close() else: outfile.seek(0) data = outfile.read() outfile.close() return data
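A minimal in-memory sketch of the same output layout (original-size header, then IV, then padded ciphertext), assuming the pycryptodome package for Crypto.Cipher.AES and Crypto.Util.Padding; encrypt_to_buffer is an illustrative name, not the function above:

import os
import struct
from io import BytesIO

from Crypto.Cipher import AES          # pycryptodome
from Crypto.Util.Padding import pad    # pycryptodome

def encrypt_to_buffer(data: bytes, key: bytes) -> bytes:
    iv = os.urandom(16)
    cipher = AES.new(key, AES.MODE_CBC, iv)
    out = BytesIO()
    out.write(struct.pack("<Q", len(data)))               # original size header
    out.write(iv)                                         # IV needed to decrypt
    out.write(cipher.encrypt(pad(data, AES.block_size)))  # CBC needs 16-byte blocks
    out.flush()
    return out.getvalue()

ciphertext = encrypt_to_buffer(b"secret payload", key=os.urandom(32))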
def download_cv(self, model, id, filename=None, **kwargs): _logger.info("DOWNLOAD CV") invoice = request.env[model].search([('id', '=', id)]) document = request.env['intern.document'].search([('name', '=', 'CV')], limit=1) finalDoc = invoice.createHeaderDoc() # byteIo = BytesIO() # finalDoc.save(byteIo) # byteIo.seek(0) reponds = BytesIO() archive = zipfile.ZipFile(reponds, 'w', zipfile.ZIP_DEFLATED) # merge_doc = None if finalDoc is not None: archive.write(finalDoc.name,u"名簿リスト.docx") # merge_doc = Document(finalDoc.name) os.unlink(finalDoc.name) else: return if invoice.order: ids = [] for intern in invoice.interns: ids.append(intern.id) list = None try: list = request.env['intern.intern'].search([('id', 'in', ids)], order="%s" % (invoice.order)) except: list = invoice.interns for i, intern in enumerate(list): childDoc = invoice.createCVDoc(document[0], intern, i) archive.write(childDoc.name,'cv_%d_%s.docx'%((i+1),intern_utils.name_with_underscore(intern.name))) # tmpDoc = Document(childDoc.name) # for element in tmpDoc.element.body: # merge_doc.element.body.append(element) os.unlink(childDoc.name) else: for i, intern in enumerate(invoice.interns): childDoc = invoice.createCVDoc(document[0], intern, i) archive.write(childDoc.name,'cv_%d_%s.docx'%((i+1),intern_utils.name_with_underscore(intern.name))) # tmpDoc = Document(childDoc.name) # for element in tmpDoc.element.body: # merge_doc.element.body.append(element) os.unlink(childDoc.name) # tempFile = NamedTemporaryFile(delete=False) # merge_doc.save(tempFile.name) # archive.write(tempFile.name,"full.docx") # os.unlink(tempFile.name) archive.close() reponds.flush() ret_zip = reponds.getvalue() reponds.close() return request.make_response(ret_zip, [('Content-Type', 'application/zip'), ('Content-Disposition', content_disposition(filename))])
def get_img_bytes(img): tmp = BytesIO() img.save(tmp, format="png") tmp.flush() return tmp.getbuffer()
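Usage sketch, assuming Pillow: render a small image and grab its PNG bytes without a temporary file. Note that getvalue() returns an independent bytes copy, while the snippet's getbuffer() returns a view that keeps the BytesIO alive.

from io import BytesIO

from PIL import Image

img = Image.new("RGB", (32, 32), color="red")
buf = BytesIO()
img.save(buf, format="png")
png_bytes = buf.getvalue()
assert png_bytes.startswith(b"\x89PNG")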
def _mp_worker(url, table=None, proxies=None): """Code to download the urls and blow away the buffer to keep memory usage down""" warnings.filterwarnings("ignore", '.*have mixed types. Specify dtype.*') # ignore # pandas warning for GDELT 1.0 dtype # start = datetime.datetime.now() # proc_name = current_process().name # # proc = os.getpid() ################################# # DEBUG Prints ################################# # print(url) # print(multiprocessing.current_process().name) # print('Starting {0}-{1}'.format(proc_name, proc)) time.sleep(0.001) # print("Getting to request process finished in {}".\ # format(datetime.datetime.now() - start)) # start = datetime.datetime.now() ################################# r = requests.get(url, proxies=proxies, timeout=5) # print("Request finished in {}".format(datetime.datetime.now() - start)) if r.status_code == 404: message = "GDELT does not have a url for date time " \ "{0}".format(re.search('[0-9]{4,18}', url).group()) warnings.warn(message) # print (multiprocessing.Process(name=multiprocessing.current_process().\ # name).is_alive()) start = datetime.datetime.now() try: buffer = BytesIO(r.content) if table == 'events': frame = pd.read_csv(buffer, compression='zip', sep='\t', header=None, warn_bad_lines=False, dtype={ 26: 'str', 27: 'str', 28: 'str' }) # , # parse_dates=[1, 2]) elif table == 'gkg': frame = pd.read_csv(buffer, compression='zip', sep='\t', header=None, warn_bad_lines=False) # parse_dates=['DATE'], warn_bad_lines=False) else: # pragma: no cover frame = pd.read_csv(buffer, compression='zip', sep='\t', header=None, warn_bad_lines=False) # print("Pandas load finished in {}".\ # format(datetime.datetime.now() - start)) # print ("{0} with id {1} finished processing in {2}".\ # format(proc_name,proc,end)) buffer.flush() buffer.close() return frame except: try: message = "GDELT did not return data for date time " \ "{0}".format(re.search('[0-9]{4,18}', url).group()) warnings.warn(message) except: # pragma: no cover message = "No data returned for {0}".format(r.url) warnings.warn(message)
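The worker reads a zipped CSV straight out of the HTTP response body. A reduced sketch of that in-memory unzip step using only the standard library (plain csv parsing stands in for the pandas call, and the single-member assumption is illustrative):

import csv
import io
import zipfile
from io import BytesIO

def rows_from_zipped_csv(zip_bytes: bytes, delimiter: str = "\t") -> list:
    with zipfile.ZipFile(BytesIO(zip_bytes)) as archive:
        name = archive.namelist()[0]  # assumes a single-member archive
        with archive.open(name) as member:
            text = io.TextIOWrapper(member, encoding="utf-8", errors="replace")
            return list(csv.reader(text, delimiter=delimiter))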
def check_simple_write_read(records, indent=" "): # print(indent+"Checking we can write and then read back these records") for format in test_write_read_alignment_formats: if format not in possible_unknown_seq_formats \ and isinstance(records[0].seq, UnknownSeq) \ and len(records[0].seq) > 100: # Skipping for speed. Some of the unknown sequences are # rather long, and it seems a bit pointless to record them. continue print(indent + "Checking can write/read as '%s' format" % format) # Going to write to a handle... if format in SeqIO._BinaryFormats: handle = BytesIO() else: handle = StringIO() try: with warnings.catch_warnings(): # e.g. data loss warnings.simplefilter("ignore", BiopythonWarning) c = SeqIO.write( sequences=records, handle=handle, format=format) assert c == len(records) except (TypeError, ValueError) as e: # This is often expected to happen, for example when we try and # write sequences of different lengths to an alignment file. if "len()" in str(e): # Python 2.4.3, # >>> len(None) # ... # TypeError: len() of unsized object # # Python 2.5.2, # >>> len(None) # ... # TypeError: object of type 'NoneType' has no len() print("Failed: Probably len() of None") else: print(indent + "Failed: %s" % str(e)) if records[0].seq.alphabet.letters is not None: assert format != t_format, \ "Should be able to re-write in the original format!" # Carry on to the next format: continue handle.flush() handle.seek(0) # Now ready to read back from the handle... try: records2 = list(SeqIO.parse(handle=handle, format=format)) except ValueError as e: # This is BAD. We can't read our own output. # I want to see the output when called from the test harness, # run_tests.py (which can be funny about new lines on Windows) handle.seek(0) raise ValueError("%s\n\n%s\n\n%s" % (str(e), repr(handle.read()), repr(records))) assert len(records2) == t_count for r1, r2 in zip(records, records2): # Check the bare minimum (ID and sequence) as # many formats can't store more than that. assert len(r1) == len(r2) # Check the sequence if format in ["gb", "genbank", "embl", "imgt"]: # The GenBank/EMBL parsers will convert to upper case. if isinstance(r1.seq, UnknownSeq) \ and isinstance(r2.seq, UnknownSeq): # Jython didn't like us comparing the string of very long # UnknownSeq object (out of heap memory error) assert r1.seq._character.upper() == r2.seq._character else: assert str(r1.seq).upper() == str(r2.seq) elif format == "qual": assert isinstance(r2.seq, UnknownSeq) assert len(r2) == len(r1) else: assert str(r1.seq) == str(r2.seq) # Beware of different quirks and limitations in the # valid character sets and the identifier lengths! if format in ["phylip", "phylip-sequential"]: assert r1.id.replace("[", "").replace("]", "")[:10] == r2.id, \ "'%s' vs '%s'" % (r1.id, r2.id) elif format == "phylip-relaxed": assert r1.id.replace(" ", "").replace(':', '|') == r2.id, \ "'%s' vs '%s'" % (r1.id, r2.id) elif format == "clustal": assert r1.id.replace(" ", "_")[:30] == r2.id, \ "'%s' vs '%s'" % (r1.id, r2.id) elif format == "stockholm": assert r1.id.replace(" ", "_") == r2.id, \ "'%s' vs '%s'" % (r1.id, r2.id) elif format == "fasta": assert r1.id.split()[0] == r2.id else: assert r1.id == r2.id, \ "'%s' vs '%s'" % (r1.id, r2.id) if len(records) > 1: # Try writing just one record (passing a SeqRecord, not a list) if format in SeqIO._BinaryFormats: handle = BytesIO() else: handle = StringIO() SeqIO.write(records[0], handle, format) assert handle.getvalue() == records[0].format(format)
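A condensed version of the write-then-read-back check, assuming a reasonably recent Biopython; the records here are made up for illustration:

from io import StringIO

from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord

records = [SeqRecord(Seq("ACGT"), id="rec1"), SeqRecord(Seq("GGCC"), id="rec2")]

handle = StringIO()
assert SeqIO.write(records, handle, "fasta") == len(records)
handle.seek(0)
round_tripped = list(SeqIO.parse(handle, "fasta"))
assert [str(r.seq) for r in round_tripped] == ["ACGT", "GGCC"]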
def from_directories(cls, directories, pattern=None, ignore=(), write=None, relative_to=None): """ convert directories to a simple manifest; returns ManifestParser instance pattern -- shell pattern (glob) or patterns of filenames to match ignore -- directory names to ignore write -- filename or file-like object of manifests to write; if `None` then a BytesIO instance will be created relative_to -- write paths relative to this path; if false then the paths are absolute """ # determine output opened_manifest_file = None # name of opened manifest file absolute = not relative_to # whether to output absolute path names as names if isinstance(write, string_types): opened_manifest_file = write write = open(write, 'w') if write is None: write = BytesIO() # walk the directories, generating manifests def callback(directory, dirpath, dirnames, filenames): # absolute paths filenames = [ os.path.join(dirpath, filename) for filename in filenames ] # ensure new manifest isn't added filenames = [ filename for filename in filenames if filename != opened_manifest_file ] # normalize paths if not absolute and relative_to: filenames = [ relpath(filename, relative_to) for filename in filenames ] # write to manifest print('\n'.join([ '[%s]' % denormalize_path(filename) for filename in filenames ]), file=write) cls._walk_directories(directories, callback, pattern=pattern, ignore=ignore) if opened_manifest_file: # close file write.close() manifests = [opened_manifest_file] else: # manifests/write is a file-like object; # rewind buffer write.flush() write.seek(0) manifests = [write] # make a ManifestParser instance return cls(manifests=manifests)
class BaseHeaders: header_size: int chunk_size: int max_target: int genesis_hash: Optional[bytes] target_timespan: int validate_difficulty: bool = True def __init__(self, path) -> None: if path == ':memory:': self.io = BytesIO() self.path = path self._size: Optional[int] = None async def open(self): if self.path != ':memory:': if not os.path.exists(self.path): self.io = open(self.path, 'w+b') else: self.io = open(self.path, 'r+b') async def close(self): self.io.close() @staticmethod def serialize(header: dict) -> bytes: raise NotImplementedError @staticmethod def deserialize(height, header): raise NotImplementedError def get_next_chunk_target(self, chunk: int) -> ArithUint256: return ArithUint256(self.max_target) @staticmethod def get_next_block_target(chunk_target: ArithUint256, previous: Optional[dict], current: Optional[dict]) -> ArithUint256: return chunk_target def __len__(self) -> int: if self._size is None: self._size = self.io.seek(0, os.SEEK_END) // self.header_size return self._size def __bool__(self): return True def __getitem__(self, height) -> dict: if isinstance(height, slice): raise NotImplementedError( "Slicing of header chain has not been implemented yet.") if not 0 <= height <= self.height: raise IndexError( f"{height} is out of bounds, current height: {self.height}") return self.deserialize(height, self.get_raw_header(height)) def get_raw_header(self, height) -> bytes: self.io.seek(height * self.header_size, os.SEEK_SET) return self.io.read(self.header_size) @property def height(self) -> int: return len(self) - 1 @property def bytes_size(self): return len(self) * self.header_size def hash(self, height=None) -> bytes: return self.hash_header( self.get_raw_header(height if height is not None else self.height)) @staticmethod def hash_header(header: bytes) -> bytes: if header is None: return b'0' * 64 return hexlify(double_sha256(header)[::-1]) async def connect(self, start: int, headers: bytes) -> int: added = 0 bail = False for height, chunk in self._iterate_chunks(start, headers): try: # validate_chunk() is CPU bound and reads previous chunks from file system self.validate_chunk(height, chunk) except InvalidHeader as e: bail = True chunk = chunk[:(height - e.height) * self.header_size] written = 0 if chunk: self.io.seek(height * self.header_size, os.SEEK_SET) written = self.io.write(chunk) // self.header_size self.io.truncate() # .seek()/.write()/.truncate() might also .flush() when needed # the goal here is mainly to ensure we're definitely flush()'ing self.io.flush() self._size = self.io.tell() // self.header_size added += written if bail: break return added def validate_chunk(self, height, chunk): previous_hash, previous_header, previous_previous_header = None, None, None if height > 0: previous_header = self[height - 1] previous_hash = self.hash(height - 1) if height > 1: previous_previous_header = self[height - 2] chunk_target = self.get_next_chunk_target(height // 2016 - 1) for current_hash, current_header in self._iterate_headers( height, chunk): block_target = self.get_next_block_target( chunk_target, previous_previous_header, previous_header) self.validate_header(height, current_hash, current_header, previous_hash, block_target) previous_previous_header = previous_header previous_header = current_header previous_hash = current_hash def validate_header(self, height: int, current_hash: bytes, header: dict, previous_hash: bytes, target: ArithUint256): if previous_hash is None: if self.genesis_hash is not None and self.genesis_hash != current_hash: raise 
InvalidHeader( height, f"genesis header doesn't match: {current_hash.decode()} " f"vs expected {self.genesis_hash.decode()}") return if header['prev_block_hash'] != previous_hash: raise InvalidHeader( height, "previous hash mismatch: {} vs expected {}".format( header['prev_block_hash'].decode(), previous_hash.decode())) if self.validate_difficulty: if header['bits'] != target.compact: raise InvalidHeader( height, "bits mismatch: {} vs expected {}".format( header['bits'], target.compact)) proof_of_work = self.get_proof_of_work(current_hash) if proof_of_work > target: raise InvalidHeader( height, f"insufficient proof of work: {proof_of_work.value} vs target {target.value}" ) async def repair(self): previous_header_hash = fail = None batch_size = 36 for start_height in range(0, self.height, batch_size): self.io.seek(self.header_size * start_height) headers = self.io.read(self.header_size * batch_size) if len(headers) % self.header_size != 0: headers = headers[:(len(headers) // self.header_size) * self.header_size] for header_hash, header in self._iterate_headers( start_height, headers): height = header['block_height'] if height: if header['prev_block_hash'] != previous_header_hash: fail = True else: if header_hash != self.genesis_hash: fail = True if fail: log.warning( "Header file corrupted at height %s, truncating it.", height - 1) self.io.seek( max(0, (height - 1)) * self.header_size, os.SEEK_SET) self.io.truncate() self.io.flush() self._size = None return previous_header_hash = header_hash @staticmethod def get_proof_of_work(header_hash: bytes) -> ArithUint256: return ArithUint256(int(b'0x' + header_hash, 16)) def _iterate_chunks(self, height: int, headers: bytes) -> Iterator[Tuple[int, bytes]]: assert len( headers ) % self.header_size == 0, f"{len(headers)} {len(headers)%self.header_size}" start = 0 end = (self.chunk_size - height % self.chunk_size) * self.header_size while start < end: yield height + (start // self.header_size), headers[start:end] start = end end = min(len(headers), end + self.chunk_size * self.header_size) def _iterate_headers(self, height: int, headers: bytes) -> Iterator[Tuple[bytes, dict]]: assert len(headers) % self.header_size == 0, len(headers) for idx in range(len(headers) // self.header_size): start, end = idx * self.header_size, (idx + 1) * self.header_size header = headers[start:end] yield self.hash_header(header), self.deserialize( height + idx, header)
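BaseHeaders treats its BytesIO (or file) as an array of fixed-size records addressed by height: seek to height * header_size, read or write one record, and derive the count from the end offset. A stripped-down sketch of that access pattern (RecordStore and the record size are illustrative, not chain constants):

import os
from io import BytesIO

RECORD_SIZE = 112  # illustrative record size, not a real chain constant

class RecordStore:
    def __init__(self):
        self.io = BytesIO()

    def __len__(self):
        return self.io.seek(0, os.SEEK_END) // RECORD_SIZE

    def put(self, index: int, record: bytes) -> None:
        assert len(record) == RECORD_SIZE
        self.io.seek(index * RECORD_SIZE)
        self.io.write(record)
        self.io.flush()

    def get(self, index: int) -> bytes:
        self.io.seek(index * RECORD_SIZE)
        return self.io.read(RECORD_SIZE)

store = RecordStore()
store.put(0, b"\x00" * RECORD_SIZE)
store.put(1, b"\x01" * RECORD_SIZE)
assert len(store) == 2 and store.get(1)[0] == 1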
class _BaseBinaryWrapper: def __init__(self, stream: Union[typing.BinaryIO, bytes] = b""): if isinstance(stream, bytes) or isinstance(stream, bytearray): self.stream = BytesIO(stream) else: self.stream = stream # Wrappings: def close(self) -> None: return self.stream.close() def flush(self) -> None: return self.stream.flush() def read(self, n: int = -1) -> AnyStr: return self.stream.read(n) def readable(self) -> bool: return self.stream.readable() def readline(self, limit: int = -1) -> AnyStr: return self.stream.readline(limit) def readlines(self, hint: int = -1) -> List[AnyStr]: return self.stream.readlines(hint) def write(self, s: Union[bytes, bytearray]) -> int: return self.stream.write(s) def writable(self) -> bool: return self.stream.writable() def writelines(self, lines: Iterable[AnyStr]) -> None: self.stream.writelines(lines) def seek(self, offset: int, whence: int = 0) -> int: return self.stream.seek(offset, whence) def seekable(self) -> bool: return self.stream.seekable() def tell(self) -> int: return self.stream.tell() def fileno(self) -> int: return self.stream.fileno() def __enter__(self): self.stream.__enter__() return self def __exit__(self, exc_type, exc_val, exc_tb): self.stream.__exit__(exc_type, exc_val, exc_tb) # helper functions def readall(self): self.stream.seek(0) return self.stream.read() def getvalue(self): if isinstance(self.stream, BytesIO): return self.stream.getvalue() pos = self.stream.tell() ret = self.readall() self.stream.seek(pos) return ret def align(self, alignment=4): if offset := (self.tell() % alignment): self.seek(self.tell() + alignment - offset)
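The align() helper at the end pads the current position up to the next multiple of the alignment. The same arithmetic as a standalone function:

def aligned(offset: int, alignment: int = 4) -> int:
    remainder = offset % alignment
    return offset if remainder == 0 else offset + alignment - remainder

assert [aligned(n) for n in (0, 1, 4, 5, 7, 8)] == [0, 4, 4, 8, 8, 8]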
curr_hiscore_in_ram_bytesio.seek(0) # rewind curr_hiscore_in_ram_bytesio.write(buf) if row == hiscore_rows_to_process[-1]: # TODO: check if all the rows were written hiscore_inited_in_ram = True retroarch.show_msg("Hiscore loaded") elif response_bytes: # not the first loop # append read bytes to curr_hiscore_in_ram_bytesio for b in response_bytes: curr_hiscore_in_ram_bytesio.write(bytes([int(b, base=16)])) # end for rows # check if hiscore data is changed curr_hiscore_in_ram_bytesio.flush() curr_hiscore_in_ram_bytesio.seek(0) # rewind #print(curr_hiscore_in_ram_bytesio.getvalue()) #print(hiscore_file_bytesio.getvalue()) curr_hiscore_in_ram_bytesio_value = curr_hiscore_in_ram_bytesio.getvalue() if len(curr_hiscore_in_ram_bytesio_value) > 0 and bool( any(c != 0 for c in curr_hiscore_in_ram_bytesio_value) ) and curr_hiscore_in_ram_bytesio_value != hiscore_file_bytesio.getvalue(): # (over-)write to the hiscore file #if HISCORE_PATH_USE_SUBDIRS and not os.path.exists(HISCORE_PATH + "/" + system): # os.mkdir(HISCORE_PATH + "/" + system) if not os.path.isfile(hiscore_file_path): # show msg only at the 1st save retroarch.show_msg("Hiscore file created") hiscore_file = open(hiscore_file_path, 'wb') # write+binary mode hiscore_file.write(curr_hiscore_in_ram_bytesio_value)
def get(self, request, graphid, nodeid=None): if self.action == "export_graph": graph = get_graphs_for_export([graphid]) graph["metadata"] = system_metadata() f = JSONSerializer().serialize(graph, indent=4) graph_name = JSONDeserializer().deserialize(f)["graph"][0]["name"] response = HttpResponse(f, content_type="json/plain") response[ "Content-Disposition"] = 'attachment; filename="%s.json"' % ( graph_name) return response elif self.action == "export_mapping_file": files_for_export = create_mapping_configuration_file(graphid, True) file_name = Graph.objects.get(graphid=graphid).name buffer = BytesIO() with zipfile.ZipFile(buffer, "w", zipfile.ZIP_DEFLATED) as zip: for f in files_for_export: f["outputfile"].seek(0) zip.writestr(f["name"], f["outputfile"].read()) zip.close() buffer.flush() zip_stream = buffer.getvalue() buffer.close() response = HttpResponse() response[ "Content-Disposition"] = "attachment; filename=" + file_name + ".zip" response["Content-length"] = str(len(zip_stream)) response["Content-Type"] = "application/zip" response.write(zip_stream) return response elif self.action == "get_domain_connections": res = [] graph = Graph.objects.get(graphid=graphid) ret = graph.get_valid_domain_ontology_classes() for r in ret: res.append({ "ontology_property": r["ontology_property"], "ontology_classes": [c for c in r["ontology_classes"]] }) return JSONResponse(res) elif self.action == "get_nodes": graph = Graph.objects.get(graphid=graphid) return JSONResponse(graph.nodes) elif self.action == "get_related_nodes": parent_nodeid = request.GET.get("parent_nodeid", None) graph = Graph.objects.get(graphid=graphid) ret = graph.get_valid_ontology_classes(nodeid=nodeid, parent_nodeid=parent_nodeid) return JSONResponse(ret) elif self.action == "get_valid_domain_nodes": graph = Graph.objects.get(graphid=graphid) if nodeid == "": nodeid = None ret = graph.get_valid_domain_ontology_classes(nodeid=nodeid) return JSONResponse(ret) return HttpResponseNotFound()
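A reduced sketch of the export_mapping_file branch: several named payloads are packed into one zip archive in memory and the resulting bytes are handed to the HTTP response; zip_payloads is an illustrative helper name:

import zipfile
from io import BytesIO

def zip_payloads(payloads: dict) -> bytes:
    buffer = BytesIO()
    with zipfile.ZipFile(buffer, "w", zipfile.ZIP_DEFLATED) as archive:
        for name, content in payloads.items():
            archive.writestr(name, content)
    return buffer.getvalue()

zip_bytes = zip_payloads({"graph.json": "{}", "mapping.json": "{}"})
assert zip_bytes[:2] == b"PK"  # zip local-file-header magic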
async def encode( self, data: Mapping[str, Any], *, schema: str, message_type: Optional[Callable] = None, **kwargs: Any, ) -> Union[MessageType, MessageMetadataDict]: """ Encode data with the given schema. Create content type value, which consists of the Avro Mime Type string and the schema ID corresponding to given schema. If provided with a message constructor callback, pass encoded data and content type to create message object. If not provided, return the following dict: {"data": Avro encoded value, "content_type": Avro mime type string + schema ID}. If `message_type` is set, then additional keyword arguments will be passed to the message callback function provided. Schema must be an Avro RecordSchema: https://avro.apache.org/docs/1.10.0/gettingstartedpython.html#Defining+a+schema :param data: The data to be encoded. :type data: Mapping[str, Any] :keyword schema: Required. The schema used to encode the data. :paramtype schema: str :keyword message_type: The callback function or message class to construct the message. If message class, it must be a subtype of the azure.schemaregistry.encoder.avroencoder.MessageType protocol. If callback function, it must have the following method signature: `(data: bytes, content_type: str, **kwargs) -> MessageType`, where `data` and `content_type` are positional parameters. :paramtype message_type: Callable or None :rtype: MessageType or MessageMetadataDict :raises ~azure.schemaregistry.encoder.avroencoder.exceptions.SchemaParseError: Indicates an issue with parsing schema. :raises ~azure.schemaregistry.encoder.avroencoder.exceptions.SchemaEncodeError: Indicates an issue with encoding data for provided schema. """ raw_input_schema = schema try: schema_fullname = self._avro_encoder.get_schema_fullname( raw_input_schema) except Exception as e: # pylint:disable=broad-except SchemaParseError(f"Cannot parse schema: {raw_input_schema}", error=e).raise_with_traceback() schema_id = await self._get_schema_id(schema_fullname, raw_input_schema) content_type = f"{AVRO_MIME_TYPE}+{schema_id}" try: data_bytes = self._avro_encoder.encode(data, raw_input_schema) except Exception as e: # pylint:disable=broad-except SchemaEncodeError( "Cannot encode value '{}' for schema: {}".format( data, raw_input_schema), error=e, ).raise_with_traceback() stream = BytesIO() stream.write(data_bytes) stream.flush() payload = stream.getvalue() stream.close() if message_type: try: return message_type.from_message_data(payload, content_type, **kwargs) except AttributeError: try: return message_type(payload, content_type, **kwargs) except TypeError as e: SchemaEncodeError( f"""The data model {str(message_type)} is not a Callable that takes `data` and `content_type` or a subtype of the MessageType protocol. If using an Azure SDK model class, please check the README.md for the full list of supported Azure SDK models and their corresponding versions.""" ).raise_with_traceback() return {"data": payload, "content_type": content_type}
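When no message_type callback is supplied, the encoder returns a dict pairing the payload with its content type. A sketch of that final packaging step (wrap_payload is illustrative, and the AVRO_MIME_TYPE value is assumed rather than taken from the azure-sdk source):

from io import BytesIO

AVRO_MIME_TYPE = "avro/binary"  # assumed value of the constant used above

def wrap_payload(encoded: bytes, schema_id: str) -> dict:
    stream = BytesIO()
    stream.write(encoded)
    stream.flush()
    payload = stream.getvalue()
    stream.close()
    return {"data": payload, "content_type": f"{AVRO_MIME_TYPE}+{schema_id}"}

message = wrap_payload(b"\x00\x01", "abc123")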