def _internal_scan_directory(self, dir_name):
    if not os.path.isdir(dir_name):
        sys.stderr.write(dir_name + " is not a directory.\n")
        return
    try:
        dir_list = os.listdir(dir_name)
    except OSError as e:
        sys.stderr.write(str(e) + "\n")
        return
    for filename in dir_list:
        src_file = os.path.join(dir_name, filename)
        if os.path.isdir(src_file):
            self._internal_scan_directory(src_file)
            continue
        # Tally non-picture files separately from valid pictures
        if not os.path.isfile(src_file) or not self.is_valid_file_type(src_file):
            self._nonpic_size += utils.get_file_size(src_file)
            self._nonpiccount += 1
            continue
        self._total_pics_size += utils.get_file_size(src_file)
        self._total_pics_count += 1
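Every snippet in this collection calls a project-local `utils.get_file_size` helper. Its implementation differs from project to project (some later examples pass it a URL or use it to count vocabulary entries), but for the common local-file case a minimal sketch, assuming it simply reports the byte size of a path on disk, might be:

import os

def get_file_size(path):
    # Hypothetical minimal helper: byte size of a local file.
    # The real projects above hide their own logic behind this name.
    return os.path.getsize(path)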
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-f", default="images/createrLake.tiff")  # image file
    parser.add_argument("-q", default=90, type=int)   # image quality
    parser.add_argument("-p", default=100, type=int)  # percent of downsampling
    # Note: argparse's type=bool treats any non-empty string as True,
    # so a flag is the correct way to expose this option.
    parser.add_argument("--plot", action="store_true")  # plot original vs. decompressed
    args = parser.parse_args()

    fname = args.f
    quality = args.q
    percent = args.p
    image = misc.imread(fname)
    data = JPEG_compression(image, quality, percent)
    utils.save_to_gzip(data, fname)
    print("Original File Size = {0} B".format(utils.get_file_size(fname)))
    print("New File Size = {0} B".format(utils.get_file_size(fname + ".gz")))

    # Load the file back and measure reconstruction quality
    data2 = utils.gzip_to_data(fname + ".gz")
    im2 = JPEG_decompression(data2)
    print("PSNR = {0}".format(utils.psnr(image, im2)))

    if args.plot:
        plt.figure()
        plt.subplot(1, 2, 1)
        plt.imshow(image)
        plt.subplot(1, 2, 2)
        plt.imshow(im2)
        plt.show()
def run(self):
    """The actual thread code"""
    self._running = True
    self._started = True
    self._log.debug('Starting %s ' % self.name)
    # Single-pass loop: "break" at the end funnels all paths to the
    # common cleanup below.
    while True:
        # Assume USB flash drive is powered up and mounted.
        # If necessary, create the USB flash destination dir.
        dest_dir = self._get_dest_dir(self._dest_file_name)
        utils.make_dirs(dest_dir, self._log)
        comp_ratio = 1.0
        if self._compress:
            compressed_path = self._src_path + '.gz'
            if not self._compress_file(self._src_path, compressed_path):
                self._log.error('Error: file compression failed on %s' %
                                self._src_path)
                utils.delete_file(self._src_path, self._log)
                utils.delete_file(compressed_path, self._log)
                return False
            orig_bytes = float(utils.get_file_size(self._src_path))
            comp_bytes = float(utils.get_file_size(compressed_path))
            comp_ratio = orig_bytes / comp_bytes
            from_path = compressed_path
            to_path = ''.join((dest_dir, '/', self._dest_file_name, '.gz'))
        else:
            from_path = self._src_path
            to_path = ''.join((dest_dir, '/', self._dest_file_name))
        self._log.info('Storing %s' % to_path)
        self._log.info('  Compression ratio was %.2f to 1' % comp_ratio)
        try:
            utils.copy_file(from_path, to_path, self._log)
        except Exception:
            self._log.info('Error: write to %s failed' % to_path)
        # Delete the temp files
        try:
            utils.delete_file(self._src_path, self._log)
        except Exception:
            self._log.info('Error: could not delete %s' % self._src_path)
        if self._compress:
            try:
                utils.delete_file(compressed_path, self._log)
            except Exception:
                self._log.error('Could not delete %s' % compressed_path)
        break
    self._running = False
    if self._exit_callback:
        self._exit_callback(self)
    self._log.debug('Exiting %s ' % self.name)
def search(fname, percent):
    # Search for the highest (p, q) pair whose compressed size fits the limit
    lim = 8500
    size = float('inf')
    image = misc.imread(fname)
    ps = range(percent, 1, -1)
    print(ps)
    qs = range(99, 1, -1)
    p_f = 0
    q_f = 0
    for p in ps:
        for q in qs:
            data = JPEG_compression(image, q, p)
            utils.save_to_gzip(data, fname)
            size = utils.get_file_size(fname + ".gz")
            print(size)
            if size < lim:
                q_f = q
                break
        print(p)
        if size < lim:
            p_f = p
            break
    print(q_f, p_f)
def get_objects_on_disk(self):
    """
    get_objects_on_disk(self)

    Walk through local storage and build one giant dictionary of
    objects on disk
    """
    objects_on_disk = {}
    download_path = self.options['dest_sync']
    if 'shelf' in self.options:
        download_path = os.path.join(download_path, self.options['shelf'])
    for (root, dirs, files) in os.walk(download_path):
        for f in files:
            obj = os.path.join(root, f)
            object_name = utils.unix_path(
                os.path.relpath(obj, self.options['dest_sync']))
            # Record the sha1 hash if checksum is enabled
            if self.options['checksum']:
                objects_on_disk.update({object_name: {
                    'sha1_hash': utils.get_sha1(obj),
                }})
            else:
                objects_on_disk.update({object_name: {
                    'modified_time': utils.get_modified_time(obj),
                    'file_size': utils.get_file_size(obj)
                }})
    return objects_on_disk
def binary_random_chunk_generator(seed, paths, chunk_length,
                                  disable_randomization=False):
    rng = np.random.default_rng(seed)
    # Each container holds the path, current offset, and total size of a file
    containers = [[path, 0, utils.get_file_size(path)] for path in paths]
    for c in containers:
        if c[2] % chunk_length != 0:
            raise Exception("Path {} is not evenly divisible by {}".format(
                c[0], chunk_length))
    while len(containers) > 0:
        if disable_randomization:
            # Always read from the first path until it is exhausted,
            # then move to the next
            random_index = 0
        else:
            random_index = rng.integers(0, len(containers))  # high is exclusive
        c = containers[random_index]
        with open(c[0], "rb") as f:
            f.seek(c[1])
            buf = f.read(chunk_length)
        c[1] += chunk_length
        if c[1] == c[2]:
            containers.remove(c)
        yield buf
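A minimal usage sketch for the generator above, assuming two chunk-aligned record files; the paths, seed, chunk length, and `process` consumer are all hypothetical:

# Stream 4096-byte chunks from two record files in a seeded-random
# interleaving until both files are exhausted.
for chunk in binary_random_chunk_generator(seed=1234,
                                           paths=["a.ds", "b.ds"],
                                           chunk_length=4096):
    process(chunk)  # placeholder for whatever consumes each chunk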
def main(args):
    zipfilepath = args.zip
    if zipfilepath is None:
        print("Pass arguments correctly!")
        exit(-1)
    xmlfilepath = args.xmlfile
    zip_path = zipfilepath
    if not utils.valid_file(zip_path):
        print("Bad zip file.")
        exit(-1)
    data_for_all_files = []
    path_to_extract = utils.random_temp_path(TEMP_DIR)
    utils.extractor(zip_path, path_to_extract)
    list_of_all_files = utils.getListOfFiles(path_to_extract)
    for path_to_file in list_of_all_files:
        uid = utils.get_uuid()
        filename = utils.stripfilepath(path_to_file)
        rel_path = utils.get_relative_path(path_to_file, path_to_extract)
        md5hash = utils.md5sum(path_to_file)
        filesize = utils.get_file_size(filepath=path_to_file)
        data = FileDetails(file_uuid=uid,
                           file_name=filename,
                           file_full_path=path_to_file,
                           relative_path=rel_path,
                           file_md5hash=md5hash,
                           file_size=filesize)
        data_for_all_files.append(data)
    XS.XMLSerialize(data_for_all_files, xmlfilepath)
    utils.cleanup(path_to_extract)
    exit(0)
def createFile(request):
    if request.method == 'POST':
        file_name = request.POST.get("filename", "test")

        # Save file to the localhost repo
        repo_path = utils.get_repo_path(file_name)
        rev_control = utils.MercurialRevisionControl()
        rev_control.create_repo(repo_path)
        auth_user = request.session['auth_username']
        rev_control.hg_rc(repo_path, 'ui', 'username', auth_user)
        file_content = request.POST.get("content", "")
        utils.create_file(repo_path, file_content)
        rev_control.add(repo_path)
        rev_control.commit(repo_path)

        # Save file metadata to the db
        file_path = utils.get_file_path(file_name)
        file_size = utils.get_file_size(file_path)
        created = utils.get_current_datatime()
        created_by = request.session.get("auth_username")
        modified = created
        modified_by = created_by
        path = file_path
        data = DockerFiles(Name=file_name, Size=file_size, Created=created,
                           CreatedBy=created_by, Modified=modified,
                           ModifiedBy=modified_by, Path=path)
        data.save()
    return HttpResponseRedirect('/admin/files')
def onDocumentOpened(self):
    file_size = get_file_size(self._active_doc.path)
    file_name = self._active_doc.name
    self.l.info('Opened "{}".'.format(file_name))
    self.tempStatusMessage('Opened "{}" ({})'.format(
        file_name, format_file_size(file_size)))
    self.updateDirectoryActions()
    self.updateFileActions()
    self.updateEditActions()
    self.loadPage(0)
def _internal_upload_directory(self, dir_name):
    if not os.path.isdir(dir_name):
        sys.stderr.write(dir_name + " is not a directory.\n")
        return
    try:
        dir_list = os.listdir(dir_name)
    except OSError as e:
        sys.stderr.write(str(e) + "\n")
        return
    for filename in dir_list:
        src_file = os.path.join(dir_name, filename)
        if os.path.isdir(src_file):
            self._internal_upload_directory(src_file)
            continue
        if not os.path.isfile(src_file):
            continue
        # Skip files that are not an allowed type (e.g. not a jpg)
        if not self.is_valid_file_type(src_file):
            print("File " + filename + " is not an allowed file type.\n")
            continue
        self._count += 1
        file_size = utils.get_file_size(src_file)
        self._sizecount += file_size
        md5sum = utils.get_md5sum_from_file(src_file)
        uploaded = self._dataHelper.file_already_uploaded(
            self._cloud_service_name, md5sum)
        if uploaded:
            print("File " + src_file + " already uploaded.")
            continue
        stt = time.time()
        photo_id = self.upload_file(src_file, md5sum)
        secondstoupload = time.time() - stt
        bits_per_second = file_size / secondstoupload
        if photo_id != 0:
            self._dataHelper.set_file_uploaded(src_file,
                                               self._cloud_service_name,
                                               photo_id, md5sum)
        else:
            self._failcount += 1
        if self._total_pics_count > 0:
            p = float(self._count) / float(self._total_pics_count) * 100.0
            print(str(int(p)) + "% done. (" + str(self._count) + " of " +
                  str(self._total_pics_count) + " pictures, " +
                  str(self._failcount) + " fails - " +
                  utils.sizeof_fmt(self._sizecount) + " of " +
                  utils.sizeof_fmt(self._total_pics_size) + ") ETA: " +
                  utils.format_eta(bits_per_second, self._sizecount,
                                   self._total_pics_size))
def copy_file_to_string(file_path, log):
    """Return a string containing the contents of a file.

    Return None if the file cannot be opened.
    """
    log.debug('trying to open %s' % file_path)
    try:
        f = open(file_path, 'rb')
    except Exception:
        log.error('Could not open %s' % file_path)
        return None
    file_size = utils.get_file_size(file_path)
    file_str = f.read(file_size)
    f.close()
    return file_str
def file_list():
    files = os.listdir('./upload/')  # list the upload directory
    files_and_size = {}
    for file in files:
        file_abs = './upload/' + file
        file_and_size = []
        if not isinstance(file_abs, str):
            file_abs = file_abs.decode('utf-8')
        if file != "None":
            file_and_size.append(file_abs)
            file_and_size.append(utils.get_file_size(file_abs))
            file_and_size.append(utils.get_file_create_time(file_abs))
            files_and_size[file] = file_and_size
    return rt('./list.html', files_and_size=files_and_size)
def saveCopy(self):
    file_name = self._active_doc.getSaveFilename()
    try:
        self._active_doc.saveDrawingPDF()
        file_size = get_file_size(self._active_doc.getSavePath())
        self.l.info('Saved copy "{}" from "{}".'.format(
            file_name, self._active_doc.path))
        self.tempStatusMessage('Saved copy as "{}" ({})'.format(
            file_name, format_file_size(file_size)))
    except RuntimeError as e:
        msg = str(e).capitalize()
        self.l.error(msg)
        show_error_dialog(self, msg)
    if self._active_dir is not None:
        self._active_dir.refresh()
def _process_rx_pkt(self, rx_pkt, pkt_time_stamp):
    """Process a pkt from the instrument"""
    if self._data_file_state == 1:
        # Open a new data file and write a pkt to it
        self._data_file_path = ''.join(
            (cases_mgr_config.temp_dir, cases_mgr_config.proc_mnemonic,
             '_', utils.time_stamp_str(pkt_time_stamp), '.dat'))
        if not self._open_data_file():
            return
        self._file_time_stamp = pkt_time_stamp
        self._write_to_data_file(rx_pkt)
        self._data_file_state = 2
    elif self._data_file_state == 2:
        self._write_to_data_file(rx_pkt)
        # If the tmp file is max size or too old, save it to USB flash
        file_age = pkt_time_stamp - self._file_time_stamp
        #self._log.debug('file_age is %s' % str(file_age.seconds))
        file_size = utils.get_file_size(self._data_file_path)
        save_due_to_size = file_size >= cases_mgr_config.data_file_max_size
        save_due_to_age = (file_age.seconds >=
                           cases_mgr_config.data_file_storage_period)
        # Compress low rate data only. Low rate data is saved due to age,
        # not size.
        compress = save_due_to_age
        if save_due_to_size or save_due_to_age:
            self._data_file.close()
            self.data_production_lock.acquire()
            self.data_production += os.path.getsize(self._data_file_path)
            self.data_production_lock.release()
            # Spin off a thread to execute the XMLRPC command.
            # If it's a big file, it will take a while for the USB mgr
            # to copy the file to temp storage. The serial buffer
            # could overflow while waiting.
            save_file_thread = SaveFileThread(self._data_file_path,
                                              compress, self._log)
            # save_file_thread deletes the data file after storage
            self._data_file_path = None
            self._data_file_state = 1
    else:
        self._log.error(
            'RxDataThread._process_rx_pkt: unknown state value')
        self._data_file_state = 1
def load_main_file(self, file_dict, callback=None, pg_val=None):
    """
    Description : Main file loading method for the issuu data file.
                  Loads the file in chunks into an instance variable.
    Parameters  : A dictionary containing the filename and number of lines
                  in this file [file_dict]. The tkinter progress bar
                  StringVar variable which will be modified to represent
                  progress [pg_val].
    """
    utils.logger.info("Started loading file")
    start_time = time.time()
    tmp = []
    pd_reader = pd.read_json(file_dict['filename'], lines=True,
                             chunksize=utils.CHUNK_SIZE)
    loop_count = ceil(file_dict['linecount'] / utils.CHUNK_SIZE)
    for i, df in utils.cli_pg_bar(enumerate(pd_reader), total=loop_count):
        # Since we are working with a random sample, some read documents
        # don't have the "read" record but have "pageread" records.
        df = df.loc[df['event_type'].isin(['read'])]  # , 'pageread'])]
        tmp.append(df[utils.ISSUU_FIELDS])
        if self.tk_gui and pg_val is not None:
            pg_val.set(i + 1)
    self.data = pd.concat(tmp, axis=0)
    self.data.drop_duplicates(['visitor_uuid', 'subject_doc_id'],
                              inplace=True)
    if callback is not None:
        callback()
    self.file_loaded = True
    self.file_size = utils.get_file_size(file_dict['filename'])
    utils.logger.info("Loading done")
    df_mem_usage = self.data.memory_usage().sum() / (1024 * 1024)
    utils.logger.debug(
        "Loaded in {0} seconds - dataframe using {1} MB".format(
            round(time.time() - start_time, 2), round(df_mem_usage, 2)))
def process_objects(self, expanded_objects=[]):
    """
    process_objects(expanded_objects)

    Given a list of objects, determines if uploadable (binary), and then
    creates a dictionary of:
        sha1_hash
        sha256_hash
        modified_time
        filesize

    Sha1_hash is only determined on first upload or if modified time and
    file size changed.
    """
    objects_metadata = {}
    for obj in expanded_objects:
        # Process if object is uploadable
        if self.uploadable_object(obj):
            # Object name in metadata file. Replace \\ with / to remain
            # consistent across platforms
            object_name = utils.unix_path(
                os.path.relpath(obj, self.paths['shelves']))

            # Determine paths
            object_path = os.path.abspath(obj)
            object_metadata_file = '%s.pitem' % object_path

            # Add object to gitignore
            self.add_object_to_gitignore(obj)

            object_mtime = utils.get_modified_time(obj)
            object_file_size = utils.get_file_size(obj)

            # Use the cached checksum, since checksum hashing is cpu
            # intensive and comparing file size and modified times is
            # quicker. Checksums are forced using the cli flag --checksum.
            if (not self.options['checksum']
                    and os.path.exists(object_metadata_file)):
                with open(object_metadata_file) as json_file:
                    cached_metadata = json.load(json_file)
                # Use the cached hashes if filesize and mtime are the same
                if (object_file_size == cached_metadata[object_name]['file_size']
                        and object_mtime == cached_metadata[object_name]['modified_time']):
                    object_sha1_hash = cached_metadata[object_name]['sha1_hash']
                    if 'sha256_hash' in cached_metadata[object_name]:
                        object_sha256_hash = cached_metadata[object_name]['sha256_hash']
                    else:
                        object_sha256_hash = utils.get_sha256(obj)
                else:
                    object_sha1_hash = utils.get_sha1(obj)
                    object_sha256_hash = utils.get_sha256(obj)
            else:
                # Generate hashes if cached metadata is not present
                object_sha1_hash = utils.get_sha1(obj)
                object_sha256_hash = utils.get_sha256(obj)

            # TODO remove sha1 check as it's not needed.
            # Add object to metadata dictionary
            objects_metadata[object_name] = {
                'sha1_hash': object_sha1_hash,
                'sha256_hash': object_sha256_hash,
                'modified_time': object_mtime,
                'file_size': object_file_size,
            }
    return objects_metadata
max_file_size_Bytes = int(max_file_size_MiB) * BYTES_PER_MEBIBYTE

dataset_paths = utils.get_files_with_suffix_in_dir(in_dir, ".ds")
dataset_paths = utils.filter_paths(
    dataset_paths,
    day_to_get=[1],
    transmitter_id_to_get=[10, 11],
    # transmission_id_to_get=[1],
)

total_records = int(
    sum([utils.get_file_size(f) for f in dataset_paths]) / record_size)

out_file_path_format_str = (
    out_dir + "/day-1_transmitters-10--11_shuffled_batch-{batch}_part-{part}.ds")

assert (record_size * batch_size <= max_file_size_Bytes)

print("========= pre-run summary =========")
print("Will operate on the following paths:", dataset_paths)
print("total_records:", total_records)
print("in_dir:", in_dir)
print("out_dir:", out_dir)
print("out_batch_size:", batch_size)
print("max_file_size_MiB:", int(max_file_size_Bytes / BYTES_PER_MEBIBYTE))
print("file_format_str:", out_file_path_format_str)
print("Total disk needed (MiB):",
      total_records * record_size / BYTES_PER_MEBIBYTE)
def calc_num_chunks_in_file(path, chunk_size):
    size = utils.get_file_size(path)
    assert (size % chunk_size == 0)
    return size // chunk_size  # exact integer division, guaranteed by the assert
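Given the divisibility guarantee enforced by the assert, a quick worked example (the file name and sizes are illustrative only): a 40960-byte file split into 4096-byte chunks yields exactly 10 chunks.

num_chunks = calc_num_chunks_in_file("a.ds", 4096)  # 40960 // 4096
print(num_chunks)  # -> 10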
def get_vocab_size(self):
    vocab_size = {}
    for key, value in self.languages.items():
        vocab_size[key] = utils.get_file_size(
            os.path.join(self.data_path, "vocab_{}".format(value)))
    return vocab_size
def message_text_handler(update: telegram.Update,
                         context: telegram.ext.CallbackContext) -> None:
    message = update.effective_message
    if message is None:
        return
    chat = update.effective_chat
    if chat is None:
        return
    chat_type = chat.type
    bot = context.bot

    if cli_args.debug and not utils.check_admin(
            bot, context, message, analytics_handler, ADMIN_USER_ID):
        return

    message_id = message.message_id
    chat_id = message.chat.id
    user = message.from_user
    entities = message.parse_entities()

    if user is not None:
        create_or_update_user(bot, user)
        analytics_handler.track(context, analytics.AnalyticsType.MESSAGE, user)

    valid_entities = {
        entity: text
        for entity, text in entities.items()
        if entity.type in [telegram.MessageEntity.URL,
                           telegram.MessageEntity.TEXT_LINK]
    }
    # next() with a default so a message without links doesn't raise
    # StopIteration
    entity, text = next(iter(valid_entities.items()), (None, None))
    if entity is None:
        return

    input_link = entity.url
    if input_link is None:
        input_link = text

    with io.BytesIO() as output_bytes:
        caption = None
        video_url = None
        audio_url = None
        try:
            yt_dl_options = {'logger': logger, 'no_color': True}
            with youtube_dl.YoutubeDL(yt_dl_options) as yt_dl:
                video_info = yt_dl.extract_info(input_link, download=False)
                if 'entries' in video_info:
                    video = video_info['entries'][0]
                else:
                    video = video_info
                if 'title' in video:
                    caption = video['title']
                else:
                    caption = input_link
                file_size = None
                if 'requested_formats' in video:
                    requested_formats = video['requested_formats']
                    video_data = list(
                        filter(lambda requested_format:
                               requested_format['vcodec'] != 'none',
                               requested_formats))[0]
                    audio_data = list(
                        filter(lambda requested_format:
                               requested_format['acodec'] != 'none',
                               requested_formats))[0]
                    if 'filesize' in video_data:
                        file_size = video_data['filesize']
                    video_url = video_data['url']
                    if file_size is None:
                        file_size = utils.get_file_size(video_url)
                    audio_url = audio_data['url']
                elif 'url' in video:
                    video_url = video['url']
                    file_size = utils.get_file_size(video_url)
                if file_size is not None:
                    if not utils.ensure_size_under_limit(
                            file_size, telegram.constants.MAX_FILESIZE_UPLOAD,
                            update, context):
                        return
        except Exception as error:
            logger.error(f'youtube-dl error: {error}')

        if chat_type == telegram.Chat.PRIVATE and (caption is None
                                                   or video_url is None):
            bot.send_message(chat_id,
                             'No video found on this link.',
                             disable_web_page_preview=True,
                             reply_to_message_id=message_id)
            return

        mp4_bytes = utils.convert(constants.OutputType.VIDEO,
                                  input_video_url=video_url,
                                  input_audio_url=audio_url)
        if not utils.ensure_valid_converted_file(
                file_bytes=mp4_bytes, update=update, context=context):
            return
        if mp4_bytes is not None:
            output_bytes.write(mp4_bytes)
            output_bytes.seek(0)
            if caption is not None:
                caption = caption[:telegram.constants.MAX_CAPTION_LENGTH]
            utils.send_video(bot, chat_id, message_id, output_bytes, caption,
                             chat_type)