def print_stats():
    """Print simulation parameters, storage totals and timings to stderr.

    Takes no arguments; every value is read from names in the enclosing
    scope (``args``, ``files_in_storage``, ``files_uploaded``,
    ``data_in_storage``, ``data_uploaded``, ``tmr``, ``tmr_start``).
    ``tmr`` is reset after its elapsed time has been captured, so the
    ``chunk_time`` field covers the interval since the previous reset.

    The two ``DDP`` fields are ``1 - in_storage / uploaded``.  When nothing
    has been uploaded yet that ratio is undefined, so ``0.0`` is reported
    instead of raising ``ZeroDivisionError``.
    """
    def saved_fraction(stored, uploaded):
        # Guard the denominator: statistics may be requested before the
        # first upload has been accounted for.
        return 1 - stored / uploaded if uploaded else 0.0

    data = (
        args.rlc,
        args.rlu,
        args.max_threshold,
        args.offline_rate,
        utils.num_fmt(files_in_storage),
        utils.num_fmt(files_uploaded),
        saved_fraction(files_in_storage, files_uploaded),
        utils.sizeof_fmt(data_in_storage),
        utils.sizeof_fmt(data_uploaded),
        saved_fraction(data_in_storage, data_uploaded),
        utils.get_mem_info(),
        tmr.elapsed_str,
        tmr_start.elapsed_str,
    )
    tmpl = ("Statistics: \n"
            " Params: RLc=%s, RLu=%s, max_threshold=%s, offline_rate=%s\n"
            " Files: files_in_storage=%s, files_uploaded=%s, DDP=%s\n"
            " Data: data_in_storage=%s, data_uploaded=%s, DDP=%s\n"
            " Execution: memory[%s], chunk_time=%s, total_time=%s")
    # Reset only after tmr.elapsed_str has been read into `data` above.
    tmr.reset()
    print(tmpl % data, file=sys.stderr)
def _import(self, deflated_text):
    """Inflate *deflated_text*, import its documents and log the throughput.

    The inflated text is handed to ``_import_crawl`` when
    ``self.data_set_name`` is ``'crawl'`` and to ``_import_csv`` otherwise;
    whichever is called supplies the document count.  ``self.batcher`` is
    flushed before the timing stops, so the flush is included in the
    reported rate.
    """
    text = self._inflate(deflated_text)
    log('%s: Importing...' % self.file_name)

    start = time.time()
    if self.data_set_name == 'crawl':
        count = self._import_crawl(text)
    else:
        count = self._import_csv(text)
    self.batcher.flush()
    elapsed = time.time() - start

    # time.time() can return the same value twice on coarse clocks (or go
    # backwards if the system clock is adjusted); report a rate of 0.0
    # rather than raising ZeroDivisionError or logging a negative speed.
    speed = count / elapsed if elapsed > 0 else 0.0
    log('%s: Imported %s docs (%s per sec)' %
        (self.file_name, num_fmt(count), num_fmt(speed)))
def print_stats():
    """Print storage totals, memory usage and chunk timing to stderr.

    Takes no arguments; every value is read from names in the enclosing
    scope (``files_in_storage``, ``files_uploaded``, ``data_in_storage``,
    ``data_uploaded``, ``tmr``).  ``tmr`` is reset after its elapsed time
    has been captured, so ``chunk_time`` covers the interval since the
    previous reset.

    The two ``DDP`` fields are ``1 - in_storage / uploaded``.  When nothing
    has been uploaded yet that ratio is undefined, so ``0.0`` is reported
    instead of raising ``ZeroDivisionError``.
    """
    def saved_fraction(stored, uploaded):
        # Guard the denominator: statistics may be requested before the
        # first upload has been accounted for.
        return 1 - stored / uploaded if uploaded else 0.0

    data = (
        utils.num_fmt(files_in_storage),
        utils.num_fmt(files_uploaded),
        saved_fraction(files_in_storage, files_uploaded),
        utils.sizeof_fmt(data_in_storage),
        utils.sizeof_fmt(data_uploaded),
        saved_fraction(data_in_storage, data_uploaded),
        utils.get_mem_info(),
        tmr.elapsed_str,
    )
    tmpl = ("Statistics: \n"
            " Files: files_in_storage=%s, files_uploaded=%s, DDP=%s\n"
            " Data: data_in_storage=%s, data_uploaded=%s, DDP=%s\n"
            " Execution: memory[%s], chunk_time=%s")
    # Reset only after tmr.elapsed_str has been read into `data` above.
    tmr.reset()
    print(tmpl % data, file=sys.stderr)
def _check_collection(self, collection_name):
    """Decide whether *collection_name* should be imported into.

    If ``self.opts.drop_collection`` is set the collection is dropped
    first.  Returns ``True`` when the collection then reports zero
    documents, and ``False`` (after logging a "Skipping" message) when it
    still holds any.
    """
    if self.opts.drop_collection:
        log("Dropping collection '%s'" % collection_name)
        self.db[collection_name].drop()

    existing_docs = self.db[collection_name].count()
    if existing_docs == 0:
        log("Importing into collection '%s'" % collection_name)
        return True

    # Anything already present means this collection is left untouched.
    log("Skipping non-empty collection '%s' (%s docs)" %
        (collection_name, num_fmt(existing_docs)))
    return False