def main(args): """Entry point for repeat read consensus creation.""" # arg parser does not supply these args.tag_name = None args.tag_value = None args.tag_keep_missing = False logger = medaka.common.get_named_logger('Smolecule') medaka.common.mkdir_p(args.output, info='Results will be overwritten.') def _multi_file_reader(): for fname in args.fasta: try: yield Read.from_fastx(fname) except Exception: pass if len(args.fasta) > 1: logger.info("Given {} input files, assuming one read per file.".format( len(args.fasta))) reads = _multi_file_reader() else: logger.info("Given one input file, subreads are assumed " "to be grouped by read.") reads = Read.multi_from_fastx(args.fasta[0], depth_filter=args.depth, length_filter=args.length) logger.info("Running pre-medaka POA consensus for all reads.") t0 = now() header, consensuses, alignments = poa_workflow(reads, args.threads) t1 = now() logger.info("Writing medaka input bam for {} reads.".format( len(alignments))) bam_file = os.path.join(args.output, 'subreads_to_spoa.bam') write_bam(bam_file, alignments, header) spoa_file = os.path.join(args.output, 'poa.fasta') with open(spoa_file, 'w') as fh: for rname, cons in consensuses: fh.write('>{}\n{}\n'.format(rname, cons)) logger.info("Running medaka consensus.") t2 = now() args.bam = bam_file out_dir = args.output args.output = os.path.join(out_dir, 'consensus.hdf') medaka.inference.predict(args) t3 = now() logger.info("Running medaka stitch.") args.inputs = [args.output] args.output = os.path.join(out_dir, 'consensus.fasta') args.regions = None medaka.stitch.stitch(args) logger.info("Single-molecule consensus sequences written to {}.".format( args.output)) logger.info("POA time: {:.0f}s, medaka time: {:.0f}s".format( t1 - t0, t3 - t2))
def run(self):
    self.start_time = now()
    self.update(self.puzzle)
    ga = GeneticAlgorithm(self.puzzle.copy(), update=self.update)
    ga.run(100, 100)  # run a population of 100 for 100 generations
    self.delta = now() - self.start_time
    self.combine_paths(ga.paths)  # combine the paths into one for WOC

def main():
    if len(sys.argv) != 4:
        print("Usage: ./pytorch_query.py <image-path> <port> <batch-size>")
        sys.exit(1)
    image_path = sys.argv[1]
    port = sys.argv[2]
    batch_size = int(sys.argv[3])
    if not os.path.exists(image_path):
        print(image_path, "is not a valid path")
        sys.exit(1)

    img = mpimg.imread(image_path)
    img_flatten = img.flatten().astype(np.float32)
    img_bytes = img_flatten.tobytes()
    input_arr = [img_bytes] * batch_size

    start = now()
    with grpc.insecure_channel('localhost:' + port) as channel:
        stub = infaas_query_grpc.QueryStub(channel)
        request = infaas_query.QueryOnlineRequest(raw_input=input_arr)
        response = stub.QueryOnline(request)
    end = now()
    e2e = end - start
    # e2e_ms = e2e * 1000
    print('%.4f' % e2e)

def wrapper(*args, **kwargs):
    call_msg = "---> " + call_formatter.format_call(
        *args, skip_args=skip_args, **kwargs)
    call_logger.log(level, call_msg)
    _indentation.increment()
    start_time = now()
    try:
        call_result = func(*args, **kwargs)
        call_duration = now() - start_time
        call_result_type = type(call_result).__name__
        _indentation.decrement()
        if log_result:
            result_msg = f"<{call_result_type}, {call_result}>"
        else:
            result_msg = f"<{call_result_type}"
            if hasattr(call_result, "__len__"):
                result_msg += f", len: {len(call_result)}"
            result_msg += ">"
        call_logger.log(
            level,
            f"{call_formatter.full_name} <--- {result_msg} "
            f"{call_duration * 1000:0.6f} ms.")
        return call_result
    except BaseException:
        call_duration = now() - start_time
        _indentation.decrement()
        call_logger.exception(
            f"{call_formatter.func_module}.{call_formatter.func_name} "
            f"<--- Exception after {call_duration * 1000:0.6f} ms.")
        raise

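# A self-contained sketch of the kind of decorator factory that could
# produce `wrapper` above. The original closure helpers (`call_formatter`,
# `_indentation`, `skip_args`, `log_result`) are not shown in this listing,
# so this simplified version logs only the function name and arguments.
import functools
import logging
from timeit import default_timer as now

def log_call(level=logging.DEBUG):
    def decorator(func):
        call_logger = logging.getLogger(func.__module__)

        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            call_logger.log(level, "---> %s%r", func.__name__, args)
            start_time = now()
            try:
                result = func(*args, **kwargs)
                call_logger.log(
                    level, "%s <--- <%s> %0.6f ms.", func.__name__,
                    type(result).__name__, (now() - start_time) * 1000)
                return result
            except BaseException:
                call_logger.exception(
                    "%s <--- Exception after %0.6f ms.", func.__name__,
                    (now() - start_time) * 1000)
                raise
        return wrapper
    return decorator

@log_call(level=logging.INFO)
def add(a, b):
    return a + b
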
def execute(self, input_columns):
    input_count = len(input_columns[0])
    column_count = len(input_columns)
    assert column_count == 1

    out_cols = []
    data = []
    for i in xrange(input_count):
        pil_im = Image.fromarray(input_columns[0][i])
        jpeg_image = self.convertToJpeg(pil_im)  # also convert to jpeg
        img = Image.open(BytesIO(jpeg_image))
        data.append(img)
    print('batch = # {:d} images'.format(len(data)))

    start = now()
    sym, arg_params, aux_params = self.load_model(f_symbol_file, f_params_file)
    mod = mx.mod.Module(symbol=sym, label_names=None)
    mod.bind(for_training=False,
             data_shapes=[('data', (input_count, 3, 224, 224))],
             label_shapes=mod._label_shapes)
    mod.set_params(arg_params, aux_params, allow_missing=True)
    stop = now()
    delta = stop - start
    print('Time to load model: {:.4f}s'.format(delta))

    start = now()
    labels = self.predict(input_count, data, mod)  # a list of labels
    stop = now()
    delta = stop - start
    print('Time to predict: {:.4f}s'.format(delta))

    # print label
    # return [struct.pack('=i', label)]
    out_cols.append(labels)
    return out_cols

def execute(self, frame: Sequence[sp.FrameType]) -> Sequence[NumpyArrayFloat32]:
    batch_size = len(frame)
    start = now()
    batch_tensor = torch.from_numpy(
        np.moveaxis(np.concatenate(np.expand_dims(frame, axis=0), axis=0),
                    3, 1)).type(torch.FloatTensor)
    if not self.cpu_only:
        start = now()
        batch_tensor = batch_tensor.cuda()
        # print('Transfer to device: {:.3f}'.format(now() - start))
    batch_tensor /= 255.0
    batch_tensor -= self._mu
    batch_tensor /= self._sigma
    # print('Transform: {:.3f}'.format(now() - start))
    with torch.no_grad():
        start = now()
        output = self.model.forward(batch_tensor)
        # print('Forward: {:.3f}'.format(now() - start))
    if not self.cpu_only:
        start = now()
        output = output.cpu()
        # print('Transfer from device: {:.3f}'.format(now() - start))
    import sys
    sys.stdout.flush()
    return [output[i, :].numpy() for i in range(batch_size)]

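# A plausible initialization for the `_mu`/`_sigma` tensors used above
# (assumed, not shown in the original): per-channel ImageNet mean and std,
# shaped (1, 3, 1, 1) so they broadcast over an NCHW batch scaled to [0, 1].
import torch

_mu = torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1)
_sigma = torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1)
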
def yield_batches(self):
    time_between = deque(maxlen=50)
    get_time = deque(maxlen=50)
    t0 = now()
    try:
        while True:
            t0, t1 = now(), t0
            ta = now()
            res = self._queue.get()
            if isinstance(res, Future):
                res = res.result()
            get_time.append(now() - ta)
            time_between.append(t0 - t1)
            get_rate = np.mean(get_time)
            req_rate = np.mean(time_between) - get_rate
            self.logger.debug(
                "Request every: {:5.3}s. Fetch time: {:5.3}.".format(
                    np.mean(time_between), np.mean(get_time)))
            self.logger.debug("Queue state: {}/{} ready.".format(
                self.loaded_batches - self.taken_batches,
                self.submitted_batches - self.taken_batches))
            self.taken_batches += 1
            yield res
    except Exception as e:
        self.logger.critical(
            "Exception caught while yielding batches: {}".format(e))
        self.stop()
        raise e

def main(args):
    # TODO: probably want to be able to take files from S3 too
    # TODO: Update handler to take args into account
    inputName = ["300.webm", "125.mkv"]
    random.seed()
    urlEvals = args.get('url', '')
    prepareffmpeg(urlEvals)
    event = {
        # randrange's stop is exclusive, so len(inputName) picks a valid index
        'videoUrl': str(inputName[random.randrange(0, len(inputName))]),
        'outputBucket': 'thisoutput',
        'outputPrefix': '',
        'decodeFps': 30,
        'outputBatchSize': 100,
        'url': urlEvals
        # 'keepOutput': 'true'
    }
    start = now()
    result = handler(event, {})
    stop = now()
    delta = stop - start
    print('Time to decode is: {:.4f}s'.format(delta))
    print('Extract time is {:.4f}s'.format(result['body']['extract_time']))
    print('Transform time is {:.4f}s'.format(result['body']['transform_time']))
    print('Load time is {:.4f}s'.format(result['body']['load_time']))

def main(args):
    inputBucket = 'thisbins'
    inputPrefix = ''
    startFrame = 0
    outputBatchSize = 50
    outputPrefix = ''
    totalFrame = 6221
    if len(sys.argv) > 1:
        totalFrame = min(int(sys.argv[1]), totalFrame)

    for startFrame in xrange(0, totalFrame, WORK_PACKET_SIZE):
        event = {
            'inputBucket': inputBucket,
            'inputPrefix': inputPrefix,
            'startFrame': startFrame,
            'outputBatchSize': outputBatchSize,
            'outputPrefix': outputPrefix,
            'outputBucket': 'thisoutput'
        }
        start = now()
        result = handler(event, {})
        end = now()
        duration = (end - start) * 1000
        billedDuration = math.ceil(duration / 100.0) * 100.0
        print('Duration: {:.2f} ms Billed Duration: {:.0f} ms '
              'Memory Size: 1536 MB Max Memory Used: 1536 MB'
              .format(duration, billedDuration))

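# Worked example of the billing round-up above (illustrative values): a
# measured duration of 123.40 ms is billed as
# math.ceil(123.40 / 100.0) * 100.0 == 200 ms.
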
def compress_basecalls(args):
    """Entry point for RLE compression of a fasta/q file."""
    logger = medaka.common.get_named_logger('Compress_basecalls')

    reads = pysam.FastxFile(args.input)
    if args.threads > 1:
        pool = Pool(args.threads)
        compressed = pool.imap(compress_seq, reads)
    else:
        compressed = (compress_seq(r) for r in reads)

    t0 = now()
    if args.output is None:
        fh = sys.stdout
    else:
        fh = open(args.output, 'w')
    for read in compressed:
        fh.write('@{} {}\n{}\n'.format(read.name, read.comment, read.sequence))
        fh.write('{}\n{}\n'.format('+', read.quality))
    t1 = now()
    logger.info('Compressing {} took {:.3f}s.'.format(args.input, t1 - t0))

    if args.output is not None:
        fh.close()

def _fill_parallel(self):
    # process multiple regions at a time, up to a maximum to limit memory
    # use. Note that the number of workers also serves as a memory
    # limit when data is being consumed as fast as it is produced.
    regions = iter(self.regions)
    futures = dict()
    submitted = True
    t0 = now()
    cache_check_interval = 3
    min_region_cache = 4
    with ThreadPoolExecutor(max_workers=self.bam_workers) as executor:
        while True:
            if submitted:
                try:
                    submit_reg = next(regions)
                except StopIteration:
                    break
            # try to submit
            if len(futures) < self.region_cache_size:
                self.logger.debug("Submitting {}.".format(submit_reg))
                futures[str(submit_reg)] = executor.submit(
                    self._run_region, self.bam, submit_reg, **self.kwargs)
                submitted = True
            else:
                submitted = False
            # try to fetch
            done = []
            for kreg, fut in futures.items():
                if fut.done():
                    samples, remain = fut.result()
                    self.remainders.extend(remain)
                    for sample in samples:
                        self._results.put(sample)
                    done.append(kreg)
            for kreg in done:
                del futures[kreg]
            # keep things flowing
            if now() - t0 > cache_check_interval:
                t0 = now()
                if self._results.qsize() < 0.5 * self.sample_cache_size:
                    self.logger.debug(
                        "Expanding region cache from {}.".format(
                            self.region_cache_size))
                    self.region_cache_size += 1
                elif self._results.qsize() > 0.9 * self.sample_cache_size:
                    self.logger.debug(
                        "Reducing region cache from {}.".format(
                            self.region_cache_size))
                    self.region_cache_size = max(
                        min_region_cache, self.region_cache_size - 1)
                else:
                    self.logger.debug("Region cache is a good size.")
        # collect remaining futures
        for fut in as_completed(futures.values()):
            samples, remain = fut.result()
            self.remainders.extend(remain)
            for sample in samples:
                self._results.put(sample)
    # signal everything has been processed
    self._results.put(StopIteration)

def recvPong(self):
    timeout = self.timeout
    entered_function = now()
    while True:
        # print "recvPong while ..."
        elapsed = now() - entered_function
        if elapsed > timeout:
            # print "TIMEOUT!"
            return None, 0, None, None
        # pass the remaining time so select() cannot block forever
        readable, writable, exceptional = select.select(
            [self._socket], [], [], timeout - elapsed)
        if readable == []:
            # print "TIMEOUT [select]"
            return None, 0, None, None
        packet_data, address = self._socket.recvfrom(1024)
        # print "self._socket.recvfrom ..."
        recv_time = now()
        icmp_header = ICMPHeader(packet_data[20:28])
        ip_header = IPHeader(packet_data[:20])
        return recv_time, len(packet_data), ip_header, icmp_header

def wait_until_all_finished(startFrame, numFrames, videoPrefix, args):
    batch = args.batch
    totalCount = len(xrange(startFrame, numFrames, batch))
    s3 = boto3.resource('s3')
    outputBucket = args.downloadBucket
    outputPrefix = args.downloadPrefix
    bar = progressbar.ProgressBar(
        maxval=totalCount,
        widgets=[progressbar.Bar('=', 'Files [', ']'), ' ',
                 progressbar.Percentage()])
    bar.start()
    fileCount = 0
    time.sleep(2.0)  # sleep for 2 seconds to wait for the decoder to finish
    startTime = now()
    timeOut = startTime + args.timeout
    while fileCount < totalCount:
        # list the number of objects
        myBucket = s3.Bucket(outputBucket)
        fileCount = sum(
            1 for _ in myBucket.objects.filter(Prefix='{}/{}_{}_{}/'.format(
                outputPrefix, videoPrefix, batch, batch)))
        bar.update(fileCount)
        if fileCount >= totalCount:
            break
        currTime = now()
        if currTime >= timeOut:
            print('Timed out in {:.4f} sec, cannot finish.'.format(
                currTime - startTime))
            break
        time.sleep(0.1)
    bar.finish()
    return fileCount

def compress(args):
    if args.output is None:
        fh = sys.stdout
    else:
        fh = open(args.output, 'w')

    formats = {'a': 'fasta', 'q': 'fastq'}
    if args.input[-1] not in formats:
        msg = 'Could not guess file format of {}, rename to .f(ast)a/.f(ast)q'
        raise KeyError(msg.format(args.input))

    reads = SeqIO.parse(args.input, formats[args.input[-1]])
    if args.threads > 1:
        pool = Pool(args.threads)
        compressed = pool.imap(compress_seq, reads)
    else:
        compressed = (compress_seq(r) for r in reads)

    t0 = now()
    for description, compressed_seq, compressed_scores, runs in compressed:
        fh.write('@{}\n{}\n'.format(description, compressed_seq))
        fh.write('{}\n{}\n'.format('+', compressed_scores))
    t1 = now()
    logger.info('Compressing {} took {:.3f}s.'.format(args.input, t1 - t0))

    if args.output is not None:
        fh.close()

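# `compress_seq` is not defined in this listing. A minimal hypothetical
# run-length-encoding version matching the 4-tuple unpacked by `compress`
# above might look like the sketch below (the run-length score encoding is
# illustrative only; `compress_basecalls` further up expects a record-like
# object rather than a tuple, so its helper would differ).
from itertools import groupby

def compress_seq(read):
    """Collapse homopolymer runs in a Bio.SeqIO record (hypothetical)."""
    runs = [(base, len(list(group)))
            for base, group in groupby(str(read.seq))]
    compressed_seq = ''.join(base for base, _ in runs)
    # encode each run length as a printable fastq-style character
    compressed_scores = ''.join(
        chr(33 + min(length, 93)) for _, length in runs)
    return read.description, compressed_seq, compressed_scores, runs
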
def create_thunks(self, inputs):
    start = now()

    # Perform sanity checks
    if not inputs:
        print("List of inputs is empty!")
        return
    if not isinstance(inputs, list):
        inputs = [inputs]

    # Check for valid inputs. If the input type is GGThunk, the actual
    # thunks need to be created, along with a placeholder per input.
    cmd_inp = []
    if isinstance(inputs[0], GGThunk):
        # Set the input name before generating...needed to be
        # consistent with placeholder
        out_index = 0
        for inp in inputs:
            if inp.get_all_outname() == []:
                next_filename = 'my_output_' + str(out_index) + '.out'
                inp.add_outname(next_filename)
                out_index += 1

        # Multithread thunk generation
        all_threads = []
        num_cores = mp.cpu_count()
        if len(inputs) < num_cores:
            for inp in inputs:
                all_threads.append(self.__distr_thunk_gen([inp]))
        else:
            batch_size = int(len(inputs) / num_cores)
            for i in range(num_cores):
                if i < num_cores - 1:
                    all_threads.append(self.__distr_thunk_gen(
                        inputs[i * batch_size:i * batch_size + batch_size]))
                else:
                    all_threads.append(self.__distr_thunk_gen(
                        inputs[i * batch_size:]))

        for at in all_threads:
            cmd_inp.extend(at.result())

        if len(cmd_inp) != len(inputs):
            print("Error: cmd_inp != inputs")
            sys.exit(1)
    elif isinstance(inputs[0], str):
        print("Nothing to generate...")
        cmd_inp = inputs
    else:
        print("invalid input: must be a GGThunk object")
        sys.exit(1)

    end = now()
    delta = end - start
    print("Time to generate thunks: %.3f seconds" % delta)
    return cmd_inp

def basic():
    start = now()
    for number in range(1, 101):
        if number % 3 == 0:
            print("Fizz", end="")
        if number % 5 == 0:
            print("Buzz", end="")
        if number % 3 != 0 and number % 5 != 0:
            print(number)
        else:
            print()  # finish the "Fizz"/"Buzz" line with a newline
    end = now()
    return start, end

def time(f, iters=10):
    gc.disable()
    start = now()
    for _ in range(iters):
        f()
    elapsed = now() - start
    gc.enable()
    return elapsed

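# Example use of the `time` helper above; assumes `gc` is imported and
# `now` is bound as elsewhere in this listing
# (`from timeit import default_timer as now`). The workload is arbitrary.
elapsed = time(lambda: sum(range(10 ** 6)), iters=5)
print('5 iterations took {:.4f}s total'.format(elapsed))
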
def samples_to_batch(samples, prep_func, name, batch, epoch):
    t0 = now()
    items = [prep_func(s) for s in samples]
    xs, ys = zip(*items)
    x, y = np.stack(xs), np.stack(ys)
    get_named_logger(name).debug(
        "Took {:5.3}s to load batch {} (epoch {})".format(
            now() - t0, batch, epoch))
    return x, y

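# Minimal usage sketch for `samples_to_batch`: `prep_func` maps one sample
# to an (x, y) pair of arrays. `toy_prep` and the sample data here are
# hypothetical; numpy and the module's `get_named_logger` are assumed
# importable.
def toy_prep(sample):
    features, label = sample
    return np.asarray(features, dtype=np.float32), np.asarray(label)

samples = [([1.0, 2.0], 0), ([3.0, 4.0], 1)]
x, y = samples_to_batch(samples, toy_prep, 'Batcher', batch=0, epoch=0)
# x.shape == (2, 2); y.shape == (2,)
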
def slightlyOptimized():
    start = now()
    for number in range(1, 101):
        output = ""
        if number % 3 == 0:
            output += "Fizz"
        if number % 5 == 0:
            output += "Buzz"
        if not output:  # neither a multiple of 3 nor of 5
            output = number
        print(output)
    end = now()
    return start, end

def main(args):
    vidStart = args.vidToProcess
    execEnv = args.execEnv
    nJobs = args.numJobs
    all_chunks = glob.glob(vidStart + '_chunk*')

    # Get all durations
    all_dur = {}
    for ac in all_chunks:
        ts = get_dur_fps(ac)
        all_dur[ac] = ts

    gg = GG()
    all_thunks = []
    start = now()
    for vidind, myvid in enumerate(all_chunks):
        ts = int(all_dur[myvid])
        num_out = int(ts) * 2
        all_outname = []
        for j in range(num_out):
            all_outname.append('frameout%03d_%03d.jpg' % (j + 1, vidind))
        next_cmd = CMD.format(video=myvid, numout=num_out,
                              ofile='%03d' % vidind)
        next_cmd_split = next_cmd.split()
        gen_jpg_thunk = GGThunk(exe=next_cmd_split[0],
                                outname=all_outname,
                                exe_args=next_cmd_split[1:],
                                args_infiles=False)
        gen_jpg_thunk.add_infile(myvid)

        for j in range(num_out):
            pic_out = 'frameout%03d_%03d_lab.out' % (j + 1, vidind)
            last_cmd = CMD_IMREC.format(myimage=all_outname[j],
                                        myoutput=pic_out)
            last_cmd_split = last_cmd.split()
            last_thunk = GGThunk(exe=last_cmd_split[0],
                                 outname=pic_out,
                                 exe_args=last_cmd_split[1:],
                                 args_infiles=False)
            last_thunk.add_infile([
                'inception_v3_2016_08_28_frozen.pb',
                'imagenet_slim_labels.txt',
                (gen_jpg_thunk, all_outname[j])
            ])
            all_thunks.append(last_thunk)
    end = now()
    delta = end - start
    print("Total time to declare thunks: %.3f seconds" % delta)

    gg.create_and_force(all_thunks, showcomm=False, numjobs=nJobs,
                        env=execEnv)

def run(sc, op, name):
    vid = NamedVideoStream(sc, 'test1')
    inp = sc.io.Input([vid])
    # f = sc.streams.Gather(inp, [list(range(1000))])
    tf = op(frame=inp, batch=100, device=DeviceType.CPU)
    out = NamedStream(sc, 'qq')
    outp = sc.io.Output(tf, [out])

    s = now()
    sc.run(outp, PerfParams.estimate(), cache_mode=CacheMode.Overwrite,
           pipeline_instances_per_node=1)
    sc.table('qq').profiler().write_trace('{}.trace'.format(name))
    print('{:.1f}s'.format(now() - s))

def test_busy_wait(): """Tests waiter.stopwatch as side-effect""" from timeit import default_timer as now duration = 0.2 start = now() time_tool.busy_wait(duration) took = now() - start took_vs_duration_percent = (took / duration) * 100 assert took_vs_duration_percent > 99.999
def optimizedWithStrListOutput():
    start = now()

    def fizzbuzzer(number):
        output = ""
        if number % 3 == 0:
            output += "Fizz"
        if number % 5 == 0:
            output += "Buzz"
        if not output:  # neither a multiple of 3 nor of 5
            output = number
        return output

    print([str(fizzbuzzer(number)) for number in range(1, 101)])
    end = now()
    return start, end

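# A small harness for the three FizzBuzz variants above: each returns its
# (start, end) timestamps, so the elapsed time is just their difference.
for variant in (basic, slightlyOptimized, optimizedWithStrListOutput):
    start, end = variant()
    print('{}: {:.6f}s'.format(variant.__name__, end - start))
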
def _fill_features(self):
    if self._source is None:
        self._quarantined = None
        t0 = now()
        if self.truth_bam is not None:
            self._source = self.fencoder.bams_to_training_samples(
                self.truth_bam, self.bam, self.region, self.rle_ref,
                self.read_fraction)
        else:
            self._source = self.fencoder.bam_to_sample(
                self.bam, self.region, self.rle_ref, self.read_fraction)
        t1 = now()
        self.logger.info("Took {:.2f}s to make features.".format(t1 - t0))

def _fill_features(self):
    if self._source is None:
        t0 = now()
        if self.truth_bam is not None:
            self._source = self.fencoder.bams_to_training_samples(
                self.truth_bam, self.bam, self.region, self.rle_ref,
                self.read_fraction)
        else:
            self._source = self.fencoder.bam_to_sample(
                self.bam, self.region, self.rle_ref, self.read_fraction)
            self._source = (self._source, )  # wrap to be the same as above
        t1 = now()
        self.logger.info("Took {:.2f}s to make features.".format(t1 - t0))

def _fill_features(self):
    if self._source is None:
        self._quarantined = None
        t0 = now()
        if self.truth_bam is not None:
            self._source = self.fencoder.bams_to_training_samples(
                self.truth_bam, self.bam, self.region, self.label_scheme,
                truth_haplotag=self.truth_haplotag,
                min_length=self.min_truth_length)
        else:
            self._source = self.fencoder.bam_to_sample(
                self.bam, self.region)
        t1 = now()
        self.logger.info("Took {:.2f}s to make features.".format(t1 - t0))

def decode(db, t, image=False, device=DeviceType.CPU):
    frame = t.as_op().range(0, 10000)
    if image:
        if device == DeviceType.CPU:
            image_type = db.protobufs.ImageDecoderArgs.ANY
        else:
            image_type = db.protobufs.ImageDecoderArgs.JPEG
        frame = db.ops.ImageDecoder(img=frame, image_type=image_type)
    dummy = db.ops.DiscardFrame(
        ignore=frame,
        device=device if not image else DeviceType.CPU)
    job = Job(columns=[dummy], name='example_dummy')
    start = now()
    out = db.run(job, force=True, work_item_size=100,
                 pipeline_instances_per_node=1)
    out.profiler().write_trace('{}.trace'.format(t.name()))
    return now() - start

def __getitem__(self, idx):
    t0 = now()
    bs = self.batch_size
    start, stop = idx * bs, (idx + 1) * bs
    if self.dataset == 'validation':
        self.logger.debug("Request for batch {}: [{}:{}], {}".format(
            idx, start, stop, len(self.data)))
    samples = self.data[start:stop]
    batch = self.batcher.samples_to_batch(samples)
    if self.dataset == 'validation':
        self.logger.debug(
            "Took {:5.3}s to load batch {}. (epoch {})".format(
                now() - t0, idx, self.epoch))
    return batch

def run_prediction(
        output, bam, regions, model, model_file, rle_ref, read_fraction,
        chunk_len, chunk_ovlp, batch_size=200, save_features=False,
        tag_name=None, tag_value=None, tag_keep_missing=False):
    """Inference worker."""
    logger = get_named_logger('PWorker')

    def sample_gen():
        # chain all samples whilst dispensing with generators when done
        # (they hold the feature vector in memory until they die)
        for region in regions:
            data_gen = SampleGenerator(
                bam, region, model_file, rle_ref, read_fraction,
                chunk_len=chunk_len, chunk_overlap=chunk_ovlp,
                tag_name=tag_name, tag_value=tag_value,
                tag_keep_missing=tag_keep_missing)
            yield from data_gen.samples

    batches = background_generator(
        grouper(sample_gen(), batch_size), 10)

    total_region_mbases = sum(r.size for r in regions) / 1e6
    logger.info("Running inference for {:.1f}M draft bases.".format(
        total_region_mbases))

    with DataStore(output, 'a') as ds:
        mbases_done = 0
        t0 = now()
        tlast = t0
        for data in batches:
            x_data = np.stack([x.features for x in data])
            class_probs = model.predict_on_batch(x_data)
            mbases_done += sum(x.span for x in data) / 1e6
            # just to avoid funny log msg
            mbases_done = min(mbases_done, total_region_mbases)
            t1 = now()
            if t1 - tlast > 10:
                tlast = t1
                msg = '{:.1%} Done ({:.1f}/{:.1f} Mbases) in {:.1f}s'
                logger.info(msg.format(
                    mbases_done / total_region_mbases, mbases_done,
                    total_region_mbases, t1 - t0))

            best = np.argmax(class_probs, -1)
            for sample, prob, pred, feat in zip(
                    data, class_probs, best, x_data):
                # write out positions and predictions for later analysis
                sample_d = sample._asdict()
                sample_d['label_probs'] = prob
                sample_d['features'] = feat if save_features else None
                ds.write_sample(Sample(**sample_d))

    logger.info('All done')
    return None

def time_it(program_and_args, verbose=False):
    """Returns the time (in seconds) it takes to run a program."""
    if len(program_and_args) == 0:
        raise ValueError('program_and_args must contain at least one element')
    program = str.join(' ', program_and_args)
    if verbose:
        print(f"Running: {program}")
    start_time = now()
    subprocess.call(program, shell=True)
    return timedelta(seconds=now() - start_time).total_seconds()

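# Example invocation of `time_it`; 'sleep 1' is an arbitrary command chosen
# purely for illustration.
seconds = time_it(['sleep', '1'], verbose=True)
print(f'took {seconds:.2f}s')
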
def test_execution_time(self):
    # this might be a bad test
    TARGET_RATIO = 0.5  # something is probably wrong if it's not at least 50% faster

    start = now()
    for i in range(10):
        droplets = GenericQuerySet(Droplet).all()
        for droplet in droplets:
            droplet.publishable
            droplet.publication
    normal_time = now() - start

    start = now()
    for i in range(10):
        droplets = GenericQuerySet(Droplet).select_related_generic()
        for droplet in droplets:
            droplet.publishable
            droplet.publication
    select_generic_time = now() - start

    ratio = select_generic_time / normal_time
    self.assertTrue(ratio <= TARGET_RATIO)

def current_sample(self): """Current simulated sample (the simulation time).""" return int((now() - self.start_time) * self.sample_rate * self.time_warp + self.sample_offset)
def sendPing(self, request):
    sendTime = now()
    self._socket.sendto(request.asString(), (self.destination, 1))
    return sendTime

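# A minimal round-trip sketch combining sendPing/recvPong above. `pinger`
# and `request` are hypothetical instances of the (not shown) pinger and
# ICMP request classes; the RTT is simply receive time minus send time.
send_time = pinger.sendPing(request)
recv_time, size, ip_header, icmp_header = pinger.recvPong()
if recv_time is None:
    print('timed out')
else:
    print('rtt = {:.1f} ms ({} bytes)'.format(
        (recv_time - send_time) * 1000, size))
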
def reset_time(self): """Reset internal time to zero.""" self.start_time = now() self.sample_offset = 0 self._current_event = 0 self._current_read = 0