Esempio n. 1
0
def main(args):
    """Entry point for repeat read consensus creation.

    Runs a POA consensus over the input reads, writes the resulting
    alignments (``subreads_to_spoa.bam``) and consensus sequences
    (``poa.fasta``) into ``args.output``, then runs medaka prediction and
    stitching on the alignments.

    :param args: argument namespace. ``fasta``, ``output``, ``threads``,
        ``depth`` and ``length`` are read here; ``tag_name``, ``tag_value``,
        ``tag_keep_missing``, ``bam``, ``output``, ``inputs`` and ``regions``
        are (re)assigned before the namespace is passed on to medaka.
    """
    # arg parser does not supply these
    args.tag_name = None
    args.tag_value = None
    args.tag_keep_missing = False

    logger = medaka.common.get_named_logger('Smolecule')
    medaka.common.mkdir_p(args.output, info='Results will be overwritten.')

    def _multi_file_reader():
        # Best effort: a file that cannot be loaded is skipped, but the skip
        # is reported instead of being silently swallowed.
        for fname in args.fasta:
            try:
                read = Read.from_fastx(fname)
            except Exception as e:
                logger.warning(
                    "Skipping {}, could not load read: {}".format(fname, e))
            else:
                # yield outside the try so that exceptions thrown into the
                # generator by the consumer are not swallowed here.
                yield read

    if len(args.fasta) > 1:
        logger.info("Given {} input files, assuming one read per file.".format(
            len(args.fasta)))
        reads = _multi_file_reader()
    else:
        logger.info("Given one input file, subreads are assumed "
                    "to be grouped by read.")
        reads = Read.multi_from_fastx(args.fasta[0],
                                      depth_filter=args.depth,
                                      length_filter=args.length)

    logger.info("Running pre-medaka POA consensus for all reads.")
    t0 = now()
    header, consensuses, alignments = poa_workflow(reads, args.threads)
    t1 = now()

    logger.info("Writing medaka input bam for {} reads.".format(
        len(alignments)))
    bam_file = os.path.join(args.output, 'subreads_to_spoa.bam')
    write_bam(bam_file, alignments, header)

    spoa_file = os.path.join(args.output, 'poa.fasta')
    with open(spoa_file, 'w') as fh:
        for rname, cons in consensuses:
            fh.write('>{}\n{}\n'.format(rname, cons))

    logger.info("Running medaka consensus.")
    t2 = now()
    # The same namespace is reused for the medaka steps, so ``output`` is
    # repointed from the results directory to each step's output file.
    args.bam = bam_file
    out_dir = args.output
    args.output = os.path.join(out_dir, 'consensus.hdf')
    medaka.inference.predict(args)
    t3 = now()

    logger.info("Running medaka stitch.")
    args.inputs = [args.output]
    args.output = os.path.join(out_dir, 'consensus.fasta')
    args.regions = None
    medaka.stitch.stitch(args)
    logger.info("Single-molecule consensus sequences written to {}.".format(
        args.output))
    logger.info("POA time: {:.0f}s, medaka time: {:.0f}s".format(
        t1 - t0, t3 - t2))
Esempio n. 2
0
 def run(self):
     """Run the genetic algorithm on a copy of the puzzle and merge its paths.

     Records the start time in ``self.start_time`` and the elapsed time of
     the update + GA run in ``self.delta``.
     """
     self.start_time = now()
     self.update(self.puzzle)

     solver = GeneticAlgorithm(self.puzzle.copy(), update=self.update)
     # both the population size and the number of generations are 100
     solver.run(100, 100)

     self.delta = now() - self.start_time
     # fold the GA's paths into a single one (wisdom of crowds)
     self.combine_paths(solver.paths)
Esempio n. 3
0
def main():
    """Send one batched online query over gRPC and print the round-trip time.

    Command line: ``<image-path> <port> <batch-size>``. The image is
    flattened to float32 bytes and repeated ``batch-size`` times to form the
    request. The printed value is the elapsed time around the channel
    open/query/close, formatted with four decimals.
    """
    argv = sys.argv
    if len(argv) != 4:
        print("Usage: ./pytorch_query.py <image-path> <port> <batch-size>")
        sys.exit(1)

    image_path, port = argv[1], argv[2]
    batch_size = int(argv[3])
    if not os.path.exists(image_path):
        print(image_path, "is not a valid path")
        sys.exit(1)

    pixels = mpimg.imread(image_path)
    payload = pixels.flatten().astype(np.float32).tobytes()
    # every entry of the batch is the same image
    batch = [payload] * batch_size

    began = now()
    with grpc.insecure_channel('localhost:' + port) as channel:
        stub = infaas_query_grpc.QueryStub(channel)
        request = infaas_query.QueryOnlineRequest(raw_input=batch)
        response = stub.QueryOnline(request)
    elapsed = now() - began
    print('%.4f' % elapsed)
Esempio n. 4
0
    def wrapper(*args, **kwargs):
        # Logs the call ("--->"), runs ``func``, then logs either the result
        # ("<---" with type, optional value/length and duration in ms) or the
        # exception, keeping the indentation counter balanced in both cases.

        call_msg = "---> " + call_formatter.format_call(*args, skip_args=skip_args, **kwargs)
        call_logger.log(level, call_msg)
        _indentation.increment()

        start_time = now()
        # Only the wrapped call is guarded: a failure while formatting the
        # result below must not decrement the indentation a second time or be
        # reported as an exception of ``func``.
        try:
            call_result = func(*args, **kwargs)
        except BaseException:
            # same reach as a bare ``except:``; the exception is always re-raised
            call_duration = now() - start_time
            _indentation.decrement()
            call_logger.exception(
                f"{call_formatter.func_module}.{call_formatter.func_name} "
                f"<--- Exception after {call_duration * 1000:0.6f} ms.")
            raise

        call_duration = now() - start_time
        call_result_type = type(call_result).__name__
        _indentation.decrement()

        if log_result:
            result_msg = f"<{call_result_type}, {call_result}>"
        else:
            # value logging is off: report only the type and, if sized, the length
            result_msg = f"<{call_result_type}"
            if hasattr(call_result, "__len__"):
                result_msg += f", len: {len(call_result)}"
            result_msg += ">"

        call_logger.log(level,
                        f"{call_formatter.full_name} <--- {result_msg} {call_duration * 1000:0.6f} ms.")
        return call_result
Esempio n. 5
0
    def execute(self, input_columns):
        """Classify a batch of frames and return the labels as one column.

        Each element of the single input column is turned into a PIL image,
        passed through ``self.convertToJpeg`` and reopened from the returned
        bytes. The model is loaded and bound on every call, and both the
        load and the prediction time are printed.

        :param input_columns: list holding exactly one column of frames.
        :returns: ``[labels]`` where ``labels`` is what ``self.predict``
            returned.
        """
        frames = input_columns[0]
        input_count = len(frames)
        assert len(input_columns) == 1

        # round-trip every frame through JPEG before handing it to the model
        data = [
            Image.open(BytesIO(self.convertToJpeg(Image.fromarray(frames[i]))))
            for i in xrange(input_count)
        ]
        print('batch = # {:d} images'.format(len(data)))

        load_began = now()
        sym, arg_params, aux_params = self.load_model(f_symbol_file,
                                                      f_params_file)
        mod = mx.mod.Module(symbol=sym, label_names=None)
        mod.bind(for_training=False,
                 data_shapes=[('data', (input_count, 3, 224, 224))],
                 label_shapes=mod._label_shapes)
        mod.set_params(arg_params, aux_params, allow_missing=True)
        print('Time to load model: {:.4f}s'.format(now() - load_began))

        predict_began = now()
        labels = self.predict(input_count, data, mod)  # a list of labels
        print('Time to predict: {:.4f}s'.format(now() - predict_began))

        return [labels]
Esempio n. 6
0
    def execute(self,
                frame: Sequence[sp.FrameType]) -> Sequence[NumpyArrayFloat32]:
        """Run the model on a batch of frames and return one array per frame.

        The frames are stacked, axis 3 is moved to position 1, and the result
        is converted to a float tensor, scaled by 1/255 and normalised with
        ``self._mu`` / ``self._sigma`` before the forward pass. Unless
        ``self.cpu_only`` is set, the batch is moved to the GPU for the
        forward pass and the output is moved back afterwards.
        """
        batch_size = len(frame)

        # ``start`` is re-stamped before each stage as a hook for ad-hoc
        # timing prints; it is not otherwise used.
        start = now()
        stacked = np.concatenate(np.expand_dims(frame, axis=0), axis=0)
        reordered = np.moveaxis(stacked, 3, 1)
        batch_tensor = torch.from_numpy(reordered).type(torch.FloatTensor)

        if not self.cpu_only:
            start = now()
            batch_tensor = batch_tensor.cuda()

        # in-place normalisation
        batch_tensor /= 255.0
        batch_tensor -= self._mu
        batch_tensor /= self._sigma

        with torch.no_grad():
            start = now()
            output = self.model.forward(batch_tensor)

        if not self.cpu_only:
            start = now()
            output = output.cpu()

        import sys
        sys.stdout.flush()

        rows = []
        for i in range(batch_size):
            rows.append(output[i, :].numpy())
        return rows
Esempio n. 7
0
 def yield_batches(self):
     """Yield items from ``self._queue`` forever, resolving futures first.

     An item that is a ``Future`` is replaced by its ``result()``. For the
     last 50 items, the mean time between requests and the mean time spent
     fetching are logged at debug level, together with the queue state.
     ``self.taken_batches`` is incremented for every item yielded.

     :raises Exception: any exception raised while fetching is logged at
         critical level, ``self.stop()`` is called, and the exception is
         re-raised with its original traceback.
     """
     time_between = deque(maxlen=50)
     get_time = deque(maxlen=50)
     t0 = now()
     try:
         while True:
             # t1 is the time of the previous request, t0 of this one
             t0, t1 = now(), t0
             ta = now()
             res = self._queue.get()
             if isinstance(res, Future):
                 res = res.result()
             get_time.append(now() - ta)
             time_between.append(t0 - t1)
             self.logger.debug(
                 "Request every: {:5.3}s. Fetch time: {:5.3}.".format(
                     np.mean(time_between), np.mean(get_time)))
             self.logger.debug("Queue state: {}/{} ready.".format(
                 self.loaded_batches - self.taken_batches,
                 self.submitted_batches - self.taken_batches))
             self.taken_batches += 1
             yield res
     except Exception as e:
         self.logger.critical(
             "Exception caught while yielding batches: {}".format(e))
         self.stop()
         # bare raise keeps the original traceback
         raise
Esempio n. 8
0
def main(args):
    """Decode one randomly chosen sample video via ``handler`` and print timings.

    :param args: mapping; only the optional ``'url'`` entry is read. It is
        passed to ``prepareffmpeg`` and forwarded to the handler in the event.
    """
    # TODO: probably want to be able to take files from S3 too
    # TODO: Update handler to take args into account

    inputName = ["300.webm", "125.mkv"]
    random.seed()

    urlEvals = args.get('url', '')
    prepareffmpeg(urlEvals)

    event = {
        # The original expression computed ``len(inputName - 1)``, which
        # raises TypeError (list minus int); pick one of the inputs uniformly.
        'videoUrl': random.choice(inputName),
        'outputBucket': 'thisoutput',
        'outputPrefix': '',
        'decodeFps': 30,
        'outputBatchSize': 100,
        'url': urlEvals
        # 'keepOutput': 'true'
    }
    start = now()
    result = handler(event, {})
    stop = now()
    delta = stop - start
    print('Time to decode is: {:.4f}s'.format(delta))
    # per-stage timings are read from the handler's response body
    print('Extract time is {:.4f}s'.format(result['body']['extract_time']))
    print('Transform time is {:.4f}s'.format(result['body']['transform_time']))
    print('Load time is {:.4f}s'.format(result['body']['load_time']))
Esempio n. 9
0
def main(args):
    """Call ``handler`` once per work packet and print a billing-style line.

    The number of frames defaults to 6221 and can be lowered (never raised)
    through the first command-line argument. ``args`` is not used.
    """
    totalFrame = 6221
    if len(sys.argv) > 1:
        totalFrame = min(int(sys.argv[1]), totalFrame)

    report = ('Duration: {:.2f} ms Billed Duration: {:.0f} ms   '
              'Memory Size: 1536 MB  Max Memory Used: 1536 MB')

    for startFrame in xrange(0, totalFrame, WORK_PACKET_SIZE):
        event = {
            'inputBucket': 'thisbins',
            'inputPrefix': '',
            'startFrame': startFrame,
            'outputBatchSize': 50,
            'outputPrefix': '',
            'outputBucket': 'thisoutput'
        }
        began = now()
        handler(event, {})
        duration = (now() - began) * 1000
        # billed time is the duration rounded up to the next 100 ms
        billedDuration = math.ceil(duration / 100.0) * 100.0
        print(report.format(duration, billedDuration))
Esempio n. 10
0
def compress_basecalls(args):
    """Entry point for RLE compression of a fasta/q file.

    Reads records from ``args.input``, passes each through ``compress_seq``
    (in a worker pool when ``args.threads > 1``) and writes the results as
    four-line fastq records to ``args.output``, or to stdout when
    ``args.output`` is None.

    The output file and the worker pool are released even if reading,
    compressing or writing fails part-way.
    """
    logger = medaka.common.get_named_logger('Compress_basecalls')

    reads = pysam.FastxFile(args.input)
    pool = None
    fh = None
    try:
        if args.threads > 1:
            pool = Pool(args.threads)
            compressed = pool.imap(compress_seq, reads)
        else:
            compressed = (compress_seq(r) for r in reads)

        t0 = now()
        if args.output is None:
            fh = sys.stdout
        else:
            fh = open(args.output, 'w')

        for read in compressed:
            fh.write('@{} {}\n{}\n'.format(read.name, read.comment, read.sequence))
            fh.write('{}\n{}\n'.format('+', read.quality))
        t1 = now()
        logger.info('Compressing {} took {:.3f}s.'.format(args.input, t1 - t0))
    finally:
        # never close stdout, only a file opened here
        if fh is not None and args.output is not None:
            fh.close()
        # NOTE(review): assumes Pool is multiprocessing.Pool (or compatible),
        # whose context manager also ends with terminate() - confirm.
        if pool is not None:
            pool.terminate()
Esempio n. 11
0
 def _fill_parallel(self):
     """Process ``self.regions`` on a thread pool and queue the samples.

     Each region is submitted as ``self._run_region(self.bam, region,
     **self.kwargs)``. A finished future yields ``(samples, remain)``: the
     samples are put on ``self._results`` and ``remain`` is appended to
     ``self.remainders``. When every region has been handled the
     ``StopIteration`` class itself is put on ``self._results`` as an
     end-of-data marker.
     """
     # process multiple regions at a time, up to a maximum to limit memory
     # use. Note that the number of workers also serves as a memory
     # limit when data is being consumed as fast as it is produced.
     regions = iter(self.regions)
     futures = dict()
     # True when the previous region was handed to the executor, i.e. the
     # next one should be pulled from the iterator
     submitted = True
     t0 = now()
     cache_check_interval = 3  # seconds between region cache size reviews
     min_region_cache = 4  # the region cache is never shrunk below this
     with ThreadPoolExecutor(max_workers=self.bam_workers) as executor:
         while True:
             if submitted:
                 try:
                     submit_reg = next(regions)
                 except StopIteration:
                     # every region has been submitted
                     break
             # try to submit
             if len(futures) < self.region_cache_size:
                 self.logger.debug("Submitting {}.".format(submit_reg))
                 futures[str(submit_reg)] = executor.submit(
                     self._run_region, self.bam, submit_reg, **self.kwargs)
                 submitted = True
             else:
                 # cache is full: keep submit_reg and retry on the next pass
                 # NOTE(review): nothing in this loop sleeps, so while the
                 # cache is full it polls the futures continuously.
                 submitted = False
             # try to fetch
             done = []
             for kreg, fut in futures.items():
                 if fut.done():
                     samples, remain = fut.result()
                     self.remainders.extend(remain)
                     for sample in samples:
                         self._results.put(sample)
                     done.append(kreg)
             # deleted in a second pass so the dict is not mutated while
             # it is being iterated
             for kreg in done:
                 del futures[kreg]
             # keep things flowing
             if now() - t0 > cache_check_interval:
                 t0 = now()
                 # grow the region cache when the results queue is under half
                 # of sample_cache_size, shrink it when over 90%
                 if self._results.qsize() < 0.5 * self.sample_cache_size:
                     self.logger.debug(
                         "Expanding region cache from {},".format(
                             self.region_cache_size))
                     self.region_cache_size += 1
                 elif self._results.qsize() > 0.9 * self.sample_cache_size:
                     self.logger.debug(
                         "Reducing region cache from {},".format(
                             self.region_cache_size))
                     self.region_cache_size = max(
                         min_region_cache, self.region_cache_size - 1)
                 else:
                     self.logger.debug("Region cache is good size.")
         # collect remaining futures
         for fut in as_completed(futures.values()):
             samples, remain = fut.result()
             self.remainders.extend(remain)
             for sample in samples:
                 self._results.put(sample)
     # signal everything has been processed
     self._results.put(StopIteration)
Esempio n. 12
0
    def recvPong(self):
        """Wait up to ``self.timeout`` seconds for a packet on the socket.

        :returns: ``(recv_time, packet_length, ip_header, icmp_header)`` for
            a received packet, where the IP header is built from bytes 0-19
            and the ICMP header from bytes 20-27. ``(None, 0, None, None)``
            is returned when nothing arrives within the timeout, and
            ``(None, 0, 0, 0)`` when the timeout has already elapsed on
            entry.
        """
        timeout = self.timeout
        entered_function = now()

        remaining = timeout - (now() - entered_function)
        if remaining < 0:
            return None, 0, 0, 0

        # Without a timeout argument select() blocks until data arrives, so
        # the configured timeout could never take effect.
        readable, _writable, _exceptional = select.select(
            [self._socket], [], [], remaining)

        if not readable:
            return None, 0, None, None

        packet_data, _address = self._socket.recvfrom(1024)
        recv_time = now()

        icmp_header = ICMPHeader(packet_data[20:28])
        ip_header = IPHeader(packet_data[:20])

        return recv_time, len(packet_data), ip_header, icmp_header
Esempio n. 13
0
def wait_until_all_finished(startFrame, numFrames, videoPrefix, args):
    """Poll S3 until all expected output objects exist or the timeout passes.

    One object is expected per step of ``args.batch`` between ``startFrame``
    and ``numFrames``. Objects are counted under the prefix
    ``<downloadPrefix>/<videoPrefix>_<batch>_<batch>/`` of
    ``args.downloadBucket``, with a progress bar updated on every poll.

    :returns: the number of objects found at the last poll.
    """
    batch = args.batch
    expected = len(xrange(startFrame, numFrames, batch))
    s3 = boto3.resource('s3')
    bucket_name = args.downloadBucket
    key_prefix = '{}/{}_{}_{}/'.format(
        args.downloadPrefix, videoPrefix, batch, batch)

    widgets = [progressbar.Bar('=', 'Files     [', ']'), ' ',
               progressbar.Percentage()]
    bar = progressbar.ProgressBar(maxval=expected, widgets=widgets)
    bar.start()

    found = 0
    time.sleep(2.0)  # give the decoder two seconds before the first poll
    began = now()
    deadline = began + args.timeout
    while found < expected:
        # count the objects currently under the prefix
        listing = s3.Bucket(bucket_name).objects.filter(Prefix=key_prefix)
        found = sum(1 for _ in listing)
        bar.update(found)
        if found >= expected:
            break

        current = now()
        if current >= deadline:
            print('Timed out in {:.4f} sec, cannot finish.'.format(current -
                                                                   began))
            break

        time.sleep(0.1)
    bar.finish()
    return found
Esempio n. 14
0
def compress(args):
    """Compress every read of ``args.input`` and write the result as fastq.

    The input format is guessed from the last character of the file name
    (``a`` -> fasta, ``q`` -> fastq). Records are passed through
    ``compress_seq`` (in a worker pool when ``args.threads > 1``) and written
    to ``args.output``, or to stdout when ``args.output`` is None.

    The input name is validated before the output is opened, so a rejected
    input does not create or truncate the output file. The output file and
    the pool are released even if processing fails part-way.

    :raises KeyError: if the file format cannot be guessed from the name.
    """
    formats = {'a': 'fasta', 'q': 'fastq'}
    if args.input[-1] not in formats:
        msg = 'Could not guess file format of {}, rename to .f(ast)a/.f(ast)q'
        raise KeyError(msg.format(args.input))

    pool = None
    if args.output is None:
        fh = sys.stdout
    else:
        fh = open(args.output, 'w')

    try:
        reads = SeqIO.parse(args.input, formats[args.input[-1]])
        if args.threads > 1:
            pool = Pool(args.threads)
            compressed = pool.imap(compress_seq, reads)
        else:
            compressed = (compress_seq(r) for r in reads)

        t0 = now()
        for description, compressed_seq, compressed_scores, _runs in compressed:
            fh.write('@{}\n{}\n'.format(description, compressed_seq))
            fh.write('{}\n{}\n'.format('+', compressed_scores))
        t1 = now()
        logger.info('Compressing {} took {:.3f}s.'.format(args.input, t1 - t0))
    finally:
        # never close stdout, only a file opened here
        if args.output is not None:
            fh.close()
        # NOTE(review): assumes Pool is multiprocessing.Pool (or compatible),
        # whose context manager also ends with terminate() - confirm.
        if pool is not None:
            pool.terminate()
Esempio n. 15
0
    def create_thunks(self, inputs):
        """Generate thunks for ``inputs`` and return their placeholders.

        A single non-list input is wrapped in a list. GGThunk inputs without
        an output name get ``my_output_<n>.out`` assigned, and generation is
        then split over at most ``mp.cpu_count()`` chunks. String inputs are
        returned unchanged.

        :returns: the collected results for GGThunk inputs, the inputs
            themselves for strings, or None when ``inputs`` is empty.
        Exits the process with status 1 on any other input type or when the
        number of results does not match the number of inputs.
        """
        start = now()
        # Perform sanity checks
        if not inputs:
            print("List of inputs is empty!")
            return

        if not isinstance(inputs, list):
            inputs = [inputs]

        first = inputs[0]
        if isinstance(first, GGThunk):
            # Output names must be set before generation so that they agree
            # with the placeholders.
            unnamed_seen = 0
            for thunk in inputs:
                if thunk.get_all_outname() == []:
                    thunk.add_outname('my_output_' + str(unnamed_seen) + '.out')
                    unnamed_seen += 1

            # Split the inputs: one input per job when there are fewer inputs
            # than cores, otherwise one equal-sized chunk per core with the
            # last chunk taking the remainder.
            num_cores = mp.cpu_count()
            if len(inputs) < num_cores:
                chunks = [[thunk] for thunk in inputs]
            else:
                batch_size = int(len(inputs) / num_cores)
                offsets = [i * batch_size for i in range(num_cores)]
                chunks = [inputs[off:off + batch_size] for off in offsets[:-1]]
                chunks.append(inputs[offsets[-1]:])

            pending = [self.__distr_thunk_gen(chunk) for chunk in chunks]

            cmd_inp = []
            for job in pending:
                cmd_inp.extend(job.result())

            if len(cmd_inp) != len(inputs):
                print("Error: cmd_inp != inputs")
                sys.exit(1)
        elif isinstance(first, str):
            print("Nothing to generate...")
            cmd_inp = inputs
        else:
            print("invalid input: must be a GGThunk object")
            sys.exit(1)

        delta = now() - start
        print("Time to generate thunks: %.3f seconds" % delta)
        return cmd_inp
Esempio n. 16
0
def basic():
    """Print FizzBuzz for 1..100, one entry per line, and time it.

    "Fizz" and "Buzz" are printed piecewise without a newline, so the line
    is terminated explicitly afterwards; otherwise the following number
    would be glued onto the same line.

    :returns: ``(start, end)`` timestamps taken around the loop.
    """
    start = now()
    for number in range(1, 101):
        fizz = number % 3 == 0
        buzz = number % 5 == 0
        if fizz:
            print("Fizz", end="")
        if buzz:
            print("Buzz", end="")
        if fizz or buzz:
            print()  # finish the Fizz/Buzz line
        else:
            print(number)
    end = now()
    return start, end
Esempio n. 17
0
def time(f, iters=10):
    """Return the total time taken to call ``f`` ``iters`` times.

    Garbage collection is switched off while timing. Its previous state is
    restored afterwards, also when ``f`` raises, so a failing callable does
    not leave the collector disabled for the rest of the process.

    :param f: zero-argument callable to time.
    :param iters: number of calls.
    :returns: elapsed time for all calls, in the units of ``now()``.
    """
    gc_was_enabled = gc.isenabled()
    gc.disable()
    try:
        start = now()
        for _ in range(iters):
            f()
        return now() - start
    finally:
        if gc_was_enabled:
            gc.enable()
Esempio n. 18
0
 def samples_to_batch(samples, prep_func, name, batch, epoch):
     """Prepare ``samples`` with ``prep_func`` and stack them into a batch.

     ``prep_func`` must return a pair per sample; the first and the second
     elements are stacked separately with ``np.stack``. The time taken is
     logged at debug level on the logger called ``name``.

     :returns: tuple of the two stacked arrays.
     """
     began = now()
     firsts, seconds = zip(*[prep_func(sample) for sample in samples])
     stacked = (np.stack(firsts), np.stack(seconds))
     message = "Took {:5.3}s to load batch {} (epoch {})"
     get_named_logger(name).debug(message.format(now() - began, batch, epoch))
     return stacked
Esempio n. 19
0
def slightlyOptimized():
    """Print FizzBuzz for 1..100 with one print call per number.

    :returns: ``(start, end)`` timestamps taken around the loop.
    """
    start = now()
    for number in range(1, 101):
        fizz = "Fizz" if number % 3 == 0 else ""
        buzz = "Buzz" if number % 5 == 0 else ""
        word = fizz + buzz
        # an empty word means the number is divisible by neither 3 nor 5
        print(word if word else number)
    end = now()
    return start, end
Esempio n. 20
0
def main(args):
    """Declare frame-extraction and image-recognition thunks and force them.

    For every file matching ``<vidToProcess>_chunk*`` one extraction thunk
    is declared that produces two output frames per unit of the value
    returned by ``get_dur_fps``, plus one recognition thunk per frame that
    depends on that frame. Only the recognition thunks are handed to
    ``GG.create_and_force``.
    """
    vidStart = args.vidToProcess
    execEnv = args.execEnv
    nJobs = args.numJobs

    all_chunks = glob.glob(vidStart + '_chunk*')

    # Get all durations
    all_dur = {chunk: get_dur_fps(chunk) for chunk in all_chunks}

    gg = GG()
    all_thunks = []

    start = now()
    for vidind, myvid in enumerate(all_chunks):
        num_out = int(all_dur[myvid]) * 2
        frame_names = ['frameout%03d_%03d.jpg' % (j + 1, vidind)
                       for j in range(num_out)]

        extract_argv = CMD.format(video=myvid,
                                  numout=num_out,
                                  ofile='%03d' % vidind).split()
        gen_jpg_thunk = GGThunk(exe=extract_argv[0],
                                outname=frame_names,
                                exe_args=extract_argv[1:],
                                args_infiles=False)
        gen_jpg_thunk.add_infile(myvid)

        for frame_no, frame_name in enumerate(frame_names, 1):
            pic_out = 'frameout%03d_%03d_lab.out' % (frame_no, vidind)
            recog_argv = CMD_IMREC.format(myimage=frame_name,
                                          myoutput=pic_out).split()
            last_thunk = GGThunk(exe=recog_argv[0],
                                 outname=pic_out,
                                 exe_args=recog_argv[1:],
                                 args_infiles=False)
            # the model files plus the frame produced by the extraction thunk
            last_thunk.add_infile([
                'inception_v3_2016_08_28_frozen.pb',
                'imagenet_slim_labels.txt', (gen_jpg_thunk, frame_name)
            ])
            all_thunks.append(last_thunk)

    delta = now() - start
    print("Total time to declare thunks: %.3f seconds" % delta)
    gg.create_and_force(all_thunks, showcomm=False, numjobs=nJobs, env=execEnv)
Esempio n. 21
0
def run(sc, op, name):
    """Run ``op`` over the 'test1' video on CPU and print the elapsed time.

    The op output is written to the stream 'qq'; after the run the profiler
    trace of table 'qq' is written to ``<name>.trace``. The printed time
    covers both the run and the trace export.
    """
    source = sc.io.Input([NamedVideoStream(sc, 'test1')])
    transformed = op(frame=source, batch=100, device=DeviceType.CPU)
    sink = sc.io.Output(transformed, [NamedStream(sc, 'qq')])

    began = now()
    sc.run(sink, PerfParams.estimate(), cache_mode=CacheMode.Overwrite, pipeline_instances_per_node=1)
    trace_path = '{}.trace'.format(name)
    sc.table('qq').profiler().write_trace(trace_path)
    print('{:.1f}s'.format(now() - began))
Esempio n. 22
0
def test_busy_wait():
    """Check that ``time_tool.busy_wait`` takes at least the requested time.

    The measured time must exceed 99.999% of the requested duration.
    """

    from timeit import default_timer as now

    duration = 0.2

    began = now()
    time_tool.busy_wait(duration)
    elapsed = now() - began

    percent_of_requested = (elapsed / duration) * 100
    assert percent_of_requested > 99.999
Esempio n. 23
0
def optimizedWithStrListOutput():
    """Build the FizzBuzz entries for 1..100 as strings and print the list once.

    :returns: ``(start, end)`` timestamps taken around the whole computation.
    """
    start = now()

    entries = []
    for number in range(1, 101):
        word = ""
        if number % 3 == 0:
            word += "Fizz"
        if number % 5 == 0:
            word += "Buzz"
        # fall back to the number itself when neither word applies
        entries.append(word or str(number))

    print(entries)
    end = now()
    return start, end
Esempio n. 24
0
 def _fill_features(self):
     """Create ``self._source`` from the bam file(s) if it is not set yet.

     Training samples are built when ``self.truth_bam`` is given, a plain
     sample otherwise. ``self._quarantined`` is reset to None and the time
     taken is logged. Does nothing when ``self._source`` already exists.
     """
     if self._source is not None:
         return

     self._quarantined = None
     began = now()
     if self.truth_bam is None:
         self._source = self.fencoder.bam_to_sample(
             self.bam, self.region, self.rle_ref, self.read_fraction)
     else:
         self._source = self.fencoder.bams_to_training_samples(
             self.truth_bam, self.bam, self.region, self.rle_ref,
             self.read_fraction)
     elapsed = now() - began
     self.logger.info("Took {:.2f}s to make features.".format(elapsed))
Esempio n. 25
0
 def _fill_features(self):
     """Create ``self._source`` from the bam file(s) if it is not set yet.

     Training samples are built when ``self.truth_bam`` is given. Otherwise
     a single sample is built and wrapped in a one-element tuple so both
     paths store the same kind of container. The time taken is logged.
     Does nothing when ``self._source`` already exists.
     """
     if self._source is not None:
         return

     began = now()
     if self.truth_bam is None:
         sample = self.fencoder.bam_to_sample(
             self.bam, self.region, self.rle_ref, self.read_fraction)
         self._source = sample
         self._source = (self._source, )  # wrap to match the training path
     else:
         self._source = self.fencoder.bams_to_training_samples(
             self.truth_bam, self.bam, self.region, self.rle_ref,
             self.read_fraction)
     elapsed = now() - began
     self.logger.info("Took {:.2f}s to make features.".format(elapsed))
Esempio n. 26
0
 def _fill_features(self):
     """Create ``self._source`` from the bam file(s) if it is not set yet.

     Training samples are built (with the label scheme, truth haplotag and
     minimum truth length) when ``self.truth_bam`` is given, a plain sample
     otherwise. ``self._quarantined`` is reset to None and the time taken is
     logged. Does nothing when ``self._source`` already exists.
     """
     if self._source is not None:
         return

     self._quarantined = None
     began = now()
     if self.truth_bam is None:
         self._source = self.fencoder.bam_to_sample(
             self.bam, self.region)
     else:
         self._source = self.fencoder.bams_to_training_samples(
             self.truth_bam, self.bam, self.region, self.label_scheme,
             truth_haplotag=self.truth_haplotag,
             min_length=self.min_truth_length)
     elapsed = now() - began
     self.logger.info("Took {:.2f}s to make features.".format(elapsed))
def decode(db, t, image=False, device=DeviceType.CPU):
    """Run a discard-only job over rows 0-10000 of ``t`` and time it.

    With ``image`` set, the rows are first passed through an image decoder
    (type ANY on CPU, JPEG otherwise) and the discard op always runs on CPU.
    The profiler trace is written to ``<table name>.trace``.

    :returns: elapsed time of the run plus the trace export.
    """
    frame = t.as_op().range(0, 10000)
    if image:
        decoder_args = db.protobufs.ImageDecoderArgs
        image_type = (decoder_args.ANY if device == DeviceType.CPU
                      else decoder_args.JPEG)
        frame = db.ops.ImageDecoder(img=frame, image_type=image_type)

    discard_device = DeviceType.CPU if image else device
    dummy = db.ops.DiscardFrame(ignore=frame, device=discard_device)
    job = Job(columns=[dummy], name='example_dummy')

    began = now()
    out = db.run(job, force=True, work_item_size=100, pipeline_instances_per_node=1)
    out.profiler().write_trace('{}.trace'.format(t.name()))
    return now() - began
Esempio n. 28
0
 def __getitem__(self, idx):
     """Return batch number ``idx`` built from ``self.data``.

     The batch covers ``self.data[idx * batch_size:(idx + 1) * batch_size]``
     and is assembled by ``self.batcher.samples_to_batch``. For the
     'validation' dataset the request and the load time are logged at debug
     level.
     """
     began = now()
     size = self.batch_size
     first = idx * size
     last = (idx + 1) * size
     if self.dataset == 'validation':
         request = "Request for batch {}: [{}:{}], {}"
         self.logger.debug(request.format(idx, first, last, len(self.data)))

     batch = self.batcher.samples_to_batch(self.data[first:last])

     if self.dataset == 'validation':
         timing = "Took {:5.3}s to load batch {}. (epoch {})"
         self.logger.debug(timing.format(now() - began, idx, self.epoch))
     return batch
Esempio n. 29
0
def run_prediction(output, bam, regions, model, model_file, rle_ref, read_fraction, chunk_len, chunk_ovlp,
                   batch_size=200, save_features=False, tag_name=None, tag_value=None, tag_keep_missing=False):
    """Inference worker.

    Streams samples for all ``regions`` in batches of ``batch_size`` through
    ``model.predict_on_batch`` and appends every sample, together with its
    class probabilities, to the datastore at ``output``. Features are only
    stored when ``save_features`` is set. Progress is logged at most every
    ten seconds.

    :returns: None.
    """
    logger = get_named_logger('PWorker')

    def sample_gen():
        # Chain the samples of all regions. The generator object is rebound
        # on every iteration so that finished ones can be released (they hold
        # the feature vector in memory until they die).
        for region in regions:
            region_source = SampleGenerator(
                bam, region, model_file, rle_ref, read_fraction,
                chunk_len=chunk_len, chunk_overlap=chunk_ovlp,
                tag_name=tag_name, tag_value=tag_value,
                tag_keep_missing=tag_keep_missing)
            yield from region_source.samples

    batches = background_generator(grouper(sample_gen(), batch_size), 10)

    total_region_mbases = sum(r.size for r in regions) / 1e6
    logger.info("Running inference for {:.1f}M draft bases.".format(total_region_mbases))

    progress = '{:.1%} Done ({:.1f}/{:.1f} Mbases) in {:.1f}s'
    with DataStore(output, 'a') as ds:
        mbases_done = 0

        t0 = now()
        tlast = t0
        for data in batches:
            x_data = np.stack([x.features for x in data])
            class_probs = model.predict_on_batch(x_data)
            mbases_done += sum(x.span for x in data) / 1e6
            # capped at the total, just to avoid a funny log message
            mbases_done = min(mbases_done, total_region_mbases)
            t1 = now()
            if t1 - tlast > 10:
                tlast = t1
                logger.info(progress.format(
                    mbases_done / total_region_mbases, mbases_done, total_region_mbases, t1 - t0))

            best = np.argmax(class_probs, -1)
            for sample, prob, _pred, feat in zip(data, class_probs, best, x_data):
                # write out positions and predictions for later analysis
                fields = sample._asdict()
                fields['label_probs'] = prob
                fields['features'] = feat if save_features else None
                ds.write_sample(Sample(**fields))

    logger.info('All done')
    return None
Esempio n. 30
0
def time_it(program_and_args, verbose=False):
    """Returns the time (in seconds) it takes to run a program.

    The elements of ``program_and_args`` are joined with spaces and run as
    a single command line through the shell. The elapsed time is passed
    through ``timedelta``, so it comes back at timedelta's resolution.

    Raises ValueError when ``program_and_args`` is empty.
    """
    if len(program_and_args) == 0:
        raise ValueError('program_and_args must contain at least one element')

    command_line = ' '.join(program_and_args)
    if verbose:
        print(f"Running: {command_line}")

    began = now()
    subprocess.call(command_line, shell=True)
    elapsed = now() - began

    return timedelta(seconds=elapsed).total_seconds()
Esempio n. 31
0
    def recvPong(self):
        """Wait up to ``self.timeout`` seconds for a packet on the socket.

        :returns: ``(recv_time, packet_length, ip_header, icmp_header)`` for
            a received packet, where the IP header is built from bytes 0-19
            and the ICMP header from bytes 20-27. ``(None, 0, None, None)``
            is returned when nothing arrives within the timeout, and
            ``(None, 0, 0, 0)`` when the timeout has already elapsed on
            entry.
        """
        timeout = self.timeout
        entered_function = now()

        remaining = timeout - (now() - entered_function)
        if remaining < 0:
            return None, 0, 0, 0

        # Without a timeout argument select() blocks until data arrives, so
        # the configured timeout could never take effect.
        readable, _writable, _exceptional = select.select(
            [self._socket], [], [], remaining)

        if not readable:
            return None, 0, None, None

        packet_data, _address = self._socket.recvfrom(1024)
        recv_time = now()

        icmp_header = ICMPHeader(packet_data[20:28])
        ip_header = IPHeader(packet_data[:20])

        return recv_time, len(packet_data), ip_header, icmp_header
Esempio n. 32
0
    def test_execution_time(self):
        """Compare ``all()`` against ``select_related_generic()`` by wall time.

        Each queryset is iterated ten times, touching ``publishable`` and
        ``publication`` on every droplet. The generic-select variant must
        take at most half the time of the plain one.
        """
        # this might be a bad test

        # something is probably wrong if it's not at least 50% faster
        TARGET_RATIO = 0.5

        def timed_passes(make_queryset):
            began = now()
            for _ in range(10):
                for droplet in make_queryset():
                    droplet.publishable
                    droplet.publication
            return now() - began

        normal_time = timed_passes(
            lambda: GenericQuerySet(Droplet).all())
        select_generic_time = timed_passes(
            lambda: GenericQuerySet(Droplet).select_related_generic())

        ratio = select_generic_time / normal_time
        self.assertTrue(ratio <= TARGET_RATIO)
Esempio n. 33
0
 def current_sample(self):
     """Current simulated sample (the simulation time).

     Computed as the time since ``self.start_time`` scaled by the sample
     rate and the time warp, plus ``self.sample_offset``, truncated to int.
     """
     elapsed = now() - self.start_time
     position = elapsed * self.sample_rate * self.time_warp + self.sample_offset
     return int(position)
Esempio n. 34
0
 def sendPing(self, request):
     """Send ``request`` to ``self.destination`` and return the send time.

     The timestamp is taken before the request is serialised and sent.
     """
     timestamp = now()
     payload = request.asString()
     address = (self.destination, 1)
     self._socket.sendto(payload, address)
     return timestamp
Esempio n. 35
0
 def reset_time(self):
     """Reset internal time to zero.

     Restarts the clock at the current time and clears the sample offset
     and the event and read counters.
     """
     self.start_time = now()
     self.sample_offset = self._current_event = self._current_read = 0