def add_to_barcode(read, barcode):
    """Append ``read`` to the in-memory buffer kept for ``barcode``.

    ``barcode_bufs[barcode]`` holds a ``(FakReads message, file index)``
    pair.  When the message already contains ``MAX_BUF_SIZE`` or more
    reads, it is serialized to ``protobufs/<barcode>_<index>.protobuf``
    and replaced by an empty message with the index incremented by one.
    The read is then copied into whichever message is current.

    Args:
        read: Object exposing ``uuid`` and ``mod_base_probs`` attributes.
        barcode: Key into the module-level ``barcode_bufs`` mapping.

    Returns:
        None.  The function works by mutating ``barcode_bufs`` and, when a
        buffer is flushed, by writing a file.
    """
    slot = barcode_bufs[barcode]
    messages, file_no = slot[0], slot[1]

    # Flush first, so the incoming read lands in the fresh buffer rather
    # than pushing the full one past the size limit.
    if len(messages.reads) >= MAX_BUF_SIZE:
        print("Writing file %d for %s" % (file_no, barcode))
        out_path = "protobufs/%s_%i.protobuf" % (barcode, file_no)
        with open(out_path, 'wb') as output:
            output.write(messages.SerializeToString())

        # Start an empty message under the next file index and register it
        # so later calls for this barcode pick it up.
        messages = reads_pb2.FakReads()
        file_no = file_no + 1
        barcode_bufs[barcode] = (messages, file_no)

    # Copy only the two fields this pipeline carries forward.
    entry = messages.reads.add()
    entry.uuid = read.uuid
    entry.mod_base_probs = read.mod_base_probs
protobuf_files = [] read_ids = [] print("Looking for protobufs") for filename in os.listdir(args.protobufs): if not args.nonbarcoded: if args.barcode in filename and ".protobuf" in filename: protobuf_files.append(args.protobufs + filename) else: if ".protobuf" in filename: protobuf_files.append(args.protobufs + filename) print("Reading protobufs") for protobuf_name in protobuf_files: protobuf = open(protobuf_name, 'rb') protobuf_reads = reads_pb2.FakReads() protobuf_reads.ParseFromString(protobuf.read()) for index, read in enumerate(protobuf_reads.reads): protobuf_index[read.uuid] = (protobuf_name, index) read_ids.append(read.uuid) protobuf.close() # Import sacCer3 reference sequence as a dictionary print("Importing {} reference sequence".format(genome_name)) genome_data = collections.OrderedDict() with open(args.genome, "r") as ref_seq: header = True for line in ref_seq: line = line.strip("\n") if line.startswith(">"): if not header:
if ".fast5" in entry: filenames.append(entry) print(filenames) print(len(filenames)) # Check if any fast5 files were found and only continue if some were if len(filenames) == 0: print("Error: Filenames length is zero") sys.exit() # Iterate through a list of filenames of the fast5 files currently in the directory for file in filenames: reads = reads_pb2.FakReads() file = file.split('.')[0] print(file) count_fast5 = 0 count_proto = 0 with get_fast5_file(file + ".fast5") as f5: count_fast5 = len(f5.get_read_ids()) for read_id in f5.get_read_ids(): read = f5.get_read(read_id) latest_basecall = read.get_latest_analysis("Basecall_1D") mod_base_table = read.get_analysis_dataset( latest_basecall, "BaseCalled_template/ModBaseProbs") read = reads.reads.add() read.uuid = read_id
import sys import reads_pb2 import os MAX_BUF_SIZE = 1000 barcode_bufs = { "unclassified": (reads_pb2.FakReads(), 0), "barcode01": (reads_pb2.FakReads(), 0), "barcode02": (reads_pb2.FakReads(), 0), "barcode03": (reads_pb2.FakReads(), 0), "barcode04": (reads_pb2.FakReads(), 0), "barcode05": (reads_pb2.FakReads(), 0), "barcode06": (reads_pb2.FakReads(), 0), "barcode07": (reads_pb2.FakReads(), 0), "barcode08": (reads_pb2.FakReads(), 0), "barcode09": (reads_pb2.FakReads(), 0), "barcode10": (reads_pb2.FakReads(), 0), "barcode11": (reads_pb2.FakReads(), 0), "barcode12": (reads_pb2.FakReads(), 0), "barcode13": (reads_pb2.FakReads(), 0), "barcode14": (reads_pb2.FakReads(), 0), "barcode15": (reads_pb2.FakReads(), 0), "barcode16": (reads_pb2.FakReads(), 0), "barcode17": (reads_pb2.FakReads(), 0), "barcode18": (reads_pb2.FakReads(), 0), "barcode19": (reads_pb2.FakReads(), 0), "barcode20": (reads_pb2.FakReads(), 0), "barcode21": (reads_pb2.FakReads(), 0), "barcode22": (reads_pb2.FakReads(), 0), "barcode23": (reads_pb2.FakReads(), 0),