# Filter sequence records by length and print the survivors to stdout.
# Positional arguments (after option parsing): minlen, then one or more
# input files.  The optional "maxlen" value adds an inclusive upper bound.

# One spec per supported option.  NOTE(review): the field order is whatever
# CLArgument defines (presumably name, key, type, default, help) -- confirm.
argument_list = [["maxlen", "maxlen", int, -1, "Maximum length of reads"]]
arguments = [CLArgument._make(spec) for spec in argument_list]

if not len(sys.argv) > 2:
    sys.exit(getHelpStr(description, arguments) + "\n")

(p_arg_map, args_remaining) = parseArgs(sys.argv[1:], arguments)

# Option parsing may have consumed part of argv, so the raw argv length
# check above is not enough: make sure both minlen and at least one input
# file are still present instead of failing with an IndexError or silently
# producing no output.
if len(args_remaining) < 2:
    sys.exit(getHelpStr(description, arguments) + "\n")

minlen = int(args_remaining[0])
files = args_remaining[1:]

if not all(map(os.path.exists, files)):
    sys.exit("Not all files exist")

# Resolve every record iterator up front so a problem with any input file
# surfaces before the first record is printed.
record_iterators = [iteratorFromExtension(path) for path in files]
file_readers = starmap(fileIterator, zip(files, record_iterators))

# Zero or negative (including the -1 default) means "no upper bound"; any
# positive value is enforced, including 1.
maxlen = p_arg_map["maxlen"]


def filt_cond(record):
    """Return True if the record is longer than minlen and within maxlen."""
    length = seqlen(record)
    if length <= minlen:
        return False
    return maxlen <= 0 or length <= maxlen


filtered_records = ifilter(filt_cond, chain.from_iterable(file_readers))
filtered_seqs = imap(recordToString, filtered_records)

for seq in filtered_seqs:
    # Parenthesised single argument: identical output to the Python 2 print
    # statement.
    print(seq)
# Filter sequence records by length and print the survivors to stdout.
# Positional arguments (after option parsing): minlen, then one or more
# input files.  The optional "maxlen" value adds an inclusive upper bound.

# argument_list is defined earlier in the file; each entry becomes one
# CLArgument.
arguments = [CLArgument._make(spec) for spec in argument_list]

if not len(sys.argv) > 2:
    sys.exit(getHelpStr(description, arguments) + "\n")

(p_arg_map, args_remaining) = parseArgs(sys.argv[1:], arguments)

# Option parsing may have consumed part of argv, so the raw argv length
# check above is not enough: make sure both minlen and at least one input
# file are still present instead of failing with an IndexError or silently
# producing no output.
if len(args_remaining) < 2:
    sys.exit(getHelpStr(description, arguments) + "\n")

minlen = int(args_remaining[0])
files = args_remaining[1:]

if not all(map(os.path.exists, files)):
    sys.exit("Not all files exist")

# Resolve every record iterator up front so a problem with any input file
# surfaces before the first record is printed.
record_iterators = [iteratorFromExtension(path) for path in files]
file_readers = starmap(fileIterator, zip(files, record_iterators))

# Zero or negative (including a negative "unset" default) means "no upper
# bound"; any positive value is enforced, including 1.
maxlen = p_arg_map["maxlen"]


def filt_cond(record):
    """Return True if the record is longer than minlen and within maxlen."""
    length = seqlen(record)
    if length <= minlen:
        return False
    return maxlen <= 0 or length <= maxlen


filtered_records = ifilter(filt_cond, chain.from_iterable(file_readers))
filtered_seqs = imap(recordToString, filtered_records)

for seq in filtered_seqs:
    # Parenthesised single argument: identical output to the Python 2 print
    # statement.
    print(seq)
#Downsample a library
#
# Copies records from the input file to the output file, in input order,
# until the number of bases written reaches genome_size * desired_cov.
# Records whose sequence contains an "N" are dropped and do not count
# towards the budget.
import sys

from nucio import typeify, fileIterator
from seqio import iteratorFromExtension, recordToString, seqlen

if not len(sys.argv) == 5:
    sys.exit("Usage: downsample.py genome_size desired_cov input.{fa,fq} output.{fa,fq}\n")

# Expected type of each positional argument, in command-line order.
types = [int, float, str, str]
sysins = sys.argv[1:len(types)+1]
# NOTE(review): typeify presumably converts each argument with the matching
# entry of `types` -- confirm against nucio.
(genome_size, target_cov, infn, outfn) = typeify(sysins, types)

# Base budget: the output stops growing once this many bases are written.
max_bases = genome_size * target_cov
total_bases = 0

with open(outfn, "w") as of:
    for record in fileIterator(infn, iteratorFromExtension(infn)):
        # Stop as soon as the budget is met.  ">=" (not ">") so that hitting
        # the budget exactly -- or asking for zero coverage -- does not emit
        # one extra read.  Checked first so we do not keep scanning reads
        # that would be skipped anyway once the budget is reached.
        if total_bases >= max_bases:
            break

        # Reads containing "N" are excluded from the output.
        if "N" in record.seq:
            continue

        length = seqlen(record)
        of.write(recordToString(record))
        of.write("\n")
        total_bases += length
iterators = map(iteratorFromExtension, imap(itemgetter(1), openers)) openfuncs = map(defdef(open), imap(itemgetter(0),openers)) input_data = chain.from_iterable(starmap(fileIterator, zip(in_files, iterators, openfuncs))) total_reads = 0 dnum = 0 fnum = 0 fh = None readidx_fh = open("ReadIndex.txt", "w") recordString = recordToString if p_arg_map["samefmt"] else fastaRecordToString for record in input_data: if seqlen(record) < p_arg_map["minlen"]: continue if total_reads % rpf == 0: if total_reads % (rpf * fpd) == 0: dnum += 1 fnum = 0 os.mkdir(pstr(dnum)) fnum += 1 if fh: fh.close() current_file ="%s/p%s" % (pstr(dnum),pstr(fnum)) fh = open(current_file, "w") clean_name = str(record.name).split()[0] clean_record = record._replace(name=clean_name)