def n50(args):
    """
    %prog n50 filename

    Given a file with a list of numbers denoting contig lengths, calculate N50.
    Input file can be both FASTA or a list of sizes.
    """
    # NOTE: the docstring above is passed to OptionParser as the usage text,
    # so it is kept verbatim; developer notes live in comments instead.
    #
    # args: command-line style argument list (options followed by one or more
    #       input files).
    # Returns zip(header, summary), pairing the `header` labels (defined
    # outside this function) with (sumsize, l50, nn50, minsize, maxsize, n).
    # Side effects: writes the file list and the "label=value" summary to
    # stderr, calls loghistogram() on the sizes, and with --print0 prints a
    # tab-separated "files, total size, l50" line to stdout.
    from jcvi.graphics.histogram import loghistogram

    p = OptionParser(n50.__doc__)
    p.add_option(
        "--print0",
        default=False,
        action="store_true",
        help="Print size and L50 to stdout",
    )
    opts, args = p.parse_args(args)

    if len(args) < 1:
        sys.exit(not p.print_help())

    # Guess the file format from the first line of the first file only.
    # The handle is closed right away, and startswith() copes with an empty
    # file (indexing [0] on an empty line raised IndexError).
    with open(args[0]) as probe_fp:
        isFasta = probe_fp.readline().startswith(">")

    ctgsizes = []
    if isFasta:
        for filename in args:
            f = Fasta(filename)
            ctgsizes.extend(size for _, size in f.itersizes())
    else:
        for row in must_open(args):
            fields = row.split()
            if not fields:
                # Blank rows carry no size; skip them like other non-numeric
                # rows instead of failing on fields[-1].
                continue
            try:
                # The size is the last column; float() first so values such
                # as "1e3" or "1500.0" are accepted.
                ctgsize = int(float(fields[-1]))
            except ValueError:
                continue
            ctgsizes.append(ctgsize)

    a50, l50, nn50 = calculate_A50(ctgsizes)
    sumsize = sum(ctgsizes)
    minsize = min(ctgsizes)
    maxsize = max(ctgsizes)
    n = len(ctgsizes)
    print(", ".join(args), file=sys.stderr)

    summary = (sumsize, l50, nn50, minsize, maxsize, n)
    print(
        " ".join("{0}={1}".format(a, b) for a, b in zip(header, summary)),
        file=sys.stderr,
    )
    loghistogram(ctgsizes)

    if opts.print0:
        print("\t".join(str(x) for x in (",".join(args), sumsize, l50)))

    return zip(header, summary)
def n50(args):
    """
    %prog n50 filename

    Given a file with a list of numbers denoting contig lengths, calculate N50.
    Input file can be both FASTA or a list of sizes.
    """
    # NOTE: the docstring above is passed to OptionParser as the usage text,
    # so it is kept verbatim; developer notes live in comments instead.
    #
    # args: command-line style argument list (options followed by one or more
    #       input files).
    # Returns a list of (label, value) pairs built from the `header` labels
    # (defined outside this function) and
    # (sumsize, l50, nn50, minsize, maxsize, n).
    # Side effects: writes the file list and the "label=value" summary to
    # stderr, calls loghistogram() on the sizes, and with --print0 prints a
    # tab-separated "files, total size, l50" line to stdout.
    from jcvi.graphics.histogram import loghistogram

    p = OptionParser(n50.__doc__)
    p.add_option(
        "--print0",
        default=False,
        action="store_true",
        help="Print size and L50 to stdout [default: %default]",
    )
    opts, args = p.parse_args(args)

    if len(args) < 1:
        sys.exit(not p.print_help())

    # Guess the file format from the first line of the first file only.
    # The handle is closed right away, and startswith() copes with an empty
    # file (indexing [0] on an empty line raised IndexError).
    with open(args[0]) as probe_fp:
        isFasta = probe_fp.readline().startswith(">")

    ctgsizes = []
    if isFasta:
        for filename in args:
            f = Fasta(filename)
            ctgsizes.extend(size for _, size in f.itersizes())
    else:
        for row in must_open(args):
            fields = row.split()
            if not fields:
                # Blank rows carry no size; skip them like other non-numeric
                # rows instead of failing on fields[-1].
                continue
            try:
                # The size is expected in the last column.
                ctgsize = int(fields[-1])
            except ValueError:
                continue
            ctgsizes.append(ctgsize)

    a50, l50, nn50 = calculate_A50(ctgsizes)
    sumsize = sum(ctgsizes)
    minsize = min(ctgsizes)
    maxsize = max(ctgsizes)
    n = len(ctgsizes)

    # print() calls replace the old "print >> stream" statements, which do
    # not work once print is a function.
    print(", ".join(args), file=sys.stderr)

    summary = (sumsize, l50, nn50, minsize, maxsize, n)
    print(
        " ".join("{0}={1}".format(a, b) for a, b in zip(header, summary)),
        file=sys.stderr,
    )
    loghistogram(ctgsizes)

    if opts.print0:
        print("\t".join(str(x) for x in (",".join(args), sumsize, l50)))

    # list() keeps the return value a real list (as zip() produced under
    # Python 2) so callers can index or iterate it more than once.
    return list(zip(header, summary))
def n50(args):
    """
    %prog n50 filename

    Given a file with a list of numbers denoting contig lengths, calculate N50.
    Input file can be both FASTA or a list of sizes.
    """
    # NOTE: the docstring above is passed to OptionParser as the usage text,
    # so it is kept verbatim; developer notes live in comments instead.
    #
    # args: command-line style argument list (one or more input files).
    # Returns a list of (label, value) pairs built from the `header` labels
    # (defined outside this function) and
    # (sumsize, l50, nn50, minsize, maxsize, n).
    # Side effects: writes the file list and the "label=value" summary to
    # stderr and calls loghistogram() on the sizes.
    p = OptionParser(n50.__doc__)
    opts, args = p.parse_args(args)

    if len(args) < 1:
        sys.exit(not p.print_help())

    # Guess the file format from the first line of the first file only.
    # The handle is closed right away, and startswith() copes with an empty
    # file (indexing [0] on an empty line raised IndexError).
    with open(args[0]) as probe_fp:
        isFasta = probe_fp.readline().startswith(">")

    ctgsizes = []
    if isFasta:
        for filename in args:
            f = Fasta(filename)
            ctgsizes.extend(size for _, size in f.itersizes())
    else:
        for row in must_open(args):
            fields = row.split()
            if not fields:
                # Blank rows carry no size; skip them like other non-numeric
                # rows instead of failing on fields[-1].
                continue
            try:
                # The size is expected in the last column.
                ctgsize = int(fields[-1])
            except ValueError:
                continue
            ctgsizes.append(ctgsize)

    a50, l50, nn50 = calculate_A50(ctgsizes)
    sumsize = sum(ctgsizes)
    minsize = min(ctgsizes)
    maxsize = max(ctgsizes)
    n = len(ctgsizes)

    # print() calls replace the old "print >> stream" statements, which do
    # not work once print is a function.
    print(", ".join(args), file=sys.stderr)

    summary = (sumsize, l50, nn50, minsize, maxsize, n)
    print(
        " ".join("{0}={1}".format(a, b) for a, b in zip(header, summary)),
        file=sys.stderr,
    )
    loghistogram(ctgsizes, summary=False)

    # list() keeps the return value a real list (as zip() produced under
    # Python 2) so callers can index or iterate it more than once.
    return list(zip(header, summary))
def summary(args):
    """
    %prog summary cdhit.clstr

    Parse cdhit.clstr file to get distribution of cluster sizes.
    """
    # The docstring doubles as the usage text given to OptionParser, so it is
    # left untouched.
    from jcvi.graphics.histogram import loghistogram

    parser = OptionParser(summary.__doc__)
    _, positional = parser.parse_args(args)

    # Exactly one cluster file is expected; otherwise show usage and exit.
    if len(positional) != 1:
        sys.exit(not parser.print_help())

    # Collect every cluster size and hand them to the log-scale histogram.
    cluster_sizes = list(ClstrFile(positional[0]).iter_sizes())
    loghistogram(cluster_sizes, summary=True)
def summary(args):
    """
    %prog summary cdhit.clstr

    Parse cdhit.clstr file to get distribution of cluster sizes.
    """
    # The usage text shown to the user is this function's docstring, which is
    # why it is reproduced unchanged.
    from jcvi.graphics.histogram import loghistogram

    usage_parser = OptionParser(summary.__doc__)
    remaining = usage_parser.parse_args(args)[1]

    # Anything other than a single positional argument prints the help text
    # and terminates the program.
    if len(remaining) != 1:
        help_result = usage_parser.print_help()
        sys.exit(not help_result)

    clstr_path = remaining[0]
    clusters = ClstrFile(clstr_path)

    # Materialize the sizes before plotting their distribution.
    sizes = list(clusters.iter_sizes())
    loghistogram(sizes, summary=True)
def summary(args):
    """
    %prog summary cdhit.clstr

    Parse cdhit.clstr file to get distribution of cluster sizes.
    """
    # NOTE: the docstring above is passed to OptionParser as the usage text,
    # so it is kept verbatim; developer notes live in comments instead.
    #
    # args: command-line style argument list holding exactly one path that
    #       must end in ".clstr".
    # Returns None; the size distribution is reported through loghistogram().
    from jcvi.graphics.histogram import loghistogram

    p = OptionParser(summary.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (clstrfile,) = args
    assert clstrfile.endswith(".clstr")

    # Read inside a context manager so the handle is closed even if parsing
    # fails. The blocks are consumed before leaving the `with`, in case
    # read_block() yields lazily from the open file.
    with open(clstrfile) as fp:
        # Each block delimited by ">" is one cluster; its size is the number
        # of member lines that read_block() returns for it.
        data = [len(members) for _, members in read_block(fp, ">")]

    loghistogram(data)