예제 #1
0
    if len(args) != 1:
        usage()
        sys.exit(2)

    basename = goby.Alignments.get_basename(args[0])
    print "Compact Alignment basename =", basename

    alignment_reader = AlignmentReader(basename, verbose)
    header = alignment_reader.header
    tmh_reader = TooManyHitsReader(basename, verbose)
    tmh = tmh_reader.tmh
    entries_filesize = os.stat(basename + ".entries")[stat.ST_SIZE]
    print "Info from header:"
    print "Sorted:", header.sorted
    print "Indexed: ", header.indexed
    print "Number of target sequences = %s" % commify(header.number_of_targets)

    # target length stats
    target_length = len(header.target_length)
    if target_length > 0:
        min_target_length = min(header.target_length)
        max_target_length = max(header.target_length)
        mean_target_length =  sum(header.target_length) / float(target_length)
    else:
        min_target_length = 0
        max_target_length = 0
        mean_target_length = 0

    print "Number of target length entries = %s" % commify(target_length)
    print "Min target length = %s" % commify(min_target_length)
    print "Max target length = %s" % commify(max_target_length)
예제 #2
0
            number_of_identifiers += 1
        if entry.HasField("description"):
            number_of_descriptions += 1
        if entry.HasField("sequence"):
            number_of_sequences += 1
        if entry.HasField("sequence_pair"):
            number_of_sequences_pairs += 1
        if entry.HasField("quality_scores"):
            number_of_quality_scores += 1
        if entry.HasField("quality_scores_pair"):
            number_of_quality_score_pairs += 1

        min_read_length = min(min_read_length, read_length)
        max_read_length = max(max_read_length, read_length)

    print "Average bytes per entry: %s" % commify(filesize / float(number_of_entries))
    print "Average bytes per base:  %s" % commify(filesize / float(total_read_length))
    print "Has identifiers = %s (%s)" % (number_of_identifiers > 0, commify(number_of_identifiers))
    print "Has descriptions = %s (%s)" % (number_of_descriptions > 0, commify(number_of_descriptions))
    print "Has sequences = %s (%s)" % (number_of_sequences > 0, commify(number_of_sequences))
    print "Has sequence pairs = %s (%s)" % (number_of_sequence_pairs > 0, commify(number_of_sequence_pairs))
    print "Has quality scores = %s (%s)" % (number_of_quality_scores > 0, commify(number_of_quality_scores))
    print "Has quality score pairs = %s (%s)" % (number_of_quality_score_pairs > 0, commify(number_of_quality_score_pairs))
    print "Number of entries = %s" % commify(number_of_entries)
    print "Min read length = %s" % commify(min_read_length)
    print "Max read length = %s" % commify(max_read_length)
    print "Avg read length = %s" % commify(total_read_length / float(number_of_entries))
    print "Avg read pair length = %s" % commify(total_read_length_pair / float(number_of_entries))

# Script entry point: call main() only when this file is executed directly;
# importing it as a module does not trigger the call.
if __name__ == "__main__":
    main()
예제 #3
0
            number_of_identifiers += 1
        if entry.HasField("description"):
            number_of_descriptions += 1
        if entry.HasField("sequence"):
            number_of_sequences += 1
        if entry.HasField("sequence_pair"):
            number_of_sequences_pairs += 1
        if entry.HasField("quality_scores"):
            number_of_quality_scores += 1
        if entry.HasField("quality_scores_pair"):
            number_of_quality_score_pairs += 1

        min_read_length = min(min_read_length, read_length)
        max_read_length = max(max_read_length, read_length)

    print "Average bytes per entry: %s" % commify(
        filesize / float(number_of_entries))
    print "Average bytes per base:  %s" % commify(
        filesize / float(total_read_length))
    print "Has identifiers = %s (%s)" % (number_of_identifiers > 0,
                                         commify(number_of_identifiers))
    print "Has descriptions = %s (%s)" % (number_of_descriptions > 0,
                                          commify(number_of_descriptions))
    print "Has sequences = %s (%s)" % (number_of_sequences > 0,
                                       commify(number_of_sequences))
    print "Has sequence pairs = %s (%s)" % (number_of_sequence_pairs > 0,
                                            commify(number_of_sequence_pairs))
    print "Has quality scores = %s (%s)" % (number_of_quality_scores > 0,
                                            commify(number_of_quality_scores))
    print "Has quality score pairs = %s (%s)" % (
        number_of_quality_score_pairs > 0,
        commify(number_of_quality_score_pairs))