if len(args) != 1: usage() sys.exit(2) basename = goby.Alignments.get_basename(args[0]) print "Compact Alignment basename =", basename alignment_reader = AlignmentReader(basename, verbose) header = alignment_reader.header tmh_reader = TooManyHitsReader(basename, verbose) tmh = tmh_reader.tmh entries_filesize = os.stat(basename + ".entries")[stat.ST_SIZE] print "Info from header:" print "Sorted:", header.sorted print "Indexed: ", header.indexed print "Number of target sequences = %s" % commify(header.number_of_targets) # target length stats target_length = len(header.target_length) if target_length > 0: min_target_length = min(header.target_length) max_target_length = max(header.target_length) mean_target_length = sum(header.target_length) / float(target_length) else: min_target_length = 0 max_target_length = 0 mean_target_length = 0 print "Number of target length entries = %s" % commify(target_length) print "Min target length = %s" % commify(min_target_length) print "Max target length = %s" % commify(max_target_length)
number_of_identifiers += 1 if entry.HasField("description"): number_of_descriptions += 1 if entry.HasField("sequence"): number_of_sequences += 1 if entry.HasField("sequence_pair"): number_of_sequences_pairs += 1 if entry.HasField("quality_scores"): number_of_quality_scores += 1 if entry.HasField("quality_scores_pair"): number_of_quality_score_pairs += 1 min_read_length = min(min_read_length, read_length) max_read_length = max(max_read_length, read_length) print "Average bytes per entry: %s" % commify(filesize / float(number_of_entries)) print "Average bytes per base: %s" % commify(filesize / float(total_read_length)) print "Has identifiers = %s (%s)" % (number_of_identifiers > 0, commify(number_of_identifiers)) print "Has descriptions = %s (%s)" % (number_of_descriptions > 0, commify(number_of_descriptions)) print "Has sequences = %s (%s)" % (number_of_sequences > 0, commify(number_of_sequences)) print "Has sequence pairs = %s (%s)" % (number_of_sequence_pairs > 0, commify(number_of_sequence_pairs)) print "Has quality scores = %s (%s)" % (number_of_quality_scores > 0, commify(number_of_quality_scores)) print "Has quality score pairs = %s (%s)" % (number_of_quality_score_pairs > 0, commify(number_of_quality_score_pairs)) print "Number of entries = %s" % commify(number_of_entries) print "Min read length = %s" % commify(min_read_length) print "Max read length = %s" % commify(max_read_length) print "Avg read length = %s" % commify(total_read_length / float(number_of_entries)) print "Avg read pair length = %s" % commify(total_read_length_pair / float(number_of_entries)) if __name__ == "__main__": main()
number_of_identifiers += 1 if entry.HasField("description"): number_of_descriptions += 1 if entry.HasField("sequence"): number_of_sequences += 1 if entry.HasField("sequence_pair"): number_of_sequences_pairs += 1 if entry.HasField("quality_scores"): number_of_quality_scores += 1 if entry.HasField("quality_scores_pair"): number_of_quality_score_pairs += 1 min_read_length = min(min_read_length, read_length) max_read_length = max(max_read_length, read_length) print "Average bytes per entry: %s" % commify( filesize / float(number_of_entries)) print "Average bytes per base: %s" % commify( filesize / float(total_read_length)) print "Has identifiers = %s (%s)" % (number_of_identifiers > 0, commify(number_of_identifiers)) print "Has descriptions = %s (%s)" % (number_of_descriptions > 0, commify(number_of_descriptions)) print "Has sequences = %s (%s)" % (number_of_sequences > 0, commify(number_of_sequences)) print "Has sequence pairs = %s (%s)" % (number_of_sequence_pairs > 0, commify(number_of_sequence_pairs)) print "Has quality scores = %s (%s)" % (number_of_quality_scores > 0, commify(number_of_quality_scores)) print "Has quality score pairs = %s (%s)" % ( number_of_quality_score_pairs > 0, commify(number_of_quality_score_pairs))