예제 #1
0
# NOTE: the string literal below is disabled code (the -o/--output_file
# option); it is an inert expression statement and has no effect at runtime.
"""
parser.add_argument("-o", "--output_file", action="store", dest="output", default="stdout",
                    help="Output file. Default: stdout")
"""
args = parser.parse_args()

# run after scripts/expansion/compare_cluster.py

# out_fd = sys.stdout if args.output == "stdout" else open(args.output, "w")

# Collect one synonym table per species, keyed by species name.
species_syn_dict = TwoLvlDict()
for species in args.species_list:
    # The path is plain concatenation: args.species_dir must already end with
    # whatever separator (or prefix) is needed before the species name.
    synonyms_path = "%s%s/all.t" % (args.species_dir, species)
    species_syn_dict[species] = read_synonyms_dict(synonyms_path)

# Dump the complete table before any filtering is applied.
species_syn_dict.write("families_all_species.t", absent_symbol=".")

# NOTE(review): filter_by_line presumably filters species_syn_dict in place
# and returns the entries rejected by is_assembled -- confirm against the
# TwoLvlDict implementation.
not_assembled = species_syn_dict.filter_by_line(is_assembled)
species_syn_dict.write(
    "correctly_assembled_families_species.t",
    absent_symbol=".",
)

# Write the second-level keys of both tables as ID lists.
assembled_ids = IdSet(species_syn_dict.sl_keys())
assembled_ids.write("assembled_families.ids")

not_assembled_ids = IdSet(not_assembled.sl_keys())
not_assembled_ids.write("non_assembled_families.ids")
# NOTE: disabled counterpart of the commented-out out_fd handling above.
"""
if args.output != "stdout":
    out_fd.close()
"""
# NOTE(review): unique_position_dict is not created in this fragment --
# presumably it is initialised earlier in the script; confirm.

# Count unique positions per sequence for every input alignment; results are
# stored under the second element returned by FileRoutines.split_filename.
for alignment_file in args.input:
    alignment_name_list = FileRoutines.split_filename(alignment_file)
    alignment_name = alignment_name_list[1]
    output_prefix = "%s/%s.unique_positions" % (args.output_dir, alignment_name)

    counts = MultipleAlignmentRoutines.count_unique_positions_per_sequence_from_file(
        alignment_file,
        output_prefix,
        format=args.format,
        gap_symbol="-",
        return_mode="relative",
        verbose=False,
    )
    unique_position_dict[alignment_name] = counts

species_list = unique_position_dict.sl_keys()

# Regroup the values: for each species, gather its value from every alignment
# (in the iteration order of unique_position_dict).
data_dict = OrderedDict(
    (
        species,
        [unique_position_dict[alignment][species]
         for alignment in unique_position_dict],
    )
    for species in species_list
)

# One list of values per species, in the same order as data_dict.
data_list = list(data_dict.values())

MatplotlibRoutines.extended_percent_histogram(
    data_list,
    args.histogram_output,
    input_mode="percent",
    label=species_list,
)