Esempio n. 1
0
# For every input alignment, run the unique-position counter and store its
# result in unique_position_dict under the alignment's name.
for alignment_file in args.input:
    # Element 1 of the split_filename() result names the alignment: it is used
    # both as the dictionary key and as the stem of the output prefix.
    # NOTE(review): presumably the file's basename without extension - confirm.
    alignment_name_list = FileRoutines.split_filename(alignment_file)
    alignment_name = alignment_name_list[1]
    output_prefix = "%s/%s.unique_positions" % (args.output_dir, alignment_name)

    position_counts = MultipleAlignmentRoutines.count_unique_positions_per_sequence_from_file(
        alignment_file,
        output_prefix,
        format=args.format,
        gap_symbol="-",
        return_mode="relative",
        verbose=False)
    unique_position_dict[alignment_name] = position_counts

# Species names; they are used below as the second-level keys of
# unique_position_dict and as the histogram labels.
species_list = unique_position_dict.sl_keys()

# species -> list with that species' value from every alignment, collected in
# the iteration order of unique_position_dict.
data_dict = OrderedDict(
    (species, [unique_position_dict[alignment][species]
               for alignment in unique_position_dict])
    for species in species_list)

# One data series per species, in the same order as the keys of data_dict.
data_list = list(data_dict.values())

# Values are passed to the plotting routine in "percent" input mode.
MatplotlibRoutines.extended_percent_histogram(
    data_list,
    args.histogram_output,
    input_mode="percent",
    label=species_list)
Esempio n. 2
0
                    help="Directory with families of species")
"""
parser.add_argument("-o", "--output_file", action="store", dest="output", default="stdout",
                    help="Output file. Default: stdout")
"""
args = parser.parse_args()

# run after scripts/expansion/compare_cluster.py

# out_fd = sys.stdout if args.output == "stdout" else open(args.output, "w")

# Load "<species_dir><species>/all.t" for every requested species into one
# two-level table keyed by species.
species_syn_dict = TwoLvlDict()
for species in args.species_list:
    synonyms_path = "%s%s/all.t" % (args.species_dir, species)
    species_syn_dict[species] = read_synonyms_dict(synonyms_path)

# Snapshot of the table before filtering.
species_syn_dict.write("families_all_species.t", absent_symbol=".")

# The table is written a second time after filter_by_line(), and the value
# returned by that call feeds the "non-assembled" id list.
# NOTE(review): presumably filter_by_line() filters the table in place and
# returns the rejected entries - confirm against TwoLvlDict.
not_assembled = species_syn_dict.filter_by_line(is_assembled)
species_syn_dict.write("correctly_assembled_families_species.t", absent_symbol=".")

# Dump the second-level keys of both tables as id lists, assembled first.
for table, ids_path in ((species_syn_dict, "assembled_families.ids"),
                        (not_assembled, "non_assembled_families.ids")):
    IdSet(table.sl_keys()).write(ids_path)

"""
if args.output != "stdout":
    out_fd.close()
"""
Esempio n. 3
0
# Two tables keyed by species: the parsed family file of each species and the
# result of count_synonyms() on it.
fam_count_dict = TwoLvlDict()
species_family_dict = TwoLvlDict()
for species in args.species_set:
    # Input file name is "<checked input path><species><suffix>".
    family_path = "%s%s%s" % (FileRoutines.check_path(args.input), species, args.suffix)

    species_family_dict[species] = SynDict()
    families = species_family_dict[species]
    # Tab-separated file whose value column is split on commas.
    families.read(family_path,
                  split_values=True,
                  values_separator=",",
                  separator="\t")
    fam_count_dict[species] = families.count_synonyms()

    # One output handle per species, stored by species name.
    # NOTE(review): these handles stay open after the loop; closing them is
    # not visible in this part of the file - confirm it happens later.
    filtered_path = "%s%s.fam" % (args.filtered_family_dir, species)
    species_filtered_fd_list[species] = open(filtered_path, "w")

for family in fam_count_dict.sl_keys():
    genes_number_list = []
    number_of_species = 0
    for species in species_list:
        genes_number_list.append(fam_count_dict[species][family] if family in
                                 fam_count_dict[species] else 0)
        number_of_species += 1 if family in fam_count_dict[species] else 0

    number_str = "\t".join(map(str, genes_number_list))

    if (black_list and (family in black_list)) or (
            white_list and (family not in white_list)) or (
                number_of_species < args.min_species_number):
        filtered_fd.write("%s\t%s\t%s\n" % (family, family, number_str))
        for species in species_list:
            if family in species_family_dict[species]: