Ejemplo n.º 1
0
    def prepare_data_for_target_alignment(self,
                                          query_fasta,
                                          target_fasta,
                                          correspondence_file,
                                          out_dir,
                                          correspondence_query_column=0,
                                          correspondence_target_column=1):
        """Write per-query FASTA pairs for every query in the correspondence file.

        For each key of the correspondence table two files are written into
        ``out_dir``: ``<query_id>.query.fasta`` holding the query record and
        ``<query_id>.target.fasta`` holding the records the table maps that
        query to.

        Args:
            query_fasta: Sequence file with the queries, handed to
                ``self.parse_seq_file`` in "parse" mode.
            target_fasta: Sequence file with the targets, parsed the same way.
            correspondence_file: Table read through ``SynDict``.
            out_dir: Output directory; ``self.safe_mkdir`` is called on it
                before anything is written.
            correspondence_query_column: Column index used as the ``SynDict``
                key.
            correspondence_target_column: Column index used as the ``SynDict``
                value.

        Returns:
            A 2-tuple of sets: the ids that occur as keys of the
            correspondence table, and the ids of the parsed query file that
            do not occur among those keys.
        """
        query_records = self.parse_seq_file(query_fasta, "parse")
        target_records = self.parse_seq_file(target_fasta, "parse")

        self.safe_mkdir(out_dir)

        # Repeated keys are allowed, so one query presumably maps to several
        # target ids -- TODO confirm against the SynDict implementation.
        query_to_targets = SynDict(filename=correspondence_file,
                                   allow_repeats_of_key=True,
                                   key_index=correspondence_query_column,
                                   value_index=correspondence_target_column)

        for query_id in query_to_targets:
            # Both output names share the query id as their stem.
            query_path, target_path = (
                "%s/%s.query.fasta" % (out_dir, query_id),
                "%s/%s.target.fasta" % (out_dir, query_id),
            )

            single_query = self.record_by_id_generator(query_records,
                                                       [query_id])
            SeqIO.write(single_query, query_path, format="fasta")

            matched_targets = self.record_by_id_generator(
                target_records, query_to_targets[query_id])
            SeqIO.write(matched_targets, target_path, format="fasta")

        with_targets = set(query_to_targets.keys())
        all_queries = set(query_records.keys())
        without_targets = all_queries - with_targets

        return with_targets, without_targets
Ejemplo n.º 2
0
        pass

    fam_soft_fd = open(
        "%s%s/%s_with_outer_edges.graph" %
        (args.output_dir, family_name, family_name), "w")
    """
    with open(args.hclust_input, "r") as in_fd:
        for line in in_fd:
            edge_nodes = line.split("\t")[:2]
            if check_edge_soft(edge_nodes, family_genes_ids):
                fam_soft_fd.write(line)
    """
    for edge in graph_list:
        if check_edge_soft(edge[:-1], family_genes_ids):
            fam_soft_fd.write("\t".join(edge) + "\n")
    fam_soft_fd.close()
    fam_strict_fd = open(
        "%s%s/%s.graph" % (args.output_dir, family_name, family_name), "w")
    with open(
            "%s%s/%s_with_outer_edges.graph" %
        (args.output_dir, family_name, family_name), "r") as in_fd:
        for line in in_fd:
            edge_nodes = line.split("\t")[:2]
            if check_edge_strict(edge_nodes, family_genes_ids):
                fam_strict_fd.write(line)
    fam_strict_fd.close()


# Run extract_fam_graph once per family id, spread over args.threads workers.
# NOTE(review): Pool is presumably multiprocessing.Pool -- confirm against the
# file's imports.
pool = Pool(args.threads)
try:
    pool.map(extract_fam_graph, families_dict.keys())
finally:
    # Shut the workers down even when a task raised, so they are not left
    # behind: close() stops new submissions, join() waits for the workers.
    pool.close()
    pool.join()