def prepare_data_for_target_alignment(self, query_fasta, target_fasta, correspondence_file, out_dir,
                                      correspondence_query_column=0, correspondence_target_column=1):
    """
    Split query and target sequences into per-query FASTA files.

    For every query id found in the correspondence file, two files are
    written into ``out_dir``:

    * ``<query_id>.query.fasta``  - records selected from the query
      collection for that single id;
    * ``<query_id>.target.fasta`` - records selected from the target
      collection for the ids the correspondence file associates with
      that query id.

    :param query_fasta: sequence file with queries, passed to
        ``self.parse_seq_file`` in "parse" mode.
    :param target_fasta: sequence file with targets, passed to
        ``self.parse_seq_file`` in "parse" mode.
    :param correspondence_file: file read by ``SynDict``; presumably a
        delimited table with one query/target pair per row - confirm
        against ``SynDict``.
    :param out_dir: output directory, created via ``self.safe_mkdir``.
    :param correspondence_query_column: column index used as the key
        (query id) when reading the correspondence file.
    :param correspondence_target_column: column index used as the value
        (target id) when reading the correspondence file.
    :return: tuple of two sets: query ids present in the correspondence
        file, and ids of the query collection absent from it.
    """
    query_records = self.parse_seq_file(query_fasta, "parse")
    target_records = self.parse_seq_file(target_fasta, "parse")

    self.safe_mkdir(out_dir)

    # Repeated keys are allowed, so a query id may be associated with
    # more than one target id.
    query_to_targets = SynDict(
        filename=correspondence_file,
        allow_repeats_of_key=True,
        key_index=correspondence_query_column,
        value_index=correspondence_target_column,
    )

    for query_id in query_to_targets:
        query_path = "%s/%s.query.fasta" % (out_dir, query_id)
        target_path = "%s/%s.target.fasta" % (out_dir, query_id)

        # The query file is written first, then the matching target file.
        selected_queries = self.record_by_id_generator(query_records, [query_id])
        SeqIO.write(selected_queries, query_path, format="fasta")

        selected_targets = self.record_by_id_generator(target_records, query_to_targets[query_id])
        SeqIO.write(selected_targets, target_path, format="fasta")

    matched_queries = set(query_to_targets.keys())
    unmatched_queries = set(query_records.keys()) - matched_queries

    return matched_queries, unmatched_queries
pass fam_soft_fd = open( "%s%s/%s_with_outer_edges.graph" % (args.output_dir, family_name, family_name), "w") """ with open(args.hclust_input, "r") as in_fd: for line in in_fd: edge_nodes = line.split("\t")[:2] if check_edge_soft(edge_nodes, family_genes_ids): fam_soft_fd.write(line) """ for edge in graph_list: if check_edge_soft(edge[:-1], family_genes_ids): fam_soft_fd.write("\t".join(edge) + "\n") fam_soft_fd.close() fam_strict_fd = open( "%s%s/%s.graph" % (args.output_dir, family_name, family_name), "w") with open( "%s%s/%s_with_outer_edges.graph" % (args.output_dir, family_name, family_name), "r") as in_fd: for line in in_fd: edge_nodes = line.split("\t")[:2] if check_edge_strict(edge_nodes, family_genes_ids): fam_strict_fd.write(line) fam_strict_fd.close() pool = Pool(args.threads) pool.map(extract_fam_graph, families_dict.keys())