# Build a lookup of protein_id -> (contig_id, start, stop) from every file in
# the "coords" sub-folder of args.data_folders. Each line is expected to hold
# four tab-separated fields: contig id, protein id, start, stop. The start and
# stop values are stored exactly as read (strings); no conversion is done here.
gene_coords = defaultdict(tuple)
coord_folder = args.data_folders + "/coords/"
for coord_file in os.listdir(coord_folder):
    # A context manager closes each coords file as soon as it has been read,
    # instead of leaving one open handle per file in the folder.
    with open(coord_folder + coord_file, 'r') as coord_handle:
        for line in coord_handle:
            line = line.rstrip()
            if not line:
                # Blank lines (e.g. a trailing newline) carry no record and
                # would otherwise break the four-field unpacking below.
                continue
            contig_id, protein_id, start, stop = line.split("\t")
            gene_coords[protein_id] = (contig_id, start, stop)

# Get the annotation information for every genome key in genome_gene_info
annotation_folder = args.data_folders + "/annotation"
protein_annotation, function_definitions = \
    AnnotationTools.parse_annotation_folder(genome_gene_info.keys(), annotation_folder)

# Open the output table for writing.
# NOTE(review): this handle is not closed within the visible fragment because
# the loop below keeps running past it -- confirm it is closed once the table
# has been written.
output_file = open(args.output_file, 'w')

# Get the COG definitions
cog_one_letter, desc_cog_letter, desc_cog_number = COG.cog_definitions()

for genome in genome_gene_info:
    for protein in genome_gene_info[genome]:
        # Proteins without a "Product" annotation get None as their product.
        try:
            product = protein_annotation[protein]["Product"]
        except KeyError:
            product = None
# Remaining command-line options: the cluster file to analyse and the folder
# that receives the results. Both are mandatory.
parser.add_argument(
    "-c",
    "--cluster_file",
    type=str,
    help="Cluster file",
    required=True,
)
parser.add_argument(
    "-o",
    "--output_directory",
    type=str,
    help="Output folder",
    required=True,
)
args = parser.parse_args()

# Make sure the output folder exists before anything is written into it
if not os.path.exists(args.output_directory):
    os.makedirs(args.output_directory)

# Load the genome list (args.genome_list_index is declared outside this fragment)
genome_id_dictionary, genome_count = ClusterTools.read_genome_list(
    args.genome_list_index
)

# Load the annotation data for every genome id found in the genome list
protein_annotation, function_definitions = AnnotationTools.parse_annotation_folder(
    genome_id_dictionary.keys(),
    args.annotation_folder,
)

# Load the cluster information
total_clusters = ClusterTools.get_cluster_information(args.cluster_file)

# Start the log file inside the output folder and record how many clusters
# were read
logfile_path = args.output_directory + "/logfile.txt"
logfile = open(logfile_path, 'w')
cluster_count_line = "Total number of analyzed clusters: %d" % len(total_clusters)
logfile.write(cluster_count_line + "\n")

# Annotation keys this script works with
features_to_annotate = ["COG", "KO", "PFAM", "Product"]

# COG lookup tables
cog_one_letter, desc_cog_letter, desc_cog_number = COG.cog_definitions()
# Build a lookup of protein_id -> (contig_id, start, stop) from every file in
# the "coords" sub-folder of args.data_folders. Each line is expected to hold
# four tab-separated fields: contig id, protein id, start, stop. The start and
# stop values are stored exactly as read (strings); no conversion is done here.
gene_coords = defaultdict(tuple)
coord_folder = args.data_folders + "/coords/"
for coord_file in os.listdir(coord_folder):
    # A context manager closes each coords file as soon as it has been read,
    # instead of leaving one open handle per file in the folder.
    with open(coord_folder + coord_file, "r") as coord_handle:
        for line in coord_handle:
            line = line.rstrip()
            if not line:
                # Blank lines (e.g. a trailing newline) carry no record and
                # would otherwise break the four-field unpacking below.
                continue
            contig_id, protein_id, start, stop = line.split("\t")
            gene_coords[protein_id] = (contig_id, start, stop)

# Get the annotation information for every genome key in genome_gene_info
annotation_folder = args.data_folders + "/annotation"
protein_annotation, function_definitions = AnnotationTools.parse_annotation_folder(
    genome_gene_info.keys(), annotation_folder
)

# Open the output table for writing.
# NOTE(review): this handle is not closed within the visible fragment because
# the loop below keeps running past it -- confirm it is closed once the table
# has been written.
output_file = open(args.output_file, "w")

# Get the COG definitions
cog_one_letter, desc_cog_letter, desc_cog_number = COG.cog_definitions()

for genome in genome_gene_info:
    for protein in genome_gene_info[genome]:
        # Proteins without a "Product" annotation get None as their product.
        try:
            product = protein_annotation[protein]["Product"]
        except KeyError:
            product = None