help="Cluster file",
                        required=True)
    parser.add_argument("-o",
                        "--output_directory",
                        type=str,
                        help="Output folder",
                        required=True)

    args = parser.parse_args()

    #Create the output directory
    if not os.path.exists(args.output_directory):
        os.makedirs(args.output_directory)

    #####Read the genome list
    genome_id_dictionary, genome_count = GenomeData.read_genome_list(
        args.genome_list_index)

    ###Read the annotation information
    protein_annotation, function_definitions = \
        AnnotationData.parse_annotation_folder(genome_id_dictionary.keys(), args.annotation_folder)

    ##Read the cluster information
    total_clusters = AnnotationData.get_cluster_information(args.cluster_file)

    ##Print log file
    logfile = open(args.output_directory + "/logfile.txt", 'w')

    ##Total number of clusters
    logfile.write("Total number of analyzed clusters: %d" %
                  len(total_clusters) + "\n")
# Example no. 2
# 0
parser.add_argument("-o", "--output_directory", type=str, help="Output folder", required=True)

args = parser.parse_args()

# Create the output directory together with one subfolder per kind of output.
# Each folder is checked on its own so that an output directory that already
# exists (or was only partially created by an earlier run) still ends up with
# the complete layout instead of silently missing subfolders.
output_subfolders = ("nucleotide", "protein", "genome", "annotation", "coords")

if not os.path.isdir(args.output_directory):
    os.makedirs(args.output_directory)

for subfolder in output_subfolders:
    subfolder_path = os.path.join(args.output_directory, subfolder)
    if not os.path.isdir(subfolder_path):
        os.makedirs(subfolder_path)

# Read the genome list and create a dictionary with the information, and the total genome count
genome_dictionary, total_genome_count = GenomeData.read_genome_list(args.genome_list)

# Read the source option, and process the genome data with the matching parser.
# NOTE(review): any other --genome_source value falls through and leaves
# processed_genomes at 0 -- confirm the option is restricted to these values
# where the argument is declared.
processed_genomes = 0

if args.genome_source == "jgi":
    processed_genomes = GenomeData.parse_jgi_dump(genome_dictionary, args.input_folder, args.output_directory)
elif args.genome_source == "img_single":
    processed_genomes = GenomeData.parse_single_img(genome_dictionary, args.input_folder, args.output_directory)

# Single-argument parenthesised print emits the same text under Python 2 and
# is also valid Python 3 syntax.
print("Done processing %d genomes" % total_genome_count)
print("A total of %d genomes were found in the input folder" % processed_genomes)
                        help="File with the genome list. Format GenomeID, FullName, ShortName", required=True)
    parser.add_argument("-a", "--annotation_folder", type=str,
                        help="Folder with the annotation files from JGI", required=True)
    parser.add_argument("-c", "--cluster_file", type=str,
                        help="Cluster file", required=True)
    parser.add_argument("-o", "--output_directory", type=str,
                        help="Output folder", required=True)

    args = parser.parse_args()

    #Create the output directory
    if not os.path.exists(args.output_directory):
        os.makedirs(args.output_directory)

    #####Read the genome list
    genome_id_dictionary, genome_count = GenomeData.read_genome_list(args.genome_list_index)

    ###Read the annotation information
    protein_annotation, function_definitions = \
        AnnotationData.parse_annotation_folder(genome_id_dictionary.keys(), args.annotation_folder)

    ##Read the cluster information
    total_clusters = AnnotationData.get_cluster_information(args.cluster_file)

    ##Print log file
    logfile = open(args.output_directory + "/logfile.txt", 'w')

    ##Total number of clusters
    logfile.write("Total number of analyzed clusters: %d" % len(total_clusters) + "\n")

    features_to_annotate = ["COG", "KO", "PFAM", "Product"]