def parse_args(args):
    """Parse and validate the command line of the scenario 1 script.

    Declares the scenario-specific options, lets ``add_common_arguments``
    add the shared ones, parses ``args`` and then normalizes every path
    option to an absolute, user-expanded path.

    Arguments:
        args: list of command-line argument strings (without the program
            name), passed straight to ``ArgumentParser.parse_args``.

    Returns:
        The argparse namespace.  ``out`` is an absolute path and the
        directory is created if it does not exist yet; ``species_list``,
        ``ids_list``, ``proteomes`` and ``annotations`` are absolute paths
        when they were given.  ``prot_id_field`` is always an int.

    Problems are reported through ``arg_parse_error``, ``check_file``,
    ``check_dir`` and ``check_common_args``, which are defined elsewhere
    (presumably they terminate the program -- confirm against the helpers).
    """
    op = ArgumentParser(description='Find groups of orthologous genes.')
    op.add_argument('-o', '--out', '--dir', dest='out', required=True)
    op.add_argument('-g', '--gbs', '--annotations', dest='annotations')
    op.add_argument('-p', '--proteins', '--proteomes', dest='proteomes')
    op.add_argument('--no-download', dest='download_anno',
                    action='store_false', default=True)
    op.add_argument('-s', '--species', '--species-list', dest='species_list')
    op.add_argument('-i', '--ids', '--ids-list', dest='ids_list')
    # type=int keeps a user-supplied value the same type as the default;
    # without it the command-line value would arrive as a string.
    op.add_argument('--prot-id-field', dest='prot_id_field',
                    type=int, default=1)

    # An organism name for -s looks like, for example,
    # "Salmonella enterica subsp. enterica serovar Typhi str. P-stx-12".
    op.usage = '''Finding orthogroups for a list of annotations / proteomes / ref ids / species.

Test runs:
    python scenario_1.py --ids test_input/ids.txt -o test_ids

    python scenario_1.py --proteomes test_input/proteins -o test_proteomes

Usage: %s [-p <proteomes dir>] [-g <.gb files dir>] [-i <gb ids file>] [-s <strain names file>]
          -o <dir> [--jobs 30] [--start-from <step num>]

    -o  Output directory.
    -g  Directory with .gb files for references with annotations.
    -p  Directory with fasta (or faa, fa) files of protein sequences. If they
        are named by their reference ids (i.e. NC_005816.1.fasta), annotations
        will be downloaded from NCBI.
    -i  File with reference ids (will be fetched from NCBI).
    -s  File with a list of organism names as in Genbank.

    --prot-id-field
        When specifying proteomes, use this fasta id field number
        to retrieve protein ids (default is 1, like >NC_005816.1|NP_995567.1 ...).
    ''' % basename(__file__)

    add_common_arguments(op)

    p = op.parse_args(args)

    check_common_args(p)

    # required=True only guarantees that -o was given, not that it is
    # non-empty, so the explicit check is kept.
    if not p.out:
        arg_parse_error('Specify output directory with -o.')
    if isfile(expanduser(p.out)):
        arg_parse_error('%s is a file' % p.out)
    p.out = abspath(expanduser(p.out))
    if not isdir(p.out):
        makedirs(p.out)

    # Validate each optional input path, then store it in absolute form.
    # The order matches the original checks: files first, then directories.
    path_options = (
        ('species_list', check_file),
        ('ids_list', check_file),
        ('proteomes', check_dir),
        ('annotations', check_dir),
    )
    for dest, check in path_options:
        value = getattr(p, dest)
        if value:
            check(expanduser(value))
            setattr(p, dest, abspath(expanduser(value)))

    return p
def parse_args(args):
    """Parse and validate the command line of the scenario 2 script.

    Declares the scenario-specific options, lets ``add_common_arguments``
    add the shared ones, parses ``args`` and then normalizes every path
    option to an absolute, user-expanded path.

    Arguments:
        args: list of command-line argument strings (without the program
            name), passed straight to ``ArgumentParser.parse_args``.

    Returns:
        The argparse namespace.  ``directory`` (the -s1o value) must be an
        existing directory and is made absolute; ``out_dir`` falls back to
        ``directory`` when -s2o/-o is not given; ``assemblies``,
        ``proteomes``, ``ids_list``, ``annotations`` and ``blastdb`` are
        absolute paths when they were given.  ``prot_id_field`` is always
        an int.

    Problems are reported through ``arg_parse_error``, ``check_file``,
    ``check_dir`` and ``check_common_args``, which are defined elsewhere
    (presumably they terminate the program -- confirm against the helpers).
    """
    import argparse

    op = argparse.ArgumentParser(
        description='Find groups of orthologous genes.')
    op.add_argument('-s1o', dest='directory', required=True)
    op.add_argument('-s2o', '-o', dest='out_dir', required=False)
    op.add_argument('-a', '--assemblies', dest='assemblies')
    op.add_argument('-g', '--annotations', '--gbs', dest='annotations')
    op.add_argument('-p', '--proteomes', '--proteins', dest='proteomes')
    op.add_argument('-i', '--ids', '--ids-list', dest='ids_list')
    # type=int keeps a user-supplied value the same type as the default;
    # without it the command-line value would arrive as a string.
    op.add_argument('--prot-id-field', dest='prot_id_field',
                    type=int, default=1)
    # The flag *disables* the search: blast_singletones is True by default.
    op.add_argument('--skip-blast-singletones', dest='blast_singletones',
                    action='store_false', default=True)
    op.add_argument('--blastdb', '--blast-db', dest='blastdb')

    # NOTE(review): -s is documented below but is not declared in this
    # function; confirm that add_common_arguments provides it.
    op.usage = '''Extends an orthogroup database and orthogroups files.
First argument is a path to existed Scenario 1 output.

Test runs:
    python scenario_2.py -s1o test_ids -s2o test_ids_new_ids --ids test_input/new_ids.txt

    python scenario_2.py -s1o test_proteomes -s2o test_prots_new_prots --proteomes test_input/new_proteins

Usage: %s -s1o <scenario_1 result dir> [-s2o <new output dir>]
          [-a <assemblies dir>] [-p <proteomes dir>] [-g <.gb files dir>] [-i <gb ids file>] [-s <strain names file>]
          [--jobs 30] [--start-from <step num>] [--skip-blast-singletones] [--blast-db <path>]

    -s1o  Path to existed Scenario 1 output.
    -s2o  Output directory (optional, if ommited, the input directory will be used).
    -a --assemblies: Directory with assemblies in fasta format.
    -g  Directory with .gb files for references with annotations.
    -p  Directory with fasta (or faa, fa) files of protein sequences. If they
        are named by their reference ids (i.e. NC_005816.1.fasta), annotations
        will be downloaded from NCBI.
    -i  File with reference ids (will be fetched from NCBI).
    -s  File with a list of organism names as in Genbank.

    --prot-id-field
        When specifying proteomes, use this fasta id field number
        to retrieve protein ids (default is 1, like >NC_005816.1|NP_995567.1 ...).

    --skip-blast-singletones
        Do not search newly added proteins against NCBI database, if they
        did not fit any group with known proteins (by default they are searched).

    --blastdb
        Local BLAST database path. If not set, "blastdb" value in config.txt
        will be used. If it was not set either, remote NCBI will be used.
    ''' % basename(__file__)

    add_common_arguments(op)

    p = op.parse_args(args)

    check_common_args(p)

    # Validate each optional input path, then store it in absolute form.
    # The order matches the original sequence of checks.
    path_options = (
        ('assemblies', check_dir),
        ('proteomes', check_dir),
        ('ids_list', check_file),
        ('annotations', check_dir),
    )
    for dest, check in path_options:
        value = getattr(p, dest)
        if value:
            check(expanduser(value))
            setattr(p, dest, abspath(expanduser(value)))

    # The BLAST database path is only made absolute, not checked for
    # existence.
    if p.blastdb:
        p.blastdb = abspath(expanduser(p.blastdb))

    if not isdir(expanduser(p.directory)):
        arg_parse_error('Directory %s does not exist.' % p.directory)
    p.directory = abspath(expanduser(p.directory))

    # Without an explicit output directory the scenario 1 directory is
    # extended in place.
    if not p.out_dir:
        p.out_dir = p.directory
    if isfile(expanduser(p.out_dir)):
        arg_parse_error('%s is a file' % p.out_dir)
    p.out_dir = abspath(expanduser(p.out_dir))

    return p
def parse_args(args):
    """Parse and validate the command line of the scenario 2 script.

    Declares the scenario-specific options, lets ``add_common_arguments``
    add the shared ones, parses ``args`` and then normalizes every path
    option to an absolute, user-expanded path.

    Arguments:
        args: list of command-line argument strings (without the program
            name), passed straight to ``ArgumentParser.parse_args``.

    Returns:
        The argparse namespace.  ``directory`` (the -s1o value) must be an
        existing directory and is made absolute; ``out_dir`` falls back to
        ``directory`` when -s2o/-o is not given; ``assemblies``,
        ``proteomes``, ``ids_list``, ``annotations`` and ``blastdb`` are
        absolute paths when they were given.  ``prot_id_field`` is always
        an int.

    Problems are reported through ``arg_parse_error``, ``check_file``,
    ``check_dir`` and ``check_common_args``, which are defined elsewhere
    (presumably they terminate the program -- confirm against the helpers).
    """
    import argparse

    op = argparse.ArgumentParser(
        description='Find groups of orthologous genes.')
    op.add_argument('-s1o', dest='directory', required=True)
    op.add_argument('-s2o', '-o', dest='out_dir', required=False)
    op.add_argument('-a', '--assemblies', dest='assemblies')
    op.add_argument('-g', '--annotations', '--gbs', dest='annotations')
    op.add_argument('-p', '--proteomes', '--proteins', dest='proteomes')
    op.add_argument('-i', '--ids', '--ids-list', dest='ids_list')
    # type=int keeps a user-supplied value the same type as the default;
    # without it the command-line value would arrive as a string.
    op.add_argument('--prot-id-field', dest='prot_id_field',
                    type=int, default=1)
    # The flag *disables* the search: blast_singletones is True by default.
    op.add_argument('--skip-blast-singletones', dest='blast_singletones',
                    action='store_false', default=True)
    op.add_argument('--blastdb', '--blast-db', dest='blastdb')

    # NOTE(review): -s is documented below but is not declared in this
    # function; confirm that add_common_arguments provides it.
    op.usage = '''Extends an orthogroup database and orthogroups files.
First argument is a path to existed Scenario 1 output.

Test runs:
    python scenario_2.py -s1o test_ids -s2o test_ids_new_ids --ids test_input/new_ids.txt

    python scenario_2.py -s1o test_proteomes -s2o test_prots_new_prots --proteomes test_input/new_proteins

Usage: %s -s1o <scenario_1 result dir> [-s2o <new output dir>]
          [-a <assemblies dir>] [-p <proteomes dir>] [-g <.gb files dir>] [-i <gb ids file>] [-s <strain names file>]
          [--jobs 30] [--start-from <step num>] [--skip-blast-singletones] [--blast-db <path>]

    -s1o  Path to existed Scenario 1 output.
    -s2o  Output directory (optional, if ommited, the input directory will be used).
    -a --assemblies: Directory with assemblies in fasta format.
    -g  Directory with .gb files for references with annotations.
    -p  Directory with fasta (or faa, fa) files of protein sequences. If they
        are named by their reference ids (i.e. NC_005816.1.fasta), annotations
        will be downloaded from NCBI.
    -i  File with reference ids (will be fetched from NCBI).
    -s  File with a list of organism names as in Genbank.

    --prot-id-field
        When specifying proteomes, use this fasta id field number
        to retrieve protein ids (default is 1, like >NC_005816.1|NP_995567.1 ...).

    --skip-blast-singletones
        Do not search newly added proteins against NCBI database, if they
        did not fit any group with known proteins (by default they are searched).

    --blastdb
        Local BLAST database path. If not set, "blastdb" value in config.txt
        will be used. If it was not set either, remote NCBI will be used.
    ''' % basename(__file__)

    add_common_arguments(op)

    p = op.parse_args(args)

    check_common_args(p)

    # Validate each optional input path, then store it in absolute form.
    # The order matches the original sequence of checks.
    path_options = (
        ('assemblies', check_dir),
        ('proteomes', check_dir),
        ('ids_list', check_file),
        ('annotations', check_dir),
    )
    for dest, check in path_options:
        value = getattr(p, dest)
        if value:
            check(expanduser(value))
            setattr(p, dest, abspath(expanduser(value)))

    # The BLAST database path is only made absolute, not checked for
    # existence.
    if p.blastdb:
        p.blastdb = abspath(expanduser(p.blastdb))

    if not isdir(expanduser(p.directory)):
        arg_parse_error('Directory %s does not exist.' % p.directory)
    p.directory = abspath(expanduser(p.directory))

    # Without an explicit output directory the scenario 1 directory is
    # extended in place.
    if not p.out_dir:
        p.out_dir = p.directory
    if isfile(expanduser(p.out_dir)):
        arg_parse_error('%s is a file' % p.out_dir)
    p.out_dir = abspath(expanduser(p.out_dir))

    return p