def main():
    """Command-line entry point for the multifasta index/extract/splice tool.

    Builds an argparse parser with three subcommands and dispatches on the
    attributes present in the parsed namespace:

    * ``index``   -- calls ``db_index.create_index`` on the ``--db`` file.
    * ``extract`` -- prints the ``--start``/``--end`` slice of ``--gene``
      obtained from ``searchgen.search``, and/or runs ``searchgen.len``
      when ``--len`` is given.
    * ``splice``  -- prints the joined output of ``splice.slicer`` for the
      ``--range`` specification of ``--gene``.

    Running without a subcommand does nothing.
    """
    # Option Parse
    parser = argparse.ArgumentParser(
        description="A Tool to index and search large multifasta files")
    subparsers = parser.add_subparsers(
        title='subcommands',
        description='valid subcommands',
        help='Use retrieve_seq.py {subcommand} -h for help with each subcommand')

    # --db is required: without it the tool used to call create_index(None)
    # and report "DB None has been indexed".
    parser_index = subparsers.add_parser(
        'index', help='Index all sequences in the database')
    parser_index.add_argument("--db", dest='db', action="store",
                              help="A multifasta DB to be indexed",
                              required=True)

    parser_extract = subparsers.add_parser(
        'extract', help='Extract sequence in a multifasta')
    parser_extract.add_argument('-f', '--file', dest='file', action="store",
                                help="A multifasta file", required=False)
    parser_extract.add_argument('-e', '--end', type=int,
                                help="end position on the fasta sequence",
                                required=False)
    parser_extract.add_argument('-s', '--start', type=int,
                                help="start position on the fasta sequence",
                                required=False)
    parser_extract.add_argument('-g', '--gene', type=str,
                                help="A gene (or chromossome) name",
                                required=False)
    parser_extract.add_argument('-l', '--len', action='store_true',
                                help="Get the length of all genes. "
                                     "If --gene get the length of the provided gene",
                                required=False)

    parser_splice = subparsers.add_parser(
        'splice', help='Splices the gene in the specified positions')
    parser_splice.add_argument('-f', '--file', action='store',
                               help='A multifasta file')
    # The short flag was previously misspelled '-r-'; the help example uses -r.
    parser_splice.add_argument(
        '-r', '--range', action='store', type=str, required=True,
        help='A list with the positions of the gene you wish to splice. '
             'Use the format "range1-range2, range3-range4", within quotation marks. '
             '\nExample: -r "10-20 30-40 50-60"')
    parser_splice.add_argument('-g', '--gene', action='store', type=str,
                               help='The required gene.', required=True)

    args = parser.parse_args()

    # hasattr must be used because each subcommand only defines its own
    # attributes on the namespace; a plain `if args.db` would raise
    # AttributeError for the other subcommands.
    if hasattr(args, 'db'):
        db_index.create_index(args.db)
        print("DB {db} has been indexed".format(db=args.db))

    if hasattr(args, 'start') and args.start is not None:
        # args.start exists and has a value
        fasta = args.file
        start = args.start
        end = args.end
        gene_name = args.gene
        generator = searchgen.generat(fasta)
        seq = ''.join(searchgen.search(fasta, generator, start, end, gene_name))
        print('>{gene}:{start}-{end}'.format(gene=gene_name, start=start, end=end))
        print(seq)
        print()

    if hasattr(args, 'len') and args.len:
        # args.len is True
        fasta = args.file
        gene_name = args.gene or None
        searchgen.len(fasta, gene_name)

    if hasattr(args, 'range'):
        fasta = args.file
        gene_name = args.gene
        ranging = args.range
        generator = splice.generat(fasta)
        slices = ''.join(splice.slicer(fasta, generator, ranging, gene_name))
        print('>{gene} sliced in {range}:'.format(gene=gene_name, range=ranging))
        print(slices)
        print()
def main():
    """Command-line entry point for the multifasta index/extract/splice tool.

    Subcommands:

    * ``index``   -- calls ``db_index.create_index`` on the ``--db`` file.
    * ``extract`` -- prints the ``--start``/``--end`` slice of ``--gene``
      yielded by ``search_fasta.search``, and/or runs
      ``search_fasta.length`` when ``--len`` is given.
    * ``splice``  -- for every ``START-END`` value passed to ``--range``,
      prints the corresponding slice of ``--gene``.

    Malformed ``--range`` values are reported as a usage error (exit
    status 2) before any sequence is read. Running without a subcommand
    does nothing.
    """
    # Option Parse
    parser = argparse.ArgumentParser(
        description="A Tool to index and search large multifasta files")
    subparsers = parser.add_subparsers(
        title='subcommands',
        description='valid subcommands',
        help='Use retrieve_seq.py {subcommand} -h for help with each subcommand')

    # --db is required: without it the tool used to call create_index(None)
    # and report "DB None has been indexed".
    parser_index = subparsers.add_parser(
        'index', help='Index all sequences in the database')
    parser_index.add_argument("--db", dest='db', action="store",
                              help="A multifasta DB to be indexed",
                              required=True)

    parser_extract = subparsers.add_parser(
        'extract', help='Extract sequence in a multifasta')
    parser_extract.add_argument('-f', '--file', dest='file', action="store",
                                help="A multifasta file", required=False)
    parser_extract.add_argument('-e', '--end', type=int,
                                help="end position on the fasta sequence",
                                required=False)
    parser_extract.add_argument('-s', '--start', type=int,
                                help="start position on the fasta sequence",
                                required=False)
    parser_extract.add_argument('-g', '--gene', type=str,
                                help="A gene (or chromossome) name",
                                required=False)
    parser_extract.add_argument('-l', '--len', action='store_true',
                                help="Get the length of all genes. "
                                     "If --gene get the length of the provided gene",
                                required=False)

    parser_splice = subparsers.add_parser(
        'splice', help='Extract sequence in a multifasta')
    parser_splice.add_argument('-f', '--file', dest='file', action="store",
                               help="A multifasta file", required=False)
    parser_splice.add_argument('-g', '--gene', type=str,
                               help="A gene (or chromossome) name",
                               required=False)
    parser_splice.add_argument(
        '-r', '--range', action="store", nargs='+', required=False,
        help='Values in the form start-end space separated. \n10-20 50-60 70-100')

    args = parser.parse_args()

    # hasattr must be used because each subcommand only defines its own
    # attributes on the namespace; a plain `if args.db` would raise
    # AttributeError for the other subcommands.
    if hasattr(args, 'db'):
        db_index.create_index(args.db)
        print("DB {db} has been indexed".format(db=args.db))

    if hasattr(args, 'start') and args.start is not None:
        # args.start exists and has a value
        fasta = args.file
        start = args.start
        end = args.end
        gene_name = args.gene
        seq = search_fasta.search(fasta, start, end, gene_name)
        print('>{gene}:{start}-{end}'.format(gene=gene_name, start=start, end=end))
        for chunk in seq:
            print(chunk, end='')
        print()

    if hasattr(args, 'len') and args.len:
        # args.len is True
        fasta = args.file
        gene_name = args.gene or None
        search_fasta.length(fasta, gene_name)

    if hasattr(args, 'range') and args.range is not None:
        # args.range exists and has a value
        fasta = args.file
        intervals = args.range
        gene_name = args.gene

        # Validate every interval before producing output so a typo in the
        # last one does not leave a half-printed result followed by a
        # traceback. Only the first two '-'-separated fields are used.
        bounds = []
        for interval in intervals:
            fields = interval.split('-')
            try:
                bounds.append((int(fields[0]), int(fields[1])))
            except (IndexError, ValueError):
                parser_splice.error(
                    f'invalid interval {interval!r}: expected START-END '
                    'with integer bounds')

        print(f'{gene_name}:{intervals}:')
        for start, end in bounds:
            seq = search_fasta.search(fasta, start, end, gene_name)
            print(f'{start}-{end}: ', end='')
            for chunk in seq:
                print(chunk, end='')
            print()
        print()
def main():
    """Command-line entry point for the multifasta index/extract/splice tool.

    Subcommands:

    * ``index``   -- calls ``db_index.create_index`` on the ``--db`` file.
    * ``extract`` -- prints the ``--start``/``--end`` slice of ``--gene``
      yielded by ``search.search_seq``, and/or runs ``search.length``
      when ``--len`` is given.
    * ``splice``  -- for every ``START-END`` value passed to ``--range``
      (stored as ``args.rg``), prints a header and the matching slice.

    Malformed ``--range`` values are reported as a usage error (exit
    status 2) before any sequence is read. Running without a subcommand
    does nothing.
    """
    # Create the top-level argparse parser.
    parser = argparse.ArgumentParser(
        description="A Tool to index and search large multifasta files")

    # Subcommands are registered through add_subparsers.
    subparsers = parser.add_subparsers(
        title='subcommands',
        description='valid subcommands',
        help='Use retrieve_seq.py {subcommand} -h for help with each subcommand')

    # ---- INDEX
    # add_parser registers the subcommand and its entry in the main help.
    parser_index = subparsers.add_parser(
        'index', help='Index all sequences in the database')
    # --db is required: without it the tool used to call create_index(None)
    # and report "DB None has been indexed".
    parser_index.add_argument("--db", dest='db', action="store",
                              help="A multifasta DB to be indexed",
                              required=True)

    # ---- EXTRACT
    parser_extract = subparsers.add_parser(
        'extract', help='Extract sequence in a multifasta')
    parser_extract.add_argument('-f', '--file', dest='file', action="store",
                                help="A multifasta file", required=False)
    parser_extract.add_argument('-e', '--end', type=int,
                                help="end position on the fasta sequence",
                                required=False)
    parser_extract.add_argument('-s', '--start', type=int,
                                help="start position on the fasta sequence",
                                required=False)
    parser_extract.add_argument('-g', '--gene', type=str,
                                help="A gene (or chromossome) name",
                                required=False)
    parser_extract.add_argument(
        '-l', '--len', action='store_true',
        help="Get the length of all genes. "
             "If --gene get the length of the provided gene",
        required=False)

    # ---- SPLICE
    parser_splice = subparsers.add_parser(
        'splice', help='Retrieve multiples intervals')
    parser_splice.add_argument('-f', '--file', dest='file', action="store",
                               help="A multifasta file", required=False)
    parser_splice.add_argument('-g', '--gene', type=str,
                               help="A gene (or chromossome) name",
                               required=False)
    parser_splice.add_argument(
        '-r', '--range', dest='rg', action='store', nargs='+', required=False,
        help='Values in the form start-end space separated. \n10-20 50-60 70-100')

    args = parser.parse_args()

    # hasattr must be used because each subcommand only defines its own
    # attributes on the namespace; a plain `if args.db` would raise
    # AttributeError for the other subcommands.
    if hasattr(args, 'db'):
        db_index.create_index(args.db)
        print("DB {db} has been indexed".format(db=args.db))

    if hasattr(args, 'start') and args.start is not None:
        # args.start exists and has a value
        fasta = args.file
        start = args.start
        end = args.end
        gene_name = args.gene
        seq = search.search_seq(fasta, start, end, gene_name)
        print('>{gene}:{start}-{end}'.format(gene=gene_name, start=start, end=end))
        for chunk in seq:
            print(chunk)

    if hasattr(args, 'len') and args.len:
        # args.len is True
        fasta = args.file
        gene_name = args.gene or None
        search.length(fasta, gene_name)

    if hasattr(args, 'rg') and args.rg is not None:
        # args.rg exists and has a value
        fasta = args.file
        gene_name = args.gene

        # Validate every interval before producing output so a typo in the
        # last one does not leave a half-printed result followed by a
        # traceback. The header echoes the bounds exactly as typed, so the
        # raw text is kept alongside the parsed integers.
        parsed = []
        for interval in args.rg:
            gene_interval = interval.split('-')
            try:
                parsed.append((gene_interval[0], gene_interval[1],
                               int(gene_interval[0]), int(gene_interval[1])))
            except (IndexError, ValueError):
                parser_splice.error(
                    'invalid interval {!r}: expected START-END with integer '
                    'bounds'.format(interval))

        for start_text, end_text, start, end in parsed:
            print('\n>{gene}:{start}-{end}'.format(gene=gene_name,
                                                   start=start_text,
                                                   end=end_text))
            seq = search.search_seq(fasta, start, end, gene_name)
            for chunk in seq:
                print(chunk)
def main():
    """Command-line entry point for the multifasta index/extract tool.

    Subcommands:

    * ``index``   -- calls ``db_index.create_index`` on the ``--db`` file.
    * ``extract`` -- depending on the options given:
        - ``--start``: prints the ``--start``/``--end`` slice of ``--gene``
          yielded by ``search_fasta.search``;
        - ``--splice``: hands the list of intervals to
          ``search_fasta.splice``;
        - ``--len``: runs ``search_fasta.len``.

    Running without a subcommand does nothing.
    """
    # Option Parse
    parser = argparse.ArgumentParser(
        description="A Tool to index and search large multifasta files")
    subparsers = parser.add_subparsers(
        title='subcommands',
        description='valid subcommands',
        help='Use retrieve_seq.py {subcommand} -h for help with each subcommand')

    # --db is required: without it the tool used to call create_index(None)
    # and report "DB None has been indexed".
    parser_index = subparsers.add_parser(
        'index', help='Index all sequences in the database')
    parser_index.add_argument("--db", dest='db', action="store",
                              help="A multifasta DB to be indexed",
                              required=True)

    parser_extract = subparsers.add_parser(
        'extract', help='Extract sequence in a multifasta')
    parser_extract.add_argument('-f', '--file', dest='file', action="store",
                                help="A multifasta file", required=False)
    parser_extract.add_argument('-e', '--end', type=int,
                                help="end position on the fasta sequence",
                                required=False)
    parser_extract.add_argument('-s', '--start', type=int,
                                help="start position on the fasta sequence",
                                required=False)
    parser_extract.add_argument('-g', '--gene', type=str,
                                help="A gene (or chromossome) name",
                                required=False)
    parser_extract.add_argument('-l', '--len', action='store_true',
                                help="Get the length of all genes. \n"
                                     "If --gene get the length of the provided gene",
                                required=False)
    parser_extract.add_argument('-r', '--splice', nargs='+', action='store',
                                help="List of intervals to extract sequence",
                                required=False)

    args = parser.parse_args()

    # hasattr must be used because each subcommand only defines its own
    # attributes on the namespace; a plain `if args.db` would raise
    # AttributeError for the other subcommands.
    if hasattr(args, 'db'):
        db_index.create_index(args.db)
        print("DB {db} has been indexed".format(db=args.db))

    # Single-interval extraction.
    if hasattr(args, 'start') and args.start is not None:
        # args.start exists and has a value
        fasta = args.file
        start = args.start
        end = args.end
        gene_name = args.gene
        seq = search_fasta.search(fasta, start, end, gene_name)
        print('>{gene}:{start}-{end}'.format(gene=gene_name, start=start, end=end))
        for chunk in seq:
            print(chunk)
        print()

    # Multi-interval extraction.
    if hasattr(args, 'splice') and args.splice is not None:
        # args.splice exists and has a value
        fasta = args.file
        intervals = args.splice
        gene_name = args.gene
        print('>{gene}:{splice}'.format(gene=gene_name, splice=intervals))
        search_fasta.splice(fasta, intervals, gene_name)
        print()

    if hasattr(args, 'len') and args.len:
        # args.len is True
        fasta = args.file
        gene_name = args.gene or None
        search_fasta.len(fasta, gene_name)
def main():
    """Command-line entry point for the multifasta index/extract/splice tool.

    Subcommands:

    * ``index``   -- calls ``db_index.create_index`` on the ``--db`` file.
    * ``extract`` -- prints the ``--start``/``--end`` slice of ``--gene``
      yielded by ``search_fasta.search`` (preceded by the time spent in
      that call), and/or runs ``search_fasta.len`` when ``--len`` is given.
    * ``splice``  -- concatenates the slices for every ``--range`` pair of
      ``--gene`` and prints the result followed by its length.

    Running without a subcommand does nothing.
    """
    # Option Parse
    parser = argparse.ArgumentParser(
        description="A Tool to index and search large multifasta files")
    subparsers = parser.add_subparsers(
        title='subcommands',
        description='valid subcommands',
        help='Use retrieve_seq.py {subcommand} -h for help with each subcommand')

    # --db is required: without it the tool used to call create_index(None)
    # and report "DB None has been indexed".
    parser_index = subparsers.add_parser(
        'index', help='Index all sequences in the database')
    parser_index.add_argument("--db", dest='db', action="store",
                              help="A multifasta DB to be indexed",
                              required=True)

    parser_extract = subparsers.add_parser(
        'extract', help='Extract sequence in a multifasta')
    parser_extract.add_argument('-f', '--file', dest='file', action="store",
                                help="A multifasta file", required=False)
    parser_extract.add_argument('-e', '--end', type=int,
                                help="end position on the fasta sequence",
                                required=False)
    parser_extract.add_argument('-s', '--start', type=int,
                                help="start position on the fasta sequence",
                                required=False)
    parser_extract.add_argument('-g', '--gene', type=str,
                                help="A gene (or chromossome) name",
                                required=False)
    parser_extract.add_argument(
        '-l', '--len', action='store_true',
        help="Get the length of all genes. \n"
             "If --gene get the length of the provided gene",
        required=False)

    # ---- SPLICE
    parser_splice = subparsers.add_parser(
        'splice', help='Extract multiple sequences in a multifasta')
    parser_splice.add_argument('-f', '--file', dest='file', action="store",
                               help="A multifasta file", required=False)
    # `pair` is the converter applied to each --range value; the code below
    # indexes its result with [0] and [1].
    parser_splice.add_argument(
        '-r', '--range', type=pair, nargs='+',
        help="A values in the form start-end space separated",
        required=False)
    parser_splice.add_argument('-g', '--gene', type=str,
                               help="A gene (or chromossome) name",
                               required=False)

    args = parser.parse_args()

    # hasattr must be used because each subcommand only defines its own
    # attributes on the namespace; a plain `if args.db` would raise
    # AttributeError for the other subcommands.
    if hasattr(args, 'db'):
        db_index.create_index(args.db)
        print("DB {db} has been indexed".format(db=args.db))

    if hasattr(args, 'start') and args.start is not None:
        # args.start exists and has a value
        fasta = args.file
        start = args.start
        end = args.end
        gene_name = args.gene

        # time.clock() was removed in Python 3.8; perf_counter() is the
        # documented replacement for measuring elapsed time.
        started_at = time.perf_counter()
        seq = search_fasta.search(fasta, start, end, gene_name)
        finished_at = time.perf_counter()
        print('Tempo: ')
        print(finished_at - started_at)

        print('>{gene}:{start}-{end}'.format(gene=gene_name, start=start, end=end))
        for chunk in seq:
            print(chunk, end='')
        print()

    if hasattr(args, 'len') and args.len:
        # args.len is True
        fasta = args.file
        gene_name = args.gene or None
        search_fasta.len(fasta, gene_name)

    if hasattr(args, 'range') and args.range:
        fasta = args.file
        gene_name = args.gene
        pieces = []  # collected chunks; joined once at the end

        print('>' + gene_name + ':', end='')
        for bounds in args.range:
            start = int(bounds[0])
            end = int(bounds[1])
            print("['" + str(bounds[0]) + '-' + str(bounds[1]) + "']", end='')
            pieces.extend(search_fasta.search(fasta, start, end, gene_name))

        result = ''.join(pieces)
        print()
        print(result)
        print(len(result))
def main():
    """Command-line entry point for the multifasta index/extract/splicing tool.

    Subcommands:

    * ``index``    -- calls ``db_index.create_index`` on the ``--db`` file.
    * ``extract``  -- prints each line of the ``--start``/``--end`` slice of
      ``--gene`` yielded by ``search_fasta_gen.search`` together with the
      line's length, and/or runs ``search_fasta_gen.len`` when ``--len``
      is given.
    * ``splicing`` -- hands the ``--range`` values for ``--gene`` to
      ``search_fasta_gen.splicing``.

    Running without a subcommand does nothing.
    """
    # Option Parse
    parser = argparse.ArgumentParser(
        description="A Tool to index and search large multifasta files")
    subparsers = parser.add_subparsers(
        title='subcommands',
        description='valid subcommands',
        help='Use retrieve_seq.py {subcommand} -h for help with each subcommand')

    # --db is required: without it the tool used to call create_index(None)
    # and report "DB None has been indexed".
    parser_index = subparsers.add_parser(
        'index', help='Index all sequences in the database')
    parser_index.add_argument("--db", dest='db', action="store",
                              help="A multifasta DB to be indexed",
                              required=True)

    parser_extract = subparsers.add_parser(
        'extract', help='Extract sequence in a multifasta')
    parser_extract.add_argument('-f', '--file', dest='file', action="store",
                                help="A multifasta file", required=False)
    parser_extract.add_argument('-e', '--end', type=int,
                                help="end position on the fasta sequence",
                                required=False)
    parser_extract.add_argument('-s', '--start', type=int,
                                help="start position on the fasta sequence",
                                required=False)
    parser_extract.add_argument('-g', '--gene', type=str,
                                help="A gene (or chromossome) name",
                                required=False)
    parser_extract.add_argument(
        '-l', '--len', action='store_true',
        help="Get the length of all genes. \n"
             "If --gene get the length of the provided gene",
        required=False)

    parser_splicing = subparsers.add_parser(
        'splicing', help='Gets splicing portions and retrieves toguether')
    parser_splicing.add_argument('-f', '--file', action='store',
                                 required=False,
                                 help='insert multifasta file name')
    parser_splicing.add_argument(
        '-r', '--range', action='store', nargs='+', required=False,
        help="Values should be writen as START-END separeted by space, "
             "example: 10-20 56-89")
    parser_splicing.add_argument('-g', '--gene', action='store',
                                 required=False,
                                 help="A gene (or chromossome) name")

    args = parser.parse_args()

    # hasattr must be used because each subcommand only defines its own
    # attributes on the namespace; a plain `if args.db` would raise
    # AttributeError for the other subcommands.
    if hasattr(args, 'db'):
        db_index.create_index(args.db)
        print("DB {db} has been indexed".format(db=args.db))

    if hasattr(args, 'start') and args.start is not None:
        # args.start exists and has a value
        fasta = args.file
        start = args.start
        end = args.end
        gene_name = args.gene
        gene_seq = search_fasta_gen.search(fasta, start, end, gene_name)
        print('>{gene}:{start}-{end}'.format(gene=gene_name, start=start, end=end))
        for line in gene_seq:
            print(line)
            print(len(line))
        print()

    # `--len` is a store_true flag, so the attribute exists (as False) on
    # every `extract` call; it must be checked for truthiness as well,
    # otherwise the length report runs on every extraction.
    if hasattr(args, 'len') and args.len:
        fasta = args.file
        gene_name = args.gene or None
        search_fasta_gen.len(fasta, gene_name)

    # `--range` is optional, so skip the splicing step when it was omitted
    # instead of passing None on.
    if hasattr(args, 'range') and args.range is not None:
        fasta = args.file
        ranges = args.range  # not named `range`, to avoid shadowing the builtin
        gene_name = args.gene
        print('>{gene}:{range}'.format(gene=gene_name, range=ranges))
        search_fasta_gen.splicing(fasta, ranges, gene_name)
def main():
    """Command-line entry point for the multifasta index/extract/splice tool.

    Subcommands:

    * ``index``   -- calls ``db_index.create_index`` on the ``--db`` file.
    * ``extract`` -- prints each line of the ``--start``/``--end`` slice of
      ``--gene`` yielded by ``search_fasta.search``, and/or runs
      ``search_fasta.len`` when ``--len`` is given.
    * ``splice``  -- hands the ``--range`` values for ``--gene`` to
      ``search_fasta.splice``.

    Running without a subcommand does nothing.
    """
    # Option Parse
    parser = argparse.ArgumentParser(
        description="A Tool to index and search large multifasta files")
    subparsers = parser.add_subparsers(
        title='subcommands',
        description='valid subcommands',
        help='Use retrieve_seq.py {subcommand} -h for help with each subcommand')

    # --db is required: without it the tool used to call create_index(None)
    # and report "DB None has been indexed".
    parser_index = subparsers.add_parser(
        'index', help='Index all sequences in the database')
    parser_index.add_argument("--db", dest='db', action="store",
                              help="A multifasta DB to be indexed",
                              required=True)

    parser_extract = subparsers.add_parser(
        'extract', help='Extract sequence in a multifasta')
    parser_extract.add_argument('-f', '--file', dest='file', action="store",
                                help="A multifasta file", required=False)
    parser_extract.add_argument('-e', '--end', type=int,
                                help="end position on the fasta sequence",
                                required=False)
    parser_extract.add_argument('-s', '--start', type=int,
                                help="start position on the fasta sequence",
                                required=False)
    parser_extract.add_argument('-g', '--gene', type=str,
                                help="A gene (or chromossome) name",
                                required=False)
    parser_extract.add_argument('-l', '--len', action='store_true',
                                help="Get the length of all genes. "
                                     "If --gene get the length of the provided gene",
                                required=False)

    parser_splice = subparsers.add_parser(
        'splice', help="Extract sequence splices from a multifasta")
    parser_splice.add_argument('-f', '--file', action="store",
                               help="A multifasta file", required=False)
    # A space was missing between the adjacent literals, which rendered the
    # help text as "separated.e.g.:".
    parser_splice.add_argument('-r', '--range', action="store", nargs="+",
                               required=False,
                               help="Values in the form "
                                    "start-end space "
                                    "separated. "
                                    "e.g.: 10-20 50-60 "
                                    "70-100")
    parser_splice.add_argument('-g', '--gene', required=False,
                               help="A gene (or chromossome) name")

    args = parser.parse_args()

    # hasattr must be used because each subcommand only defines its own
    # attributes on the namespace; a plain `if args.db` would raise
    # AttributeError for the other subcommands.
    if hasattr(args, 'db'):
        db_index.create_index(args.db)
        print("DB {db} has been indexed".format(db=args.db))

    if hasattr(args, 'start') and args.start is not None:
        # args.start exists and has a value
        fasta = args.file
        start = args.start
        end = args.end
        gene_name = args.gene
        seq = search_fasta.search(fasta, start, end, gene_name)
        print('>{gene}:{start}-{end}'.format(gene=gene_name, start=start, end=end))
        for line in seq:
            print(line)

    if hasattr(args, 'len') and args.len:
        # args.len is True
        fasta = args.file
        gene_name = args.gene or None
        search_fasta.len(fasta, gene_name)

    if hasattr(args, 'range') and args.range is not None:
        fasta = args.file
        ranges = args.range
        gene_name = args.gene
        print('>{gene}:{range_list}'.format(gene=gene_name, range_list=ranges))
        search_fasta.splice(fasta, ranges, gene_name)