Example #1
0
def main():
    """Parse the command line and run the selected subcommand.

    Three subcommands are defined:

    * ``index``   -- hand ``--db`` to ``db_index.create_index``.
    * ``extract`` -- print the ``--start``/``--end`` slice of ``--gene``
      and/or call ``searchgen.len`` when ``--len`` is given.
    * ``splice``  -- print the pieces of ``--gene`` selected by ``--range``.

    Arguments are read from ``sys.argv`` by ``argparse``. Results are printed
    to standard output and the function returns ``None``.
    """
    parser = argparse.ArgumentParser(
        description="A Tool to index and search large multifasta files")

    subparsers = parser.add_subparsers(
        title='subcommands',
        description='valid subcommands',
        help='Use retrieve_seq.py {subcommand} -h for help with each subcommand')

    # ---- index ------------------------------------------------------------
    parser_index = subparsers.add_parser(
        'index', help='Index all sequences in the database')
    parser_index.add_argument(
        "--db", dest='db', default=None, action="store", required=False,
        help="A multifasta DB to be indexed")

    # ---- extract ----------------------------------------------------------
    parser_extract = subparsers.add_parser(
        'extract', help='Extract sequence in a multifasta')
    parser_extract.add_argument(
        '-f', '--file', dest='file', action="store", required=False,
        help="A multifasta file")
    parser_extract.add_argument(
        '-e', '--end', type=int, required=False,
        help="end position on the fasta sequence")
    parser_extract.add_argument(
        '-s', '--start', type=int, required=False,
        help="start position on the fasta sequence")
    parser_extract.add_argument(
        '-g', '--gene', type=str, required=False,
        help="A gene (or chromossome) name")
    parser_extract.add_argument(
        '-l', '--len', action='store_true', required=False,
        help="Get the length of all genes. "
             "If --gene get the length of the provided gene")

    # ---- splice -----------------------------------------------------------
    parser_splice = subparsers.add_parser(
        'splice', help='Splices the gene in the specified positions')
    parser_splice.add_argument(
        '-f', '--file', action='store',
        help='A multifasta file')
    # The short flag is '-r', matching the example given in the help text.
    parser_splice.add_argument(
        '-r', '--range', action='store', type=str, required=True,
        help='A list with the positions of the gene you wish to splice. Use the format "range1-range2, range3-range4", within quotation marks. Example: -r "10-20 30-40 50-60"')
    parser_splice.add_argument(
        '-g', '--gene', action='store', type=str, required=True,
        help='The required gene.')

    args = parser.parse_args()

    # Only the chosen subcommand adds its attributes to ``args``, so every
    # branch checks that the attribute exists before reading it.

    if hasattr(args, 'db'):
        # NOTE(review): --db defaults to None, so `index` without --db passes
        # None here -- confirm create_index copes with that.
        db_index.create_index(args.db)
        print("DB {db} has been indexed".format(db=args.db))

    if hasattr(args, 'start') and args.start is not None:
        fasta = args.file
        start = args.start
        end = args.end
        gene_name = args.gene
        generator = searchgen.generat(fasta)
        # search() yields string pieces; join them into one sequence string.
        seq = ''.join(searchgen.search(fasta, generator, start, end, gene_name))

        print('>{gene}:{start}-{end}'.format(gene=gene_name, start=start, end=end))
        print(seq)
        print()

    if hasattr(args, 'len') and args.len:
        fasta = args.file
        gene_name = args.gene if args.gene else None
        searchgen.len(fasta, gene_name)

    # --range is required by the splice subparser, so its presence alone
    # identifies the splice subcommand.
    if hasattr(args, 'range'):
        fasta = args.file
        gene_name = args.gene
        ranging = args.range
        generator = splice.generat(fasta)
        slices = ''.join(splice.slicer(fasta, generator, ranging, gene_name))

        print('>{gene} sliced in {range}:'.format(gene=gene_name, range=ranging))
        print(slices)
        print()
Example #2
0
def main():
    """Run the retrieve_seq command-line interface.

    Builds an ``argparse`` parser with the ``index``, ``extract`` and
    ``splice`` subcommands, parses ``sys.argv`` and performs whichever
    actions the parsed options select. Output goes to standard output;
    nothing is returned.
    """
    cli = argparse.ArgumentParser(
        description="A Tool to index and search large multifasta files",
    )
    commands = cli.add_subparsers(
        title='subcommands',
        description='valid subcommands',
        help='Use retrieve_seq.py {subcommand} -h for help with each subcommand',
    )

    # index: --db is handed to db_index.create_index
    index_cmd = commands.add_parser(
        'index',
        help='Index all sequences in the database',
    )
    index_cmd.add_argument(
        "--db",
        dest='db',
        default=None,
        action="store",
        required=False,
        help="A multifasta DB to be indexed",
    )

    # extract: one interval of a gene, and/or gene lengths
    extract_cmd = commands.add_parser(
        'extract',
        help='Extract sequence in a multifasta',
    )
    extract_cmd.add_argument(
        '-f', '--file',
        dest='file',
        action="store",
        required=False,
        help="A multifasta file",
    )
    extract_cmd.add_argument(
        '-e', '--end',
        type=int,
        required=False,
        help="end position on the fasta sequence",
    )
    extract_cmd.add_argument(
        '-s', '--start',
        type=int,
        required=False,
        help="start position on the fasta sequence",
    )
    extract_cmd.add_argument(
        '-g', '--gene',
        type=str,
        required=False,
        help="A gene (or chromossome) name",
    )
    extract_cmd.add_argument(
        '-l', '--len',
        action='store_true',
        required=False,
        help="Get the length of all genes. "
             "If --gene get the length of the provided gene",
    )

    # splice: several start-end intervals of one gene
    splice_cmd = commands.add_parser(
        'splice',
        help='Extract sequence in a multifasta',
    )
    splice_cmd.add_argument(
        '-f', '--file',
        dest='file',
        action="store",
        required=False,
        help="A multifasta file",
    )
    splice_cmd.add_argument(
        '-g', '--gene',
        type=str,
        required=False,
        help="A gene (or chromossome) name",
    )
    splice_cmd.add_argument(
        '-r', '--range',
        action="store",
        nargs='+',
        required=False,
        help='Values in the form start-end space separated. 10-20                        50-60 70-100',
    )

    args = cli.parse_args()

    # Only the chosen subcommand contributes attributes to ``args``, so each
    # lookup below tolerates an attribute that is absent.

    if 'db' in args:
        # NOTE(review): --db defaults to None; confirm create_index accepts it.
        db_index.create_index(args.db)
        print("DB {db} has been indexed".format(db=args.db))

    if getattr(args, 'start', None) is not None:
        seq = search_fasta.search(args.file, args.start, args.end, args.gene)

        print('>{gene}:{start}-{end}'.format(gene=args.gene,
                                             start=args.start,
                                             end=args.end))
        # Pieces are written back to back on a single output line.
        for chunk in seq:
            print(chunk, end='')
        print()

    if getattr(args, 'len', False):
        search_fasta.length(args.file, args.gene or None)

    if getattr(args, 'range', None) is not None:
        fasta = args.file
        intervals = args.range
        gene_name = args.gene

        print(f'{gene_name}:{intervals}:')
        for interval in intervals:
            # Each interval is written as "start-end".
            bounds = interval.split('-')
            start = int(bounds[0])
            end = int(bounds[1])
            seq = search_fasta.search(fasta, start, end, gene_name)

            print(f'{start}-{end}: ', end='')
            for chunk in seq:
                print(chunk, end='')
            print()
        print()
Example #3
0
def main():
    """Command-line entry point for the multifasta index/search tool.

    Declares the ``index``, ``extract`` and ``splice`` subcommands, parses
    ``sys.argv`` and runs the actions selected by the parsed options.
    Everything is reported through ``print``; the return value is ``None``.
    """
    # Create the top-level argparse parser object.
    top = argparse.ArgumentParser(description="A Tool to index and search large multifasta files")

    # Subcommands are attached through the object returned by add_subparsers.
    sub = top.add_subparsers(title='subcommands',
                             description='valid subcommands',
                             help='Use retrieve_seq.py {subcommand} -h for help with each subcommand')

    # ---- INDEX ----
    # add_parser registers the subcommand together with its main-help entry.
    idx = sub.add_parser('index', help='Index all sequences in the database')

    # Arguments accepted by the subcommand.
    idx.add_argument("--db", dest='db', default=None, action="store", required=False,
                     help="A multifasta DB to be indexed")

    # ---- EXTRACT ----
    ext = sub.add_parser('extract', help='Extract sequence in a multifasta')
    ext.add_argument('-f', '--file', dest='file', action="store", required=False,
                     help="A multifasta file")
    ext.add_argument('-e', '--end', type=int, required=False,
                     help="end position on the fasta sequence")
    ext.add_argument('-s', '--start', type=int, required=False,
                     help="start position on the fasta sequence")
    ext.add_argument('-g', '--gene', type=str, required=False,
                     help="A gene (or chromossome) name")
    ext.add_argument('-l', '--len', action='store_true', required=False,
                     help="Get the length of all genes. "
                          "If --gene get the length of the provided gene")

    # ---- SPLICE ----
    spl = sub.add_parser('splice', help='Retrieve multiples intervals')
    spl.add_argument('-f', '--file', dest='file', action="store", required=False,
                     help="A multifasta file")
    spl.add_argument('-g', '--gene', type=str, required=False,
                     help="A gene (or chromossome) name")
    spl.add_argument('-r', '--range', dest='rg', action='store', nargs='+', required=False,
                     help='Values in the form start-end space separated. 10-20 50-60 70-100')

    # Work on the parsed namespace as a plain dict: a key is present only
    # when the chosen subcommand defines the corresponding option.
    opts = vars(top.parse_args())

    if 'db' in opts:
        # NOTE(review): --db defaults to None; confirm create_index accepts it.
        db_index.create_index(opts['db'])
        print("DB {db} has been indexed".format(db=opts['db']))

    # extract: --start was supplied
    if opts.get('start') is not None:
        sequence = search.search_seq(opts['file'], opts['start'], opts['end'], opts['gene'])

        print('>{gene}:{start}-{end}'.format(gene=opts['gene'], start=opts['start'], end=opts['end']))
        for row in sequence:
            print(row)

    # extract: --len flag was set
    if opts.get('len'):
        search.length(opts['file'], opts['gene'] or None)

    # splice: --range was supplied (stored under dest 'rg')
    if opts.get('rg') is not None:
        fasta = opts['file']
        gene_name = opts['gene']

        for interval in opts['rg']:
            parts = interval.split('-')
            low, high = parts[0], parts[1]
            # The header shows the bounds exactly as typed; they are converted
            # to int only for the search call.
            print('\n>{gene}:{start}-{end}'.format(gene=gene_name, start=low, end=high))
            sequence = search.search_seq(fasta, int(low), int(high), gene_name)
            for row in sequence:
                print(row)
Example #4
0
def main():
    """Entry point of the multifasta index/search command-line tool.

    Defines the ``index`` and ``extract`` subcommands (``extract`` also
    accepts a list of intervals through ``-r/--splice``), parses
    ``sys.argv`` and runs the selected actions. Output is printed; the
    function returns ``None``.
    """
    root = argparse.ArgumentParser(description="A Tool to index and search large multifasta files")
    commands = root.add_subparsers(
        title='subcommands',
        description='valid subcommands',
        help='Use retrieve_seq.py {subcommand} -h for help with each subcommand',
    )

    index_cmd = commands.add_parser('index', help='Index all sequences in the database')
    index_cmd.add_argument("--db", dest='db', default=None, action="store",
                           help="A multifasta DB to be indexed", required=False)

    extract_cmd = commands.add_parser('extract', help='Extract sequence in a multifasta')

    # (flags, keyword settings) for every extract option, in help order.
    extract_options = (
        (('-f', '--file'),
         dict(dest='file', action="store", help="A multifasta file", required=False)),
        (('-e', '--end'),
         dict(type=int, help="end position on the fasta sequence", required=False)),
        (('-s', '--start'),
         dict(type=int, help="start position on the fasta sequence", required=False)),
        (('-g', '--gene'),
         dict(type=str, help="A gene (or chromossome) name", required=False)),
        (('-l', '--len'),
         dict(action='store_true',
              help="Get the length of all genes. "
                   "If --gene get the length of the provided gene",
              required=False)),
        (('-r', '--splice'),
         dict(nargs='+', action='store',
              help="List of intervals to extract sequence", required=False)),
    )
    for flags, settings in extract_options:
        extract_cmd.add_argument(*flags, **settings)

    args = root.parse_args()

    # An attribute exists on ``args`` only when the chosen subcommand defines
    # it, hence the defaulted lookups below.

    if 'db' in args:
        # NOTE(review): --db defaults to None; confirm create_index accepts it.
        db_index.create_index(args.db)
        print("DB {db} has been indexed".format(db=args.db))

    # Sub-sequence for a single start/end interval.
    if getattr(args, 'start', None) is not None:
        pieces = search_fasta.search(args.file, args.start, args.end, args.gene)

        print('>{gene}:{start}-{end}'.format(gene=args.gene, start=args.start, end=args.end))
        for piece in pieces:
            print(piece)
        print()

    # Sub-sequences for several intervals; search_fasta.splice is handed the
    # raw interval list and no return value is used here.
    if getattr(args, 'splice', None) is not None:
        print('>{gene}:{splice}'.format(gene=args.gene, splice=args.splice))
        search_fasta.splice(args.file, args.splice, args.gene)
        print()

    # Length report, requested with --len.
    if getattr(args, 'len', False):
        search_fasta.len(args.file, args.gene or None)
Example #5
0
def main():
    """Parse the command line and run the selected subcommand.

    Subcommands:

    * ``index``   -- hand ``--db`` to ``db_index.create_index``.
    * ``extract`` -- print the ``--start``/``--end`` slice of ``--gene``
      (together with the time spent in the ``search`` call) and/or call
      ``search_fasta.len`` when ``--len`` is given.
    * ``splice``  -- concatenate the ``--range`` intervals of ``--gene`` and
      print the result followed by its length.

    Arguments are read from ``sys.argv``; the function returns ``None``.
    """
    parser = argparse.ArgumentParser(
        description="A Tool to index and search large multifasta files")

    subparsers = parser.add_subparsers(
        title='subcommands',
        description='valid subcommands',
        help='Use retrieve_seq.py {subcommand} -h for help with each subcommand'
    )

    # ---- index ------------------------------------------------------------
    parser_index = subparsers.add_parser(
        'index', help='Index all sequences in the database')

    parser_index.add_argument("--db",
                              dest='db',
                              default=None,
                              action="store",
                              help="A multifasta DB to be indexed",
                              required=False)

    # ---- extract ----------------------------------------------------------
    parser_extract = subparsers.add_parser(
        'extract', help='Extract sequence in a multifasta')

    parser_extract.add_argument('-f',
                                '--file',
                                dest='file',
                                action="store",
                                help="A multifasta file",
                                required=False)

    parser_extract.add_argument('-e',
                                '--end',
                                type=int,
                                help="end position on the fasta sequence",
                                required=False)

    parser_extract.add_argument('-s',
                                '--start',
                                type=int,
                                help="start position on the fasta sequence",
                                required=False)

    parser_extract.add_argument('-g',
                                '--gene',
                                type=str,
                                help="A gene (or chromossome) name",
                                required=False)

    parser_extract.add_argument(
        '-l',
        '--len',
        action='store_true',
        help="Get the length of all genes. "
        "If --gene get the length of the provided gene",
        required=False)

    # ---- splice -----------------------------------------------------------
    parser_splice = subparsers.add_parser(
        'splice', help='Extract multiple sequences in a multifasta')

    parser_splice.add_argument('-f',
                               '--file',
                               dest='file',
                               action="store",
                               help="A multifasta file",
                               required=False)

    # `pair` is a converter defined elsewhere in the project; each converted
    # value is indexed as value[0] / value[1] further down.
    parser_splice.add_argument(
        '-r',
        '--range',
        type=pair,
        nargs='+',
        help="A values in the form start-end space separated",
        required=False)

    parser_splice.add_argument('-g',
                               '--gene',
                               type=str,
                               help="A gene (or chromossome) name",
                               required=False)

    args = parser.parse_args()

    # Only the chosen subcommand adds its attributes to ``args``, so every
    # branch checks that the attribute exists before reading it.

    if hasattr(args, 'db'):
        # NOTE(review): --db defaults to None, so `index` without --db passes
        # None here -- confirm create_index copes with that.
        db_index.create_index(args.db)
        print("DB {db} has been indexed".format(db=args.db))

    if hasattr(args, 'start') and args.start is not None:
        fasta = args.file
        start = args.start
        end = args.end
        gene_name = args.gene

        # time.clock() no longer exists (removed in Python 3.8);
        # perf_counter() is the documented replacement for interval timing.
        started = time.perf_counter()
        seq = search_fasta.search(fasta, start, end, gene_name)
        elapsed = time.perf_counter() - started

        print('Tempo: ')
        print(elapsed)

        print('>{gene}:{start}-{end}'.format(gene=gene_name,
                                             start=start,
                                             end=end))
        for piece in seq:
            print(piece, end='')
        print()

    if hasattr(args, 'len') and args.len:
        fasta = args.file
        gene_name = args.gene if args.gene else None
        search_fasta.len(fasta, gene_name)

    if hasattr(args, 'range') and args.range:
        fasta = args.file
        gene_name = args.gene

        print('>' + gene_name + ':', end='')

        # Collect the pieces of every interval and join once at the end
        # instead of growing a string piece by piece.
        pieces = []
        for bounds in args.range:
            start = int(bounds[0])
            end = int(bounds[1])
            print("['" + str(bounds[0]) + '-' + str(bounds[1]) + "']", end='')
            pieces.extend(search_fasta.search(fasta, start, end, gene_name))
        result = ''.join(pieces)

        print()
        print(result)
        print(len(result))
Example #6
0
def main():
    """Parse the command line and run the selected subcommand.

    Subcommands:

    * ``index``    -- hand ``--db`` to ``db_index.create_index``.
    * ``extract``  -- print the ``--start``/``--end`` slice of ``--gene``
      and/or call ``search_fasta_gen.len`` when ``--len`` is given.
    * ``splicing`` -- pass the ``--range`` intervals of ``--gene`` to
      ``search_fasta_gen.splicing``.

    Arguments are read from ``sys.argv``; the function returns ``None``.
    """
    parser = argparse.ArgumentParser(
        description="A Tool to index and search large multifasta files")

    subparsers = parser.add_subparsers(
        title='subcommands',
        description='valid subcommands',
        help='Use retrieve_seq.py {subcommand} -h for help with each subcommand'
    )

    # ---- index ------------------------------------------------------------
    parser_index = subparsers.add_parser(
        'index', help='Index all sequences in the database')

    parser_index.add_argument("--db",
                              dest='db',
                              default=None,
                              action="store",
                              help="A multifasta DB to be indexed",
                              required=False)

    # ---- extract ----------------------------------------------------------
    parser_extract = subparsers.add_parser(
        'extract', help='Extract sequence in a multifasta')

    parser_extract.add_argument('-f',
                                '--file',
                                dest='file',
                                action="store",
                                help="A multifasta file",
                                required=False)

    parser_extract.add_argument('-e',
                                '--end',
                                type=int,
                                help="end position on the fasta sequence",
                                required=False)

    parser_extract.add_argument('-s',
                                '--start',
                                type=int,
                                help="start position on the fasta sequence",
                                required=False)

    parser_extract.add_argument('-g',
                                '--gene',
                                type=str,
                                help="A gene (or chromossome) name",
                                required=False)

    parser_extract.add_argument(
        '-l',
        '--len',
        action='store_true',
        help="Get the length of all genes. "
        "If --gene get the length of the provided gene",
        required=False)

    # ---- splicing ---------------------------------------------------------
    parser_splicing = subparsers.add_parser(
        'splicing', help='Gets splicing portions and retrieves toguether')

    parser_splicing.add_argument('-f',
                                 '--file',
                                 action='store',
                                 required=False,
                                 help='insert multifasta file name')

    parser_splicing.add_argument(
        '-r',
        '--range',
        action='store',
        nargs='+',
        required=False,
        help=
        "Values should be writen as START-END separeted by space, example: 10-20 56-89"
    )

    parser_splicing.add_argument('-g',
                                 '--gene',
                                 action='store',
                                 required=False,
                                 help="A gene (or chromossome) name")

    args = parser.parse_args()

    # Only the chosen subcommand adds its attributes to ``args``, so every
    # branch checks that the attribute exists before reading it.

    if hasattr(args, 'db'):
        # NOTE(review): --db defaults to None, so `index` without --db passes
        # None here -- confirm create_index copes with that.
        db_index.create_index(args.db)
        print("DB {db} has been indexed".format(db=args.db))

    if hasattr(args, 'start') and args.start is not None:
        fasta = args.file
        start = args.start
        end = args.end
        gene_name = args.gene
        gene_seq = search_fasta_gen.search(fasta, start, end, gene_name)

        print('>{gene}:{start}-{end}'.format(gene=gene_name,
                                             start=start,
                                             end=end))
        # Each yielded line is followed by its own length.
        for line in gene_seq:
            print(line)
            print(len(line))
        print()

    # --len is a store_true flag: it is always present for `extract`, so the
    # flag's value must be tested, not merely its existence.
    if hasattr(args, 'len') and args.len:
        fasta = args.file
        gene_name = args.gene if args.gene else None
        search_fasta_gen.len(fasta, gene_name)

    # --range is optional: skip the splicing call when it was not supplied.
    if hasattr(args, 'range') and args.range is not None:
        fasta = args.file
        intervals = args.range  # local name chosen so builtin range stays usable
        gene_name = args.gene
        print('>{gene}:{range}'.format(gene=gene_name, range=intervals))
        search_fasta_gen.splicing(fasta, intervals, gene_name)
Example #7
0
def main():
    """Parse the command line and run the selected subcommand.

    Subcommands:

    * ``index``   -- hand ``--db`` to ``db_index.create_index``.
    * ``extract`` -- print the ``--start``/``--end`` slice of ``--gene``
      and/or call ``search_fasta.len`` when ``--len`` is given.
    * ``splice``  -- pass the ``--range`` intervals of ``--gene`` to
      ``search_fasta.splice``.

    Arguments are read from ``sys.argv``; the function returns ``None``.
    """
    parser = argparse.ArgumentParser(
        description="A Tool to index and search large multifasta files")

    subparsers = parser.add_subparsers(
        title='subcommands',
        description='valid subcommands',
        help='Use retrieve_seq.py {subcommand} -h for help with each subcommand')

    # ---- index ------------------------------------------------------------
    parser_index = subparsers.add_parser(
        'index', help='Index all sequences in the database')
    parser_index.add_argument(
        "--db", dest='db', default=None, action="store", required=False,
        help="A multifasta DB to be indexed")

    # ---- extract ----------------------------------------------------------
    parser_extract = subparsers.add_parser(
        'extract', help='Extract sequence in a multifasta')
    parser_extract.add_argument(
        '-f', '--file', dest='file', action="store", required=False,
        help="A multifasta file")
    parser_extract.add_argument(
        '-e', '--end', type=int, required=False,
        help="end position on the fasta sequence")
    parser_extract.add_argument(
        '-s', '--start', type=int, required=False,
        help="start position on the fasta sequence")
    parser_extract.add_argument(
        '-g', '--gene', type=str, required=False,
        help="A gene (or chromossome) name")
    parser_extract.add_argument(
        '-l', '--len', action='store_true', required=False,
        help="Get the length of all genes. "
             "If --gene get the length of the provided gene")

    # ---- splice -----------------------------------------------------------
    parser_splice = subparsers.add_parser(
        'splice', help="Extract sequence splices from a multifasta")
    parser_splice.add_argument(
        '-f', '--file', action="store", required=False,
        help="A multifasta file")
    # Adjacent literals are concatenated verbatim, so every fragment that is
    # followed by another word has to end with its own space.
    parser_splice.add_argument(
        '-r', '--range', action="store", nargs="+", required=False,
        help="Values in the form start-end space separated. "
             "e.g.: 10-20 50-60 70-100")
    parser_splice.add_argument(
        '-g', '--gene', required=False,
        help="A gene (or chromossome) name")

    args = parser.parse_args()

    # Only the chosen subcommand adds its attributes to ``args``, so every
    # branch checks that the attribute exists before reading it.

    if hasattr(args, 'db'):
        # NOTE(review): --db defaults to None, so `index` without --db passes
        # None here -- confirm create_index copes with that.
        db_index.create_index(args.db)
        print("DB {db} has been indexed".format(db=args.db))

    if hasattr(args, 'start') and args.start is not None:
        fasta = args.file
        start = args.start
        end = args.end
        gene_name = args.gene
        seq = search_fasta.search(fasta, start, end, gene_name)

        print('>{gene}:{start}-{end}'.format(gene=gene_name, start=start, end=end))
        for line in seq:
            print(line)

    if hasattr(args, 'len') and args.len:
        fasta = args.file
        gene_name = args.gene if args.gene else None
        search_fasta.len(fasta, gene_name)

    if hasattr(args, 'range') and args.range is not None:
        fasta = args.file
        ranges = args.range
        gene_name = args.gene

        print('>{gene}:{range_list}'.format(gene=gene_name, range_list=ranges))
        search_fasta.splice(fasta, ranges, gene_name)