Example #1
def run(start=1, end='', gap_open=11, gap_extend=1, local_=False, global_=False, semiglobal=False,
        protein=False, translate=False, inter=False, table=False, mutations=False, strict=False,
        pep1=False, pep3=False, limit=1, verbose=False, target=None, query=None):
    """
    Performs an alignment between the query and target.
    """

    # Alignments over this size will take a long time!
    MAX_LEN = 100000

    # Set the verbosity of the process.
    utils.set_verbosity(logger, level=int(verbose))

    # Reset counter (needed for consistency during testing).
    jsonrec.reset_sequence_names()

    # This method requires two inputs.
    if not (query and target):
        utils.error(f"Please specify a TARGET and a QUERY")

    if global_:
        mode = const.GLOBAL_ALIGN
    elif local_:
        mode = const.LOCAL_ALIGN
    elif semiglobal:
        mode = const.SEMIGLOBAL_ALIGN
    else:
        mode = const.GLOBAL_ALIGN

    # A parameter for each record.
    common = dict(
        protein=protein, translate=translate, mutations=mutations, pep1=pep1, pep3=pep3,
        table=table, strict=strict, start=start, end=end, gap_open=gap_open, gap_extend=gap_extend,
        mode=mode
    )

    # Create parameters to represent each data.
    param_t = objects.Param(acc=target, **common)
    param_q = objects.Param(acc=query, **common)

    # Fill JSON data for parameters.
    param_t.json = fetch.get_json(param_t.acc, inter=inter, strict=True)[:limit]
    param_q.json = fetch.get_json(param_q.acc, inter=inter, strict=True)[:limit]

    # Each data object may contain several records.
    # When there is more than one record, iterate over them in pairs.
    for rec1, rec2 in zip(param_q.json, param_t.json):
        qrecs = fastarec.get_fasta(rec1, param=param_q)
        trecs = fastarec.get_fasta(rec2, param=param_t)
        for qseq, tseq in zip(qrecs, trecs):

            if (len(qseq) > MAX_LEN):
                utils.error(f"query is longer than maximum: {len(qseq):,} > {MAX_LEN:,}")

            if (len(tseq) > MAX_LEN):
                utils.error(f"target sequence is longer than maximum: {len(tseq):,} > {MAX_LEN:,}")

            biopython_align(qseq=qseq, tseq=tseq, param=param_q)
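
The run() above collects the options shared by the target and the query into a single dict and expands it with ** when building each Param. Below is a minimal, self-contained sketch of that pattern; the Opts class and the placeholder accessions are illustrative only, not the objects.Param class used above.

# Sketch of the shared-keyword pattern used above (Opts is illustrative, not objects.Param).
class Opts:
    def __init__(self, acc, **kwargs):
        self.acc = acc
        self.__dict__.update(kwargs)

common = dict(gap_open=11, gap_extend=1, mode="global")
param_t = Opts(acc="TARGET_ACC", **common)  # placeholder accession
param_q = Opts(acc="QUERY_ACC", **common)   # placeholder accession
print(param_q.acc, param_q.gap_open, param_q.mode)  # QUERY_ACC 11 global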
Example #2
def run(limit=0,
        list_=False,
        flat=False,
        indent='   ',
        sep=', ',
        lineage=False,
        build=False,
        update=False,
        preload=False,
        download=False,
        verbose=False,
        *words):
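    """
    Lists, builds, updates, or queries the taxonomy data for the given words.
    """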
    global SEP, INDENT

    limit = limit or None

    # Recognize string encodings: \t etc.
    INDENT = codecs.decode(indent, 'unicode_escape')
    SEP = codecs.decode(sep, 'unicode_escape')

    # Set the verbosity
    utils.set_verbosity(logger, level=int(verbose))

    # Access the database.
    names, graph = get_data(preload=preload)

    if download:
        download_prebuilt()

    if list_:
        print_database(names=names, graph=graph)
        sys.exit()

    if update:
        update_taxdump()

    if build:
        build_database(limit=limit)

    terms = []
    # Attempts to fetch data if possible.
    for word in words:
        json = fetch.get_json(word)
        doubles = [jsonrec.find_taxid(rec) for rec in json] if json else [[]]
        taxids = [elem for sublist in doubles for elem in sublist]
        if taxids:
            terms.extend(taxids)
        else:
            terms.append(word)

    for word in terms:

        if lineage:
            print_lineage(word, names=names, flat=flat)
        else:
            query(word, names=names, graph=graph)

    # No terms listed. Print database stats.
    if not terms:
        print_stats(names=names, graph=graph)
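
The loop over words above flattens the per-record taxid lists with a nested comprehension. A self-contained sketch of that flattening step, with made-up data:

# Flattening a list of lists, as done for `doubles` above (sample data is made up).
doubles = [["9606"], ["10090", "10116"], []]
taxids = [elem for sublist in doubles for elem in sublist]
print(taxids)  # ['9606', '10090', '10116']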
Example #3
def run(project=False,
        limit='',
        sample=False,
        table=False,
        inter=False,
        verbose=False,
        *acc):

    # Set the verbosity
    utils.set_verbosity(logger, level=int(verbose))

    if inter:
        # The query terms will be the same as the input
        collect = [(t, {PROJECT: t, SAMPLE: t}) for t in acc]
    else:
        # Parse the query terms from the data
        collect = process_storage(acc)

    for row in collect:
        name, metadata = row
        if project:
            term = metadata[PROJECT]
            search(term, tabular=table, limit=limit)
        elif sample:
            term = metadata[SAMPLE]
            search(term, tabular=table, limit=limit)
        else:
            print_links(row)
Example #4
    def make_param(acc):
        """
        Creates a parameter for each accession.

        """
        # Set the verbosity
        utils.set_verbosity(logger, level=int(verbose))

        # A simple wrapper class to carry all parameters around.
        p = objects.Param(start=start,
                          end=end,
                          seqid=seqid,
                          protein=protein,
                          revcomp=revcomp,
                          acc=acc,
                          translate=translate,
                          reverse=reverse,
                          uid=id_,
                          gff=gff,
                          complement=complement,
                          name=name,
                          inter=inter,
                          features=features,
                          fasta=fasta,
                          type=type,
                          gene=gene,
                          regexp=match,
                          transcribe=transcribe)

        # Fill the JSON data for the parameter.
        p.json = fetch.get_json(p.acc, seqid=seqid, inter=inter)

        return p
Example #5
def run(update=False,
        rename='',
        seqid='',
        protein=False,
        verbose=False,
        *data):
    """
    Fetches and manages data in storage.
    """

    # Set the verbosity
    utils.set_verbosity(logger, level=int(verbose))

    # Reset counter (needed for consistency during testing).
    jsonrec.reset_counter()

    # A simple wrapper class to represent input parameters.
    param = objects.Param(seqid=seqid,
                          rename=rename,
                          start=1,
                          protein=protein,
                          update=update)

    # Fetch the data.
    fetch_data(data, param=param)

    # Renaming after fetching.
    if rename:
        rename_data(data, param=param, newname=rename)
Example #6
def run(start=1,
        end='',
        mode=LOCAL_ALIGN,
        gap_open=11,
        gap_extend=1,
        protein=False,
        translate=False,
        inter=False,
        verbose=False,
        query='',
        target=''):
    """
    Handles an alignment request.
    """

    # Set the verbosity of the process.
    utils.set_verbosity(logger, level=int(verbose))

    # Ensure counter is reset.
    jsonrec.reset_counter()

    # Requires two inputs.
    if not (query and target):
        utils.error(f"Please specify both a QUERY and a TARGET")

    param1 = objects.Param(name=query,
                           protein=protein,
                           translate=translate,
                           start=start,
                           end=end,
                           gap_open=gap_open,
                           gap_extend=gap_extend,
                           mode=mode)
    param2 = objects.Param(name=target,
                           protein=protein,
                           translate=translate,
                           start=start,
                           end=end,
                           gap_open=gap_open,
                           gap_extend=gap_extend,
                           mode=mode)

    # Get the JSON data.
    param1.json = storage.get_json(param1.name, inter=inter, strict=True)
    param2.json = storage.get_json(param2.name, inter=inter, strict=True)

    for rec1 in param1.json:

        for rec2 in param2.json:

            qrecs = fastarec.get_fasta(rec1, param=param1)
            trecs = fastarec.get_fasta(rec2, param=param2)

            for qseq in qrecs:
                for tseq in trecs:
                    parasail_align(qseq=qseq, tseq=tseq, param=param1)
Example #7
def run(build=False, download=False, preload=False, verbose=False, *words):
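    """
    Downloads or builds the taxonomy database, then queries each word.
    """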

    # Set the verbosity
    utils.set_verbosity(logger, level=int(verbose))

    if download:
        download_taxdump()

    if build:
        build_database()

    for word in words:
        query(word, mode=preload)
Example #8
def run(query=None, build=False, download=False, preload=False, verbose=False):
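    """
    Downloads or builds the term database, then runs the query if one was given.
    """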

    # Set the verbosity
    utils.set_verbosity(logger, level=int(verbose))

    if download:
        download_terms()

    if build:
        build_database()

    if query:
        perform_query(query=query, preload=preload)
Example #9
    def make_param(name):
        """
        Creates a parameter for each accession.

        """
        # Set the verbosity
        utils.set_verbosity(logger, level=int(verbose))

        # A common error is to pass a flag fragment (starting with a dash) as an accession.
        if name.startswith("-"):
            msg = f"Invalid accession number: {name}"
            utils.error(msg)

        # A simple wrapper class to carry all parameters around.
        p = objects.Param(start=start, end=end, seqid=seqid, protein=protein, revcomp=revcomp,
                          update=update, name=name, gff=gff, translate=translate, reverse=reverse,
                          complement=complement, fasta=fasta, type=type, gene=gene, regexp=match,
                          transcribe=transcribe)

        # Fill the json data for the parameter.
        p.json = storage.get_json(p.name, seqid=seqid, inter=inter)
        return p
Example #10
def manage(delete, verbose=False):
    """
    Shows the data in the storage.

    Usage:

        bio manage                   : lists the data
        bio manage --delete foo      : deletes data called foo
        bio manage --delete foo,bar  : deletes multiple datasets
    """
    # Set the verbosity
    utils.set_verbosity(logger, level=int(verbose))

    # Reset counter (needed for consistency during testing).
    jsonrec.reset_counter()

    # Delete should be the first to execute.
    if delete:
        delete_data(delete)
    else:
        # Prints the data listing.
        print_data_list()
Example #11
def run(build=False, download=False, preload=False, so=False, go=False,
        lineage=False, update=False, plot='', define=False, verbose=False, *query):
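    """
    Downloads, updates, or builds the ontology data, then queries, plots, or prints statistics.
    """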

    # Join up all words.
    query = " ".join(query)

    # Set the verbosity
    utils.set_verbosity(logger, level=int(verbose))

    if download:
        download_prebuilt()

    if update:
        download_terms()

    if build:
        build_db()

    terms, nodes, names, back_prop = get_data(preload=preload)

    query = query.strip()

    prefix = SO_ID if so else ''
    prefix = GO_ID if go else prefix

    if query:
        perform_query(query=query,
                      lineage=lineage,
                      terms=terms,
                      prefix=prefix,
                      nodes=nodes,
                      back_prop=back_prop,
                      names=names)
    else:
        print_stats(terms=terms)

    if plot:
        plot_term(query=query, names=names, terms=terms, nodes=nodes, back_prop=back_prop, outname=plot)
Example #12
def run(protein=False,
        translate=False,
        transcribe=False,
        reverse=False,
        complement=False,
        revcomp=False,
        seqid='',
        start='',
        end='',
        type='',
        gene='',
        name='',
        match='',
        id_='',
        inter=False,
        features=False,
        fasta=False,
        gff=False,
        json=False,
        genbank=False,
        verbose=False,
        *data):
    """
    Produces FASTA representations for data.
    """

    # Turn on features if some parameters are present.
    features = features or (type or name or match or id_ or protein)

    # Set the verbosity
    utils.set_verbosity(logger, level=int(verbose))

    # Reset counter (needed for consistency during testing).
    jsonrec.reset_counter()

    # Check that data have no dashes.
    utils.no_dash(data)

    def make_param(acc):
        """
        Creates a parameter for each accession.

        """
        # Set the verbosity
        utils.set_verbosity(logger, level=int(verbose))

        # A simple wrapper class to carry all parameters around.
        p = objects.Param(start=start,
                          end=end,
                          seqid=seqid,
                          protein=protein,
                          revcomp=revcomp,
                          acc=acc,
                          translate=translate,
                          reverse=reverse,
                          uid=id_,
                          gff=gff,
                          complement=complement,
                          name=name,
                          inter=inter,
                          features=features,
                          fasta=fasta,
                          type=type,
                          gene=gene,
                          regexp=match,
                          transcribe=transcribe)

        # Fill the JSON data for the parameter.
        p.json = fetch.get_json(p.acc, seqid=seqid, inter=inter)

        return p

    params = list(map(make_param, data))

    if fasta:
        fastarec.fasta_view(params)
    elif gff:
        gffrec.gff_view(params)
    elif json:
        jsonrec.json_view(params)
    elif genbank:
        fetch.genbank_view(params)
    else:
        fastarec.fasta_view(params)
Example #13
File: align.py Project: Natay/bio-2
def run(start=1,
        end='',
        gap_open=11,
        gap_extend=1,
        local_=False,
        global_=False,
        semiglobal=False,
        protein=False,
        translate=False,
        inter=False,
        verbose=False,
        query=None,
        target=None):
    """
    Performs an alignment between the query and target.
    """

    # Set the verbosity of the process.
    utils.set_verbosity(logger, level=int(verbose))

    # Ensure counter is reset.
    jsonrec.reset_counter()

    # Requires two inputs.
    if not (query and target):
        utils.error(f"Please specify both a QUERY and a TARGET")

    if global_:
        mode = const.GLOBAL_ALIGN
    elif local_:
        mode = const.LOCAL_ALIGN
    elif semiglobal:
        mode = const.SEMIGLOBAL_ALIGN
    else:
        mode = const.SEMIGLOBAL_ALIGN

    param1 = objects.Param(acc=query,
                           protein=protein,
                           translate=translate,
                           start=start,
                           end=end,
                           gap_open=gap_open,
                           gap_extend=gap_extend,
                           mode=mode)
    param2 = objects.Param(acc=target,
                           protein=protein,
                           translate=translate,
                           start=start,
                           end=end,
                           gap_open=gap_open,
                           gap_extend=gap_extend,
                           mode=mode)

    # Get the JSON data.
    param1.json = storage.get_json(param1.acc, inter=inter, strict=True)
    param2.json = storage.get_json(param2.acc, inter=inter, strict=True)

    for rec1 in param1.json:

        for rec2 in param2.json:

            qrecs = fastarec.get_fasta(rec1, param=param1)
            trecs = fastarec.get_fasta(rec2, param=param2)

            for qseq in qrecs:
                for tseq in trecs:
                    parasail_align(qseq=qseq, tseq=tseq, param=param1)
Example #14
def run(start=1,
        end='',
        gap_open=11,
        gap_extend=1,
        local_=False,
        global_=False,
        semiglobal=False,
        protein=False,
        translate=False,
        inter=False,
        table=False,
        strict=False,
        pep1=False,
        pep3=False,
        verbose=False,
        target=None,
        query=None):
    """
    Performs an alignment between the query and target.
    """

    # Alignments over this size will take a long time!
    MAX_LEN = 100000

    # Set the verbosity of the process.
    utils.set_verbosity(logger, level=int(verbose))

    # Reset counter (needed for consistency during testing).
    jsonrec.reset_counter()

    # This method requires two inputs.
    if not (query and target):
        utils.error(f"Please specify both a QUERY and a TARGET")

    if global_:
        mode = const.GLOBAL_ALIGN
    elif local_:
        mode = const.LOCAL_ALIGN
    elif semiglobal:
        mode = const.SEMIGLOBAL_ALIGN
    else:
        mode = const.GLOBAL_ALIGN

    # A parameter for each record.
    param1 = objects.Param(acc=query,
                           protein=protein,
                           translate=translate,
                           pep1=pep1,
                           pep3=pep3,
                           start=start,
                           end=end,
                           gap_open=gap_open,
                           gap_extend=gap_extend,
                           mode=mode)

    param2 = objects.Param(acc=target,
                           protein=protein,
                           translate=translate,
                           start=start,
                           end=end,
                           gap_open=gap_open,
                           gap_extend=gap_extend,
                           mode=mode)

    # Get the JSON data.
    param1.json = storage.get_json(param1.acc, inter=inter, strict=True)
    param2.json = storage.get_json(param2.acc, inter=inter, strict=True)

    # Each data object may contain several records.
    for rec1 in param1.json:
        for rec2 in param2.json:

            qrecs = fastarec.get_fasta(rec1, param=param1)
            trecs = fastarec.get_fasta(rec2, param=param2)

            for qseq in qrecs:
                for tseq in trecs:

                    if (len(qseq) > MAX_LEN):
                        utils.error(
                            f"query is longer than maximum: {len(qseq):,} > {MAX_LEN:,}"
                        )

                    if (len(tseq) > MAX_LEN):
                        utils.error(
                            f"target sequence is longer than maximum: {len(tseq):,} > {MAX_LEN:,}"
                        )

                    biopython_align(qseq=qseq,
                                    tseq=tseq,
                                    param=param1,
                                    table=table,
                                    strict=strict)
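
Note that this variant pairs records with nested loops, so every query record is aligned against every target record (a Cartesian product), while Example #1 pairs them positionally with zip. A small illustration with made-up lists:

from itertools import product

qrecs = ["q1", "q2"]
trecs = ["t1", "t2"]

print(list(zip(qrecs, trecs)))      # positional pairs: [('q1', 't1'), ('q2', 't2')]
print(list(product(qrecs, trecs)))  # all pairs, as the nested loops above produce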
Example #15
def run(lineage=False,
        update=False,
        download=False,
        accessions=False,
        keep='',
        remove='',
        field=1,
        scinames='',
        children=False,
        list_=False,
        depth=0,
        metadata=False,
        preload=False,
        indent=2,
        sep='',
        verbose=False,
        *terms):
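    """
    Performs taxonomy tasks: builds or lists the database, filters files by taxid,
    prints lineages, or looks up each term.
    """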
    global SEP, INDENT, LIMIT

    # Input may come as a stream.
    if not terms and not sys.stdin.isatty():
        stream = sys.stdin
    else:
        stream = None

    # Indentation level
    INDENT = ' ' * indent

    # Separator string.
    SEP = decode(sep) if sep else ", "

    # Set the verbosity
    utils.set_verbosity(logger, level=int(verbose))

    # Download the prebuilt database.
    if download:
        download_prebuilt()

    # Downloads a new taxdump and builds a new taxonomy database.
    if update:
        build_database(limit=LIMIT)

    # Get the content of the database.
    names, graph = get_data(preload=preload, acc=accessions)

    # List the content of a database.
    if list_:
        print_database(names=names, graph=graph)
        return

    # Obtain metadata for the taxon
    if metadata:
        print_metadata(terms)
        return

    if scinames:
        search_file(scinames,
                    names=names,
                    latin=latin,
                    graph=graph,
                    include=children)
        return

    # Filters a file by a column.
    if keep or remove:
        filter_file(stream=stream,
                    keep=keep,
                    remove=remove,
                    graph=graph,
                    colidx=field - 1)
        return

    # Input may come from a file or command line.
    if stream:
        terms = parse_stream(stream, field=1)

    # No valid terms found. Print database stats.
    if not terms:
        print_stats(names=names, graph=graph)
        return

    # These are the terms looked up in the database.
    words = []

    # Some terms may be valid data names.
    for term in terms:
        term = term.strip()
        # Attempts to interpret the word as an existing dataset.
        json = fetch.get_json(term)

        # Extend the search terms.
        taxids = parse_taxids(json) if json else [term]

        # Add to the terms.
        words.extend(taxids)

    # Produce lineages
    if lineage:
        for term in words:
            print_lineage(term, names=names)
        return

    # Check whether the input mixes valid taxids with search words.

    # Truth vector marking which terms are present in names.
    valid = list(map(lambda x: x in names, words))
    any_valid = any(valid)
    all_valid = all(valid)

    # Mixed term condition.
    mixed_terms = any_valid and not all_valid

    # We don't allow mixed terms (produces different outputs).
    if mixed_terms:
        invalid = ", ".join(filter(lambda x: x not in names, words))
        msg = f"Unkown taxids: {invalid}"
        utils.error(msg)

    # Apply the appropriate task to each term separately.
    for term in words:
        if all_valid:
            print_term(term, names=names, graph=graph, maxdepth=depth)
        else:
            search_taxa(term)
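
The truth-vector check above rejects input that mixes known taxids with free-text search words, since the two cases produce different kinds of output. A self-contained sketch of that check, with made-up names and words:

# Mixed-term detection as in the code above (names and words are made up).
names = {"9606": "Homo sapiens", "10090": "Mus musculus"}
words = ["9606", "coronavirus"]

valid = [word in names for word in words]
mixed_terms = any(valid) and not all(valid)
print(mixed_terms)  # True: a known taxid is mixed with a search word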