def run(start=1, end='', gap_open=11, gap_extend=1, local_=False, global_=False,
        semiglobal=False, protein=False, translate=False, inter=False, table=False,
        mutations=False, strict=False, pep1=False, pep3=False, limit=1, verbose=False,
        target=None, query=None):
    """
    Performs an alignment between the query and target.
    """

    # Alignments over this size will take a long time!
    MAX_LEN = 100000

    # Set the verbosity of the process.
    utils.set_verbosity(logger, level=int(verbose))

    # Reset counter (needed for consistency during testing).
    jsonrec.reset_sequence_names()

    # This method requires two inputs.
    if not (query and target):
        utils.error("Please specify a TARGET and a QUERY")

    if global_:
        mode = const.GLOBAL_ALIGN
    elif local_:
        mode = const.LOCAL_ALIGN
    elif semiglobal:
        mode = const.SEMIGLOBAL_ALIGN
    else:
        mode = const.GLOBAL_ALIGN

    # Parameters shared by each record.
    common = dict(
        protein=protein, translate=translate, mutations=mutations,
        pep1=pep1, pep3=pep3, table=table, strict=strict,
        start=start, end=end, gap_open=gap_open, gap_extend=gap_extend, mode=mode
    )

    # Create parameters to represent each dataset.
    param_t = objects.Param(acc=target, **common)
    param_q = objects.Param(acc=query, **common)

    # Fill the JSON data for each parameter.
    param_t.json = fetch.get_json(param_t.acc, inter=inter, strict=True)[:limit]
    param_q.json = fetch.get_json(param_q.acc, inter=inter, strict=True)[:limit]

    # Each data object may contain several records.
    # When there is more than one record, iterate over them in pairs.
    for rec1, rec2 in zip(param_q.json, param_t.json):

        qrecs = fastarec.get_fasta(rec1, param=param_q)
        trecs = fastarec.get_fasta(rec2, param=param_t)

        for qseq, tseq in zip(qrecs, trecs):

            if len(qseq) > MAX_LEN:
                utils.error(f"query is longer than maximum: {len(qseq):,} > {MAX_LEN:,}")

            if len(tseq) > MAX_LEN:
                utils.error(f"target sequence is longer than maximum: {len(tseq):,} > {MAX_LEN:,}")

            biopython_align(qseq=qseq, tseq=tseq, param=param_q)
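# Illustrative sketch (not part of the module): the loop above pairs records
# positionally with zip(), so query record i is aligned only against target
# record i. Other variants of this command instead align every query record
# against every target record; with the standard library that cross-product
# corresponds to itertools.product. The record names below are made up.
def _demo_pairing():
    import itertools

    queries = ["q1", "q2"]
    targets = ["t1", "t2"]

    # Positional pairing, as in the zip() loops above: (q1, t1), (q2, t2).
    paired = list(zip(queries, targets))

    # Full cross-product, as in the nested loops used elsewhere:
    # (q1, t1), (q1, t2), (q2, t1), (q2, t2).
    crossed = list(itertools.product(queries, targets))

    return paired, crossed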
def run(limit=0, list_=False, flat=False, indent=' ', sep=', ', lineage=False,
        build=False, update=False, preload=False, download=False, verbose=False, *words):
    global SEP, INDENT

    limit = limit or None

    # Recognize string encodings: \t etc.
    INDENT = codecs.decode(indent, 'unicode_escape')
    SEP = codecs.decode(sep, 'unicode_escape')

    # Set the verbosity.
    utils.set_verbosity(logger, level=int(verbose))

    # Access the database.
    names, graph = get_data(preload=preload)

    if download:
        download_prebuilt()

    if list_:
        print_database(names=names, graph=graph)
        sys.exit()

    if update:
        update_taxdump()

    if build:
        build_database(limit=limit)

    terms = []

    # Attempts to fetch data if possible.
    for word in words:
        json = fetch.get_json(word)
        doubles = [jsonrec.find_taxid(rec) for rec in json] if json else [[]]
        taxids = [elem for sublist in doubles for elem in sublist]
        if taxids:
            terms.extend(taxids)
        else:
            terms.append(word)

    for word in terms:
        if lineage:
            print_lineage(word, names=names, flat=flat)
        else:
            query(word, names=names, graph=graph)

    # No terms listed. Print database stats.
    if not terms:
        print_stats(names=names, graph=graph)
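# Illustrative sketch (not part of the module): how the two idioms above behave.
# codecs.decode(..., 'unicode_escape') turns a literal backslash escape typed on
# the command line into the real character, and the nested list comprehension
# flattens the per-record taxid lists into a single list. Values are made up.
def _demo_decode_and_flatten():
    import codecs

    # The two-character string "\\t" becomes a real tab character.
    tab = codecs.decode("\\t", "unicode_escape")
    assert tab == "\t"

    # Flattening a list of lists, as done for the collected taxids.
    doubles = [["9606"], [], ["10090", "10116"]]
    taxids = [elem for sublist in doubles for elem in sublist]
    assert taxids == ["9606", "10090", "10116"]

    return tab, taxids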
def run(project=False, limit='', sample=False, table=False, inter=False, verbose=False, *acc):

    # Set the verbosity.
    utils.set_verbosity(logger, level=int(verbose))

    if inter:
        # The query terms will be the same as the input.
        collect = [(t, {PROJECT: t, SAMPLE: t}) for t in acc]
    else:
        # Parse the query terms from the data.
        collect = process_storage(acc)

    for row in collect:
        name, metadata = row
        if project:
            term = metadata[PROJECT]
            search(term, tabular=table, limit=limit)
        elif sample:
            term = metadata[SAMPLE]
            search(term, tabular=table, limit=limit)
        else:
            print_links(row)
def make_param(acc):
    """
    Creates a parameter for each accession.
    """
    # Set the verbosity.
    utils.set_verbosity(logger, level=int(verbose))

    # A simple wrapper class to carry all parameters around.
    p = objects.Param(start=start, end=end, seqid=seqid, protein=protein, revcomp=revcomp,
                      acc=acc, translate=translate, reverse=reverse, uid=id_, gff=gff,
                      complement=complement, name=name, inter=inter, features=features,
                      fasta=fasta, type=type, gene=gene, regexp=match, transcribe=transcribe)

    # Fill the JSON data for the parameter if not an update.
    p.json = fetch.get_json(p.acc, seqid=seqid, inter=inter)
    return p
def run(update=False, rename='', seqid='', protein=False, verbose=False, *data):
    """
    Fetches and manages data in storage.
    """
    # Set the verbosity.
    utils.set_verbosity(logger, level=int(verbose))

    # Reset counter (needed for consistency during testing).
    jsonrec.reset_counter()

    # A simple wrapper class to represent input parameters.
    param = objects.Param(seqid=seqid, rename=rename, start=1, protein=protein, update=update)

    # Fetch the data.
    fetch_data(data, param=param)

    # Renaming takes place after fetching.
    if rename:
        rename_data(data, param=param, newname=rename)
def run(start=1, end='', mode=LOCAL_ALIGN, gap_open=11, gap_extend=1, protein=False,
        translate=False, inter=False, verbose=False, query='', target=''):
    """
    Handles an alignment request.
    """
    # Set the verbosity of the process.
    utils.set_verbosity(logger, level=int(verbose))

    # Ensure the counter is reset.
    jsonrec.reset_counter()

    # Requires two inputs.
    if not (query and target):
        utils.error("Please specify both a QUERY and a TARGET")

    param1 = objects.Param(name=query, protein=protein, translate=translate, start=start,
                           end=end, gap_open=gap_open, gap_extend=gap_extend, mode=mode)
    param2 = objects.Param(name=target, protein=protein, translate=translate, start=start,
                           end=end, gap_open=gap_open, gap_extend=gap_extend, mode=mode)

    # Get the JSON data.
    param1.json = storage.get_json(param1.name, inter=inter, strict=True)
    param2.json = storage.get_json(param2.name, inter=inter, strict=True)

    for rec1 in param1.json:
        for rec2 in param2.json:
            qrecs = fastarec.get_fasta(rec1, param=param1)
            trecs = fastarec.get_fasta(rec2, param=param2)
            for qseq in qrecs:
                for tseq in trecs:
                    parasail_align(qseq=qseq, tseq=tseq, param=param1)
def run(build=False, download=False, preload=False, verbose=False, *words):

    # Set the verbosity.
    utils.set_verbosity(logger, level=int(verbose))

    if download:
        download_taxdump()

    if build:
        build_database()

    for word in words:
        query(word, mode=preload)
def run(query=None, build=False, download=False, preload=False, verbose=False):

    # Set the verbosity.
    utils.set_verbosity(logger, level=int(verbose))

    if download:
        download_terms()

    if build:
        build_database()

    if query:
        perform_query(query=query, preload=preload)
def make_param(name):
    """
    Creates a parameter for each accession.
    """
    # Set the verbosity.
    utils.set_verbosity(logger, level=int(verbose))

    # A common error is to pass a flag fragment (starting with "-") as an accession.
    if name.startswith("-"):
        msg = f"Invalid accession number: {name}"
        utils.error(msg)

    # A simple wrapper class to carry all parameters around.
    p = objects.Param(start=start, end=end, seqid=seqid, protein=protein, revcomp=revcomp,
                      update=update, name=name, gff=gff, translate=translate, reverse=reverse,
                      complement=complement, fasta=fasta, type=type, gene=gene, regexp=match,
                      transcribe=transcribe)

    # Fill the JSON data for the parameter.
    p.json = storage.get_json(p.name, seqid=seqid, inter=inter)
    return p
def manage(delete, verbose=False):
    """
    Shows the data in the storage.

    Usage:

        bio manage                   : lists the data
        bio manage --delete foo      : deletes data called foo
        bio manage --delete foo,bar  : deletes multiple datasets
    """
    # Set the verbosity.
    utils.set_verbosity(logger, level=int(verbose))

    # Reset counter (needed for consistency during testing).
    jsonrec.reset_counter()

    # Delete should be the first to execute.
    if delete:
        delete_data(delete)
    else:
        # Prints the data listing.
        print_data_list()
def run(build=False, download=False, preload=False, so=False, go=False, lineage=False,
        update=False, plot='', define=False, verbose=False, *query):

    # Join up all the words.
    query = " ".join(query)

    # Set the verbosity.
    utils.set_verbosity(logger, level=int(verbose))

    if download:
        download_prebuilt()

    if update:
        download_terms()

    if build:
        build_db()

    terms, nodes, names, back_prop = get_data(preload=preload)

    query = query.strip()

    prefix = SO_ID if so else ''
    prefix = GO_ID if go else prefix

    if query:
        perform_query(query=query, lineage=lineage, terms=terms, prefix=prefix,
                      nodes=nodes, back_prop=back_prop, names=names)
    else:
        print_stats(terms=terms)

    if plot:
        plot_term(query=query, names=names, terms=terms, nodes=nodes,
                  back_prop=back_prop, outname=plot)
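# Illustrative sketch (not part of the module): the two chained conditional
# expressions above mean that the GO prefix wins when both flags are set, and
# that the prefix stays empty when neither is given. The SO_ID and GO_ID
# defaults below are placeholders, not the module's real constants.
def _demo_prefix_precedence(so=False, go=False, SO_ID="SO:", GO_ID="GO:"):
    prefix = SO_ID if so else ''
    prefix = GO_ID if go else prefix
    return prefix

# _demo_prefix_precedence(so=True)          -> "SO:"
# _demo_prefix_precedence(so=True, go=True) -> "GO:"
# _demo_prefix_precedence()                 -> ""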
def run(protein=False, translate=False, transcribe=False, reverse=False, complement=False,
        revcomp=False, seqid='', start='', end='', type='', gene='', name='', match='',
        id_='', inter=False, features=False, fasta=False, gff=False, json=False,
        genbank=False, verbose=False, *data):
    """
    Produces FASTA representations for data.
    """

    # Turn on features if some parameters are present.
    features = features or (type or name or match or id_ or protein)

    # Set the verbosity.
    utils.set_verbosity(logger, level=int(verbose))

    # Reset counter (needed for consistency during testing).
    jsonrec.reset_counter()

    # Check that data have no dashes.
    utils.no_dash(data)

    def make_param(acc):
        """
        Creates a parameter for each accession.
        """
        # Set the verbosity.
        utils.set_verbosity(logger, level=int(verbose))

        # A simple wrapper class to carry all parameters around.
        p = objects.Param(start=start, end=end, seqid=seqid, protein=protein, revcomp=revcomp,
                          acc=acc, translate=translate, reverse=reverse, uid=id_, gff=gff,
                          complement=complement, name=name, inter=inter, features=features,
                          fasta=fasta, type=type, gene=gene, regexp=match, transcribe=transcribe)

        # Fill the JSON data for the parameter if not an update.
        p.json = fetch.get_json(p.acc, seqid=seqid, inter=inter)
        return p

    params = list(map(make_param, data))

    if fasta:
        fastarec.fasta_view(params)
    elif gff:
        gffrec.gff_view(params)
    elif json:
        jsonrec.json_view(params)
    elif genbank:
        fetch.genbank_view(params)
    else:
        fastarec.fasta_view(params)
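# Illustrative sketch (not part of the module): the expression above that derives
# the features flag returns a truthy value (possibly a string) rather than a
# strict boolean, so requesting e.g. a feature name implicitly switches the
# command into feature mode. The toy values below are made up.
def _demo_features_flag():
    def derive(features=False, type='', name='', match='', id_='', protein=False):
        # Mirrors: features = features or (type or name or match or id_ or protein)
        return features or (type or name or match or id_ or protein)

    assert not derive()               # nothing requested -> falsy
    assert derive(name="S") == "S"    # naming a feature turns feature mode on
    return derive(name="S")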
def run(start=1, end='', gap_open=11, gap_extend=1, local_=False, global_=False,
        semiglobal=False, protein=False, translate=False, inter=False, verbose=False,
        query=None, target=None):
    """
    Performs an alignment between the query and target.
    """
    # Set the verbosity of the process.
    utils.set_verbosity(logger, level=int(verbose))

    # Ensure the counter is reset.
    jsonrec.reset_counter()

    # Requires two inputs.
    if not (query and target):
        utils.error("Please specify both a QUERY and a TARGET")

    if global_:
        mode = const.GLOBAL_ALIGN
    elif local_:
        mode = const.LOCAL_ALIGN
    elif semiglobal:
        mode = const.SEMIGLOBAL_ALIGN
    else:
        mode = const.SEMIGLOBAL_ALIGN

    param1 = objects.Param(acc=query, protein=protein, translate=translate, start=start,
                           end=end, gap_open=gap_open, gap_extend=gap_extend, mode=mode)
    param2 = objects.Param(acc=target, protein=protein, translate=translate, start=start,
                           end=end, gap_open=gap_open, gap_extend=gap_extend, mode=mode)

    # Get the JSON data.
    param1.json = storage.get_json(param1.acc, inter=inter, strict=True)
    param2.json = storage.get_json(param2.acc, inter=inter, strict=True)

    for rec1 in param1.json:
        for rec2 in param2.json:
            qrecs = fastarec.get_fasta(rec1, param=param1)
            trecs = fastarec.get_fasta(rec2, param=param2)
            for qseq in qrecs:
                for tseq in trecs:
                    parasail_align(qseq=qseq, tseq=tseq, param=param1)
def run(start=1, end='', gap_open=11, gap_extend=1, local_=False, global_=False,
        semiglobal=False, protein=False, translate=False, inter=False, table=False,
        strict=False, pep1=False, pep3=False, verbose=False, target=None, query=None):
    """
    Performs an alignment between the query and target.
    """
    # Alignments over this size will take a long time!
    MAX_LEN = 100000

    # Set the verbosity of the process.
    utils.set_verbosity(logger, level=int(verbose))

    # Reset counter (needed for consistency during testing).
    jsonrec.reset_counter()

    # This method requires two inputs.
    if not (query and target):
        utils.error("Please specify both a QUERY and a TARGET")

    if global_:
        mode = const.GLOBAL_ALIGN
    elif local_:
        mode = const.LOCAL_ALIGN
    elif semiglobal:
        mode = const.SEMIGLOBAL_ALIGN
    else:
        mode = const.GLOBAL_ALIGN

    # A parameter for each record.
    param1 = objects.Param(acc=query, protein=protein, translate=translate, pep1=pep1,
                           pep3=pep3, start=start, end=end, gap_open=gap_open,
                           gap_extend=gap_extend, mode=mode)
    param2 = objects.Param(acc=target, protein=protein, translate=translate, start=start,
                           end=end, gap_open=gap_open, gap_extend=gap_extend, mode=mode)

    # Get the JSON data.
    param1.json = storage.get_json(param1.acc, inter=inter, strict=True)
    param2.json = storage.get_json(param2.acc, inter=inter, strict=True)

    # Each data object may contain several records.
    for rec1 in param1.json:
        for rec2 in param2.json:
            qrecs = fastarec.get_fasta(rec1, param=param1)
            trecs = fastarec.get_fasta(rec2, param=param2)
            for qseq in qrecs:
                for tseq in trecs:
                    if len(qseq) > MAX_LEN:
                        utils.error(f"query is longer than maximum: {len(qseq):,} > {MAX_LEN:,}")
                    if len(tseq) > MAX_LEN:
                        utils.error(f"target sequence is longer than maximum: {len(tseq):,} > {MAX_LEN:,}")
                    biopython_align(qseq=qseq, tseq=tseq, param=param1, table=table, strict=strict)
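# Illustrative sketch (not part of the module): the guard above compares raw
# sequence lengths to MAX_LEN and reports them with the ":," format spec, which
# inserts thousands separators into the numbers. The sequence below is made up.
def _demo_length_guard(seq="ACGT" * 30000, max_len=100000):
    if len(seq) > max_len:
        return f"sequence is longer than maximum: {len(seq):,} > {max_len:,}"
    return "ok"

# _demo_length_guard() -> "sequence is longer than maximum: 120,000 > 100,000"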
def run(lineage=False, update=False, download=False, accessions=False, keep='', remove='',
        field=1, scinames='', children=False, list_=False, depth=0, metadata=False,
        preload=False, indent=2, sep='', verbose=False, *terms):
    global SEP, INDENT, LIMIT

    # Input may come as a stream.
    if not terms and not sys.stdin.isatty():
        stream = sys.stdin
    else:
        stream = None

    # Indentation level.
    INDENT = ' ' * indent

    # Separator string.
    SEP = decode(sep) if sep else ", "

    # Set the verbosity.
    utils.set_verbosity(logger, level=int(verbose))

    # Download the prebuilt database.
    if download:
        download_prebuilt()

    # Downloads a new taxdump and builds a new taxonomy database.
    if update:
        build_database(limit=LIMIT)

    # Get the content of the database.
    names, graph = get_data(preload=preload, acc=accessions)

    # List the content of the database.
    if list_:
        print_database(names=names, graph=graph)
        return

    # Obtain metadata for the taxa.
    if metadata:
        print_metadata(terms)
        return

    if scinames:
        search_file(scinames, names=names, latin=latin, graph=graph, include=children)
        return

    # Filter a file by a column.
    if keep or remove:
        filter_file(stream=stream, keep=keep, remove=remove, graph=graph, colidx=field - 1)
        return

    # Input may come from a file or the command line.
    if stream:
        terms = parse_stream(stream, field=1)

    # No valid terms found. Print database stats.
    if not terms:
        print_stats(names=names, graph=graph)
        return

    # These are the terms looked up in the database.
    words = []

    # Some terms may be valid data names.
    for term in terms:
        term = term.strip()

        # Attempt to interpret the word as an existing dataset.
        json = fetch.get_json(term)

        # Extend the search terms.
        taxids = parse_taxids(json) if json else [term]

        # Add to the terms.
        words.extend(taxids)

    # Produce lineages.
    if lineage:
        for term in words:
            print_lineage(term, names=names)
        return

    # Check for mixed terms (valid taxids and search words mixed together).

    # Truth vector for the terms present in names.
    valid = list(map(lambda x: x in names, words))
    any_valid = any(valid)
    all_valid = all(valid)

    # Mixed term condition.
    mixed_terms = any_valid and not all_valid

    # We don't allow mixed terms (they produce different outputs).
    if mixed_terms:
        invalid = ", ".join(filter(lambda x: x not in names, words))
        msg = f"Unknown taxids: {invalid}"
        utils.error(msg)

    # Apply the appropriate task to each term separately.
    for term in words:
        if all_valid:
            print_term(term, names=names, graph=graph, maxdepth=depth)
        else:
            search_taxa(term)
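# Illustrative sketch (not part of the module): the mixed-term check above builds
# a truth vector of membership tests, then uses any()/all() to detect the case
# where only some of the words are known taxids. The names mapping and words
# below are made up.
def _demo_mixed_terms():
    names = {"9606": "Homo sapiens", "10090": "Mus musculus"}
    words = ["9606", "ebola"]

    valid = [w in names for w in words]
    any_valid = any(valid)   # True: at least one word is a known taxid
    all_valid = all(valid)   # False: "ebola" is not a known taxid

    mixed_terms = any_valid and not all_valid
    return mixed_terms       # True -> the command would report an error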