def get_aligned_fasta(self):
    """
    Returns the alignment (`self.alignment`) as FASTA text, with every record
    renamed from its legacy accession to the accession of the matching gene
    in `self.model.genes`.

    :return: FASTA text, one `>accession` line followed by one sequence line
             per record, joined with newlines (no trailing newline).
    """
    lines = []

    for legacy_name, sites in bio_helper.parse_fasta(text=self.alignment):
        # The alignment is keyed by legacy accessions; translate each one
        # back to the gene's current accession for the header line.
        gene = self.model.genes.by_legacy_accession(legacy_name)
        lines += [">" + gene.accession, sites]

    return "\n".join(lines)
def load(self, table: LookupTable, fasta: str):
    """
    Loads FASTA text into this object and resets the highlight state.

    :param table: Lookup table to store on `self.table`.
    :param fasta: FASTA text, parsed with `bio_helper.parse_fasta`.
    """
    records = list(bio_helper.parse_fasta(text=fasta))

    # Longest sequence length, or 0 when the FASTA holds no records.
    self.max_len = max((len(sites) for _, sites in records), default=0)

    # Right-pad every sequence with spaces so all rows share the same width.
    self.genes = [(name, sites.ljust(self.max_len)) for name, sites in records]

    self.table = table
    self.highlight_row = None
    self.highlight_col = None
def colour_fasta_ansi(array: str,
                      site_type: Optional[ESiteType] = None,
                      model: Optional[Model] = None,
                      x=1,
                      n=99999):
    """
    Renders FASTA text as ANSI-coloured text.

    Each record produces a header line (the name, left-justified to 20
    characters on a bright-black background) followed by a line of sites,
    each site prefixed by the colour code looked up in the table for
    `site_type`. Records are separated by a newline.

    :param array:     FASTA text, parsed with `bio_helper.parse_fasta`.
    :param site_type: Passed to `__table_from_type` to select the colour table.
    :param model:     When provided, names recognised by
                      `Gene.is_legacy_accession` are replaced by the accession
                      of the corresponding gene in the model.
    :param x:         1-based index of the first site to show.
    :param n:         Maximum number of sites to show per record.
    :return:          The coloured text.
    """
    table = __table_from_type(site_type)

    # Marker shown wherever sites have been cut off from the displayed window.
    more_marker = ansi.FORE_WHITE + ansi.BACK_BLUE + "…"

    # Convert the 1-based start into a half-open slice [start, end).
    start = x - 1
    end = start + n

    result = []

    for index, (name, sites) in enumerate(bio_helper.parse_fasta(text=array)):
        if index:
            result.append("\n")

        if model is not None and Gene.is_legacy_accession(name):
            name = model.genes.by_legacy_accession(name).accession

        result.append(ansi.BACK_BRIGHT_BLACK + name.ljust(20) + ansi.BACK_RESET + "\n")

        result_line = []

        if start != 0:
            result_line.append(more_marker + ansi.RESET)

        # Sites missing from the table fall back to the bright-black foreground.
        result_line.extend(table.get(char, ansi.FORE_BRIGHT_BLACK) + char
                           for char in sites[start:end])

        # `sites[start:end]` stops before index `end`, so anything at or beyond
        # `end` is hidden. (The previous `end < len(sites) - 1` test missed the
        # case where exactly one trailing site was cut off.)
        if end < len(sites):
            result_line.append(more_marker)

        result.append("".join(result_line) + ansi.RESET)

    return "".join(result)
def import_genes(file_name: str) -> EChanges:
    """
    Imports a FASTA file into your model.
    If data already exists in the model, only sequence data matching sequences already in the model is loaded.

    :param file_name:   File to import
    :return:            `EChanges.MODEL_ENTITIES`
    """
    model = global_view.current_model()
    model.get_status(STAGES.SEQUENCES_2).assert_import()

    model.user_comments.append("IMPORT_FASTA \"{}\"".format(file_name))

    with LOG("IMPORT FASTA FROM '{}'".format(file_name)):
        obtain_only = model._has_data()

        # `idle` counts consecutive records that yielded no gene; each time it
        # reaches `idle_counter` a note is logged and the threshold doubles,
        # so the note becomes progressively rarer.
        idle = 0
        idle_counter = 10000

        for name, sequence_data in bio_helper.parse_fasta(file=file_name):
            sequence = _make_gene(model, str(name), obtain_only, len(sequence_data), True)

            if sequence:
                LOG("FASTA UPDATES {} WITH ARRAY OF LENGTH {}".format(sequence, len(sequence_data)))
                sequence.site_array = str(sequence_data)
                idle = 0
                continue

            idle += 1

            if idle == idle_counter:
                LOG("THIS FASTA IS BORING...")
                idle_counter *= 2
                idle = 0

    pr.printx("<verbose>Imported Fasta from <file>{}</file>.</verbose>", file_name)

    return EChanges.MODEL_ENTITIES
def composite_search_fix(blast: isFilename[EFileMode.READ],
                         fasta: isFilename[EFileMode.READ],
                         output: isFilename[EFileMode.OUTPUT]):
    """
    Converts standard BLAST format 6 TSV to `Composite search` formatted BLAST.

    Composite search [1] uses a custom input format.
    If you already have standard BLAST this converts to that format, so you don't need to BLAST again.

    [1] JS Pathmanathan, P Lopez, F-J Lapointe and E Bapteste

    :param blast:   BLAST file
    :param fasta:   FASTA file
    :param output:  Output. Receives the BLAST file, suitable for use with composite searcher
    :return:        Nothing; the result is written to `output`.
    :raises ValueError: An accession appears in the BLAST file but not in the FASTA file.
    """
    #
    # CS: qseqid sseqid evalue pident bitscore qstart qend qlen* sstart send slen*
    # ST: qseqid sseqid pident alignment length mismatches gapopens qstart qend sstart send evalue bitscore
    #
    # The starred columns (qlen, slen) are absent from standard BLAST, so they
    # are derived from the sequence lengths in the FASTA file.
    lengths = {}

    with pr.pr_action("Reading FASTA") as action:
        for accession, sequence in bio_helper.parse_fasta(file=fasta):
            # Only the text before the first space of the header is used as the key.
            accession = accession.split(" ", 1)[0]
            lengths[accession] = len(sequence)
            action.increment()

    pr.pr_verbose("{} accessions".format(len(lengths)))
    count = 0

    with io_helper.open_write(output) as file_out:
        with pr.pr_action("Processing") as action:
            with open(blast, "r") as file_in:
                for row in file_in:
                    row = row.strip()

                    # Blank lines (e.g. a trailing newline at the end of the
                    # file) carry no hit; skip them instead of failing with
                    # an IndexError below.
                    if not row:
                        continue

                    count += 1
                    action.increment()

                    elements = row.split("\t")

                    # Columns 3-5 (length, mismatches, gapopens) are not used.
                    qseqid = elements[0]
                    sseqid = elements[1]
                    pident = elements[2]
                    qstart = elements[6]
                    qend = elements[7]
                    sstart = elements[8]
                    send = elements[9]
                    evalue = elements[10]
                    bitscore = elements[11]

                    try:
                        qlen = str(lengths[qseqid])
                        slen = str(lengths[sseqid])
                    except KeyError as ex:
                        raise ValueError("Accession found in BLAST file but not in FASTA file. See internal error for details.") from ex

                    file_out.write("\t".join([qseqid, sseqid, evalue, pident, bitscore,
                                              qstart, qend, qlen, sstart, send, slen]) + "\n")

    pr.pr_verbose("{} BLASTs".format(count))