예제 #1
0
파일: model_core.py 프로젝트: mjr129/groot
    def get_aligned_fasta(self):
        """
        Re-emits the stored alignment as FASTA text with translated names.

        Every record parsed from `self.alignment` has its name looked up
        through `self.model.genes.by_legacy_accession`, and the header is
        rewritten to use the `accession` of the object that lookup returns.

        :return: The ">" header and value lines of all records, joined by
                 newlines (no trailing newline).
        """
        def _fasta_lines():
            # Two output lines per record: the rewritten header, then the value
            for legacy_name, sites in bio_helper.parse_fasta(
                    text=self.alignment):
                gene = self.model.genes.by_legacy_accession(legacy_name)
                yield ">" + gene.accession
                yield sites

        return "\n".join(_fasta_lines())
예제 #2
0
    def load(self, table: LookupTable, fasta: str):
        """
        Fills this object from FASTA text and resets the highlight state.

        :param table: Lookup table, stored as-is in `self.table`.
        :param fasta: FASTA text, parsed with `bio_helper.parse_fasta`.
        """
        records = list(bio_helper.parse_fasta(text=fasta))

        # The longest sequence sets the common width (0 if nothing was parsed)
        widths = [len(record[1]) for record in records]
        self.max_len = max(widths) if records else 0

        # Keep (name, sites) pairs, space-padding the sites on the right so
        # that every entry has the same length
        self.genes = [(record[0], record[1].ljust(self.max_len))
                      for record in records]

        self.table = table
        self.highlight_row = None
        self.highlight_col = None
예제 #3
0
def colour_fasta_ansi(array: str,
                      site_type: Optional[ESiteType] = None,
                      model: Model = None,
                      x=1,
                      n=99999):
    """
    Formats FASTA text for terminal display using ANSI colour codes.

    Each record becomes a header line (the name padded to 20 columns on a
    bright-black background) followed by one line with the selected window
    of sites. Every site is prefixed with the code the colour table holds for
    that character (bright-black foreground when the table has no entry).
    An ellipsis on a blue background marks each end of the line at which
    the window hides sites.

    :param array:      FASTA text to render.
    :param site_type:  Passed to `__table_from_type` to choose the colour
                       table.
    :param model:      When not None, names for which
                       `Gene.is_legacy_accession` is true are replaced by the
                       accession of the matching gene in `model.genes`.
    :param x:          1-based index of the first site to show.
    :param n:          Maximum number of sites to show.
    :return:           The rendered records, separated by newlines.
    """
    table = __table_from_type(site_type)

    # The window is the same for every record, so compute it once
    start = x - 1
    end = start + n

    rendered = []

    for name, sites in bio_helper.parse_fasta(text=array):
        if model is not None and Gene.is_legacy_accession(name):
            name = model.genes.by_legacy_accession(name).accession

        header = (ansi.BACK_BRIGHT_BLACK + name.ljust(20) +
                  ansi.BACK_RESET + "\n")

        line = []

        if start != 0:
            line.append(ansi.FORE_WHITE + ansi.BACK_BLUE + "…" + ansi.RESET)

        line.extend(table.get(char, ansi.FORE_BRIGHT_BLACK) + char
                    for char in sites[start:end])

        # The slice above stops *before* index `end`, so sites are hidden
        # whenever `end` is a valid index - including when it is the last one
        if end < len(sites):
            line.append(ansi.FORE_WHITE + ansi.BACK_BLUE + "…")

        rendered.append(header + "".join(line) + ansi.RESET)

    return "\n".join(rendered)
예제 #4
0
def import_genes(file_name: str) -> EChanges:
    """
    Imports a FASTA file into your model.
    If data already exists in the model, only sequence data matching sequences already in the model is loaded.

    :param file_name:   File to import
    :return:            `EChanges.MODEL_ENTITIES`
    """
    model = global_view.current_model()
    model.get_status(STAGES.SEQUENCES_2).assert_import()

    model.user_comments.append("IMPORT_FASTA \"{}\"".format(file_name))

    with LOG("IMPORT FASTA FROM '{}'".format(file_name)):
        obtain_only = model._has_data()

        # Tallied per updated gene; not reported anywhere in this function
        updated = 0

        # Consecutive records that produced no gene, and how many of those
        # are tolerated before the next "boring" log message is emitted
        misses = 0
        miss_limit = 10000

        for name, sequence_data in bio_helper.parse_fasta(file=file_name):
            num_sites = len(sequence_data)
            gene = _make_gene(model, str(name), obtain_only, num_sites, True)

            if not gene:
                misses += 1

                if misses == miss_limit:
                    LOG("THIS FASTA IS BORING...")
                    # Back off: wait twice as long before logging again
                    miss_limit *= 2
                    misses = 0

                continue

            LOG("FASTA UPDATES {} WITH ARRAY OF LENGTH {}".format(
                gene, num_sites))
            updated += 1
            gene.site_array = str(sequence_data)
            misses = 0

    pr.printx("<verbose>Imported Fasta from <file>{}</file>.</verbose>",
              file_name)

    return EChanges.MODEL_ENTITIES
예제 #5
0
def composite_search_fix( blast: isFilename[EFileMode.READ], fasta: isFilename[EFileMode.READ], output: isFilename[EFileMode.OUTPUT] ):
    """
    Converts standard BLAST format 6 TSV to `Composite search` formatted BLAST. 
    
    Composite search [1] uses a custom input format.
    If you already have standard BLAST this converts to that format, so you don't need to BLAST again.
    
    The sequence lengths that the custom format needs (`qlen`, `slen`) are taken from the FASTA file.
    Nothing is returned; the converted rows are written to `output`.
    
    [1] JS Pathmanathan, P Lopez, F-J Lapointe and E Bapteste
    
    :param blast:   BLAST file 
    :param fasta:   FASTA file 
    :param output:  Output. Receives the BLAST data, suitable for use with composite searcher
    :raises ValueError: An accession in the BLAST file is not present in the FASTA file.
    """
    # Column layouts (* = looked up from the FASTA, not present in standard BLAST):
    # CS: qseqid sseqid evalue pident bitscore qstart     qend     qlen*  sstart send   slen*
    # ST: qseqid sseqid pident length mismatch gapopen    qstart   qend   sstart send   evalue bitscore
    
    lengths = { }
    
    with pr.pr_action( "Reading FASTA" ) as action:
        for accession, sequence in bio_helper.parse_fasta( file = fasta ):
            # Only the text before the first space is used as the key
            lengths[accession.split( " ", 1 )[0]] = len( sequence )
            action.increment()
    
    pr.pr_verbose( "{} accessions".format( len( lengths ) ) )
    count = 0
    
    with io_helper.open_write( output ) as file_out:
        with pr.pr_action( "Processing" ) as action:
            with open( blast, "r" ) as file_in:
                for row in file_in:
                    row = row.strip()
                    
                    # Blank lines (e.g. a trailing one) carry no hit; indexing
                    # their columns below would raise IndexError
                    if not row:
                        continue
                    
                    count += 1
                    action.increment()
                    elements = row.split( "\t" )
                    
                    # Columns 3-5 (length, mismatch, gapopen) are not part of the output format
                    qseqid = elements[0]
                    sseqid = elements[1]
                    pident = elements[2]
                    qstart = elements[6]
                    qend = elements[7]
                    sstart = elements[8]
                    send = elements[9]
                    evalue = elements[10]
                    bitscore = elements[11]
                    
                    try:
                        qlen = str( lengths[qseqid] )
                        slen = str( lengths[sseqid] )
                    except KeyError as ex:
                        raise ValueError( "Accession found in BLAST file but not in FASTA file. See internal error for details." ) from ex
                    
                    file_out.write( "\t".join( [qseqid, sseqid, evalue, pident, bitscore, qstart, qend, qlen, sstart, send, slen] ) + "\n" )
    
    pr.pr_verbose( "{} BLASTs".format( count ) )