def extend_simple_core(analyzed_hits, query, args_inner, all_short,
                       multi_query, iteration, ih_model):
    """Extend BLAST hits with the 'simple' method, then infer homology.

    Hits are expanded with ``extra=0`` (the extension is driven only by the
    unaligned parts of the query), trimmed, converted through
    ``BA_support.rc_hits_2_rna`` and wrapped into blast-only report objects.
    Hits whose report cannot be built are recorded in
    ``analyzed_hits.hits_failed`` together with the error text.

    Args:
        analyzed_hits: result container; its ``msgs``, ``hits`` and
            ``hits_failed`` attributes are read and appended to here.
        query: query record; ``len(query)`` and ``query.seq.transcribe()``
            are used.
        args_inner: run configuration (``db_type``, ``blast_db``,
            ``blast_regexp``, ``skip_missing``, ``entrez``, ``blast_in``).
        all_short: hits handed to the expansion routine.
        multi_query: forwarded unchanged to ``infer_homology``.
        iteration: forwarded unchanged to ``infer_homology``.
        ih_model: forwarded to ``infer_homology`` as ``cm_model_file``.

    Returns:
        Tuple ``(analyzed_hits, homology_prediction, homol_seqs,
        cm_file_rfam_user)``.

    Raises:
        exceptions.IncorrectDatabaseChoice: ``args_inner.db_type`` is not one
            of the supported database kinds.
        SystemExit: no hit could be extended (``sys.exit(1)``).
    """
    db_kind = args_inner.db_type
    if db_kind != "blastdb" and db_kind not in ("fasta", "gb", "server", "entrez"):
        raise exceptions.IncorrectDatabaseChoice()

    # "extra" is given only pro forma: the sequence is extended exactly by the
    # lengths of the unaligned portions of the query.
    shared_kwargs = dict(
        extra=0,
        blast_regexp=args_inner.blast_regexp,
        skip_missing=args_inner.skip_missing,
        msgs=analyzed_hits.msgs,
    )
    hit_expander = rna_blast_analyze.BR_core.extend_hits
    if db_kind == "blastdb":
        expanded, _ = hit_expander.expand_hits(
            all_short, args_inner.blast_db, len(query), **shared_kwargs
        )
    else:
        expanded, _ = hit_expander.expand_hits_from_fasta(
            all_short, args_inner.blast_db, len(query),
            format=db_kind,
            entrez_email=args_inner.entrez,
            blast_input_file=args_inner.blast_in,
            **shared_kwargs
        )

    # NOTE: merging of overlapping BLAST hits (merge_blast_hits) is currently
    # disabled; overlapping hits are therefore not folded into the longer hit.
    expanded = BA_support.rc_hits_2_rna(trim_before(expanded))

    query_len = len(query.seq.transcribe())

    def _mark_failed(failed_hit, error):
        # Keep the reason with the hit and move it to the failed list.
        failed_hit.annotations['msgs'] += [str(error)]
        analyzed_hits.hits_failed.append(BA_support.Subsequences(failed_hit))

    # blast-only extension
    for candidate in expanded:
        try:
            report = create_blast_only_report_object(candidate, query_len)
            analyzed_hits.hits.append(report)
        except (AssertionError, exceptions.UnknownStrand) as err:
            _mark_failed(candidate, err)
        except Exception as err:
            ml.error("Unexpected error when extending with 'simple'.")
            _mark_failed(candidate, err)

    if not analyzed_hits.hits:
        ml.error(
            "Extension failed for all sequences. Please see the error message. You can also try '--mode locarna'."
        )
        sys.exit(1)

    # The score annotation is explicitly None for hits extended this way.
    for accepted in analyzed_hits.hits:
        accepted.extension.annotations['score'] = None

    # Homology inference - not truly part of the extension itself.
    homology_prediction, homol_seqs, cm_file_rfam_user = infer_homology(
        analyzed_hits=analyzed_hits,
        args=args_inner,
        cm_model_file=ih_model,
        multi_query=multi_query,
        iteration=iteration,
    )
    for accepted, prediction in zip(analyzed_hits.hits, homology_prediction):
        accepted.hpred = prediction

    return analyzed_hits, homology_prediction, homol_seqs, cm_file_rfam_user
# Example #2
# 0
def extend_locarna_core(analyzed_hits, query, args_inner, all_short, multi_query, iteration, ih_model):
    """Extend BLAST hits via ``locarna_worker`` and infer homology.

    Each hit is expanded by ``args_inner.subseq_window_locarna`` extra
    positions, converted through ``BA_support.rc_hits_2_rna`` and handed to
    ``locarna_worker`` together with the transcribed query. Results whose
    ``extension`` is ``None`` go to ``analyzed_hits.hits_failed``; the rest
    go to ``analyzed_hits.hits``.

    Args:
        analyzed_hits: result container; its ``msgs``, ``hits`` and
            ``hits_failed`` attributes are read and appended to here.
        query: query record; ``len(query)`` and ``query.seq.transcribe()``
            are used.
        args_inner: run configuration (``db_type``, ``blast_db``,
            ``blast_regexp``, ``skip_missing``, ``entrez``, ``blast_in``,
            ``subseq_window_locarna``, ``locarna_params``,
            ``locarna_anchor_length``, ``threads``).
        all_short: hits handed to the expansion routine.
        multi_query: forwarded unchanged to ``infer_homology``.
        iteration: forwarded unchanged to ``infer_homology``.
        ih_model: forwarded to ``infer_homology`` as ``cm_model_file``.

    Returns:
        Tuple ``(analyzed_hits, homology_prediction, homol_seqs,
        cm_file_rfam_user)``.

    Raises:
        exceptions.IncorrectDatabaseChoice: ``args_inner.db_type`` is not one
            of the supported database kinds.
    """
    # Expand hits to the query length plus the configured window
    # (args_inner.subseq_window_locarna).
    if args_inner.db_type == "blastdb":
        shorts_expanded, _ = rna_blast_analyze.BR_core.extend_hits.expand_hits(
            all_short,
            args_inner.blast_db,
            len(query),
            extra=args_inner.subseq_window_locarna,
            blast_regexp=args_inner.blast_regexp,
            skip_missing=args_inner.skip_missing,
            msgs=analyzed_hits.msgs,
        )
    elif args_inner.db_type in ["fasta", "gb", "server", "entrez"]:
        shorts_expanded, _ = rna_blast_analyze.BR_core.extend_hits.expand_hits_from_fasta(
            all_short,
            args_inner.blast_db,
            len(query),
            extra=args_inner.subseq_window_locarna,
            blast_regexp=args_inner.blast_regexp,
            skip_missing=args_inner.skip_missing,
            msgs=analyzed_hits.msgs,
            format=args_inner.db_type,
            entrez_email=args_inner.entrez,
            blast_input_file=args_inner.blast_in,
        )
    else:
        raise exceptions.IncorrectDatabaseChoice()

    shorts_expanded = BA_support.rc_hits_2_rna(shorts_expanded)

    query_seq = query.seq.transcribe()

    # One argument tuple per expanded hit; locarna_worker takes a single
    # packed argument so the same job list serves both execution paths.
    jobs = [
        (
            oeh,
            query_seq,
            args_inner.locarna_params,
            args_inner.locarna_anchor_length,
        )
        for oeh in shorts_expanded
    ]

    # compute alignment here
    if args_inner.threads == 1:
        result = [locarna_worker(job) for job in jobs]
    else:
        # The context manager releases the worker processes even when
        # pool.map raises; previously the pool leaked on that path.
        # NOTE(review): assumes Pool is multiprocessing.Pool (which supports
        # the context-manager protocol) - confirm against the file's imports.
        with Pool(processes=args_inner.threads) as pool:
            result = pool.map(locarna_worker, jobs)

    for res in result:
        if res.extension is None:
            analyzed_hits.hits_failed.append(res)
        else:
            analyzed_hits.hits.append(res)

    # this part predicts homology - it is not truly part of repredict
    homology_prediction, homol_seqs, cm_file_rfam_user = infer_homology(
        analyzed_hits=analyzed_hits,
        args=args_inner,
        cm_model_file=ih_model,
        multi_query=multi_query,
        iteration=iteration,
    )
    # add homology prediction to the data
    for hit, pred in zip(analyzed_hits.hits, homology_prediction):
        hit.hpred = pred
    return analyzed_hits, homology_prediction, homol_seqs, cm_file_rfam_user