예제 #1
0
def create_precision_report_row(
    classification: float, gt_conf: float = 0, sample: str = "sample1"
) -> pd.Series:
    """Build one precision-report row as a pandas Series.

    The reference probe header is a default-constructed ``ProbeHeader``; the
    query probe header is a ``ProbeHeader`` carrying ``gt_conf``. Both are
    stored in the row as their ``str()`` form.

    Args:
        classification: Value stored under the ``"classification"`` key.
        gt_conf: Passed to the query ``ProbeHeader`` as ``gt_conf``.
        sample: Value stored under the ``"sample"`` key.

    Returns:
        A Series with the keys ``sample``, ``query_probe_header``,
        ``ref_probe_header`` and ``classification``, in that order.
    """
    reference_header = ProbeHeader()
    query_header = ProbeHeader(gt_conf=gt_conf)
    row = dict(
        sample=sample,
        query_probe_header=str(query_header),
        ref_probe_header=str(reference_header),
        classification=classification,
    )
    return pd.Series(row)
예제 #2
0
def create_classifier_with_two_entries(cls: Type[Classifier]) -> Classifier:
    """Create a ``cls`` instance backed by a SAM file holding two records.

    The SAM text consists of the header produced by
    ``create_sam_header(str(ref_header), 64)`` followed by two alignment
    records against the same reference probe:

    1. flag 0, ``56M``, ``NM:i:0`` / ``MD:Z:56``, MAPQ 60, position 1.
    2. flag 2048, ``43M``, ``NM:i:1`` / ``MD:Z:21T21``, MAPQ 0, position 5.

    Args:
        cls: Class to instantiate; it is called with the object returned by
            ``create_tmp_sam``.

    Returns:
        The object produced by ``cls(sam)`` (an instance, not a class).
    """
    # One template for both records so the 15 tab-separated columns cannot
    # drift apart between the two entries.
    record_template = (
        "{query}\t{flag}\t{ref}\t{pos}\t{mapq}\t{cigar}\t*\t0\t0\t{sequence}\t*"
        "\t{nm}\t{md}\tAS:i:43\tXS:i:32"
    )

    first_query = ProbeHeader(interval=ProbeInterval(12, 17))
    ref_header = ProbeHeader(
        chrom="GC00000422_2",
        sample="CFT073",
        pos=603,
        interval=ProbeInterval(25, 32),
        svtype="PH_SNPs",
        gt_conf=89.5987,
    )

    # This header is the only one written into the SAM text.
    header = create_sam_header(str(ref_header), 64)
    record1 = pysam.AlignedSegment.fromstring(
        record_template.format(
            query=first_query,
            flag=0,
            ref=ref_header,
            pos=1,
            mapq=60,
            cigar="56M",
            sequence="AAAAAAAAAAACGGCTCGCATAGACACGACGACGACACGTACGATCGATCAGTCAT",
            nm="NM:i:0",
            md="MD:Z:56",
        ),
        header,
    )

    second_query = ProbeHeader(
        chrom="3", pos=14788, interval=ProbeInterval(21, 22)
    )
    # A second header (second argument 57 instead of 64) is used only to parse
    # the second record; it is never written to the SAM text.
    second_header = create_sam_header(str(ref_header), 57)
    record2 = pysam.AlignedSegment.fromstring(
        record_template.format(
            query=second_query,
            flag=2048,
            ref=ref_header,
            pos=5,
            mapq=0,
            cigar="43M",
            sequence="CGCGAAAGCCCTGACCATCTGCACCGTGTCTGACCACATCCGC",
            nm="NM:i:1",
            md="MD:Z:21T21",
        ),
        second_header,
    )

    # Header line followed by both records, each terminated by a newline.
    contents = (
        "\n".join([str(header), record1.to_string(), record2.to_string()]) + "\n"
    )
    sam = create_tmp_sam(contents)
    return cls(sam)
예제 #3
0
def create_incorrect_primary_sam_record() -> pysam.AlignedSegment:
    """Return a flag-0 SAM record with a single mismatch (``MD:Z:12T43``).

    The record aligns a query probe with interval (12, 13) to the
    ``GC00000422_2`` reference probe at position 1 with CIGAR ``56M``,
    MAPQ 60 and ``NM:i:1``.

    NOTE(review): the mismatch offset in the MD tag presumably falls inside
    the query probe interval, which would be why this record is named
    "incorrect" -- confirm against the classifier under test.

    Returns:
        The ``pysam.AlignedSegment`` parsed from the record line, using a
        header built by ``create_sam_header(str(ref_header), 64)``.
    """
    probe = ProbeHeader(interval=ProbeInterval(12, 13))
    target = ProbeHeader(
        chrom="GC00000422_2",
        sample="CFT073",
        pos=603,
        interval=ProbeInterval(25, 32),
        svtype="PH_SNPs",
        gt_conf=89.5987,
    )
    sam_header = create_sam_header(str(target), 64)

    # The tab-separated columns of the record line, in order.
    columns = (
        probe,
        0,
        target,
        1,
        60,
        "56M",
        "*",
        0,
        0,
        "AAAAAAAAAAACGGCTCGCATAGACACGACGACGACACGTACGATCGATCAGTCAT",
        "*",
        "NM:i:1",
        "MD:Z:12T43",
        "AS:i:43",
        "XS:i:32",
    )
    # format(x) applies the same conversion an f-string placeholder would.
    sam_line = "\t".join(format(column) for column in columns)
    return pysam.AlignedSegment.fromstring(sam_line, sam_header)
예제 #4
0
def create_correct_supplementary_sam_record() -> pysam.AlignedSegment:
    """Return a flag-2048 SAM record with a single mismatch (``MD:Z:19T23``).

    The record aligns a query probe (chrom ``"3"``, pos 14788, interval
    (21, 22)) to the ``GC00000422_2`` reference probe at position 5 with
    CIGAR ``43M``, MAPQ 0 and ``NM:i:1``.

    NOTE(review): the mismatch offset in the MD tag presumably falls outside
    the query probe interval, which would be why this record is named
    "correct" -- confirm against the classifier under test.

    Returns:
        The ``pysam.AlignedSegment`` parsed from the record line, using a
        header built by ``create_sam_header(str(ref_header), 57)``.
    """
    probe = ProbeHeader(chrom="3", pos=14788, interval=ProbeInterval(21, 22))
    target = ProbeHeader(
        chrom="GC00000422_2",
        sample="CFT073",
        pos=603,
        interval=ProbeInterval(25, 32),
        svtype="PH_SNPs",
        gt_conf=89.5987,
    )
    sam_header = create_sam_header(str(target), 57)

    layout = (
        "{qname}\t{flag}\t{rname}\t{pos}\t{mapq}\t{cigar}\t*\t0\t0\t{seq}\t*"
        "\t{nm}\t{md}\tAS:i:43\tXS:i:32"
    )
    sam_line = layout.format(
        qname=probe,
        flag=2048,
        rname=target,
        pos=5,
        mapq=0,
        cigar="43M",
        seq="CGCGAAAGCCCTGACCATCTGCACCGTGTCTGACCACATCCGC",
        nm="NM:i:1",
        md="MD:Z:19T23",
    )
    return pysam.AlignedSegment.fromstring(sam_line, sam_header)
예제 #5
0
def create_recall_report_row(
    truth_probe_header:str, classification: AlignmentAssessment, gt_conf: float = 0, sample: str = "sample1", with_gt_conf=False
) -> pd.Series:
    """Build one recall-report row as a pandas Series.

    Args:
        truth_probe_header: Stored (via ``str()``) as ``query_probe_header``.
        classification: Its ``.value`` is stored under ``"classification"``
            and decides ``"good_eval"``.
        gt_conf: Passed to the ``ProbeHeader`` stored as ``ref_probe_header``;
            also stored under ``"GT_CONF"`` when ``with_gt_conf`` is truthy.
        sample: Value stored under the ``"sample"`` key.
        with_gt_conf: When truthy, append a ``"GT_CONF"`` entry to the row.

    Returns:
        A Series holding the report fields, the ``None``-valued placeholder
        columns and, optionally, ``GT_CONF``.
    """
    called_header = ProbeHeader(gt_conf=gt_conf)
    label = classification.value

    row = {
        "sample": sample,
        "query_probe_header": str(truth_probe_header),
        "ref_probe_header": str(called_header),
        "classification": label,
        # True only for the three "*_correct" classification values.
        "good_eval": label
        in ("primary_correct", "secondary_correct", "supplementary_correct"),
    }
    # Placeholder columns: present in the row but left unset.
    row.update(
        dict.fromkeys(
            (
                "PVID",
                "NB_ALL",
                "ALL_ID",
                "NB_DIFF_ALL_SEQ",
                "ALL_SEQ_ID",
                "NB_OF_SAMPLES",
            )
        )
    )
    if with_gt_conf:
        row["GT_CONF"] = gt_conf

    return pd.Series(row)