Python Clone.jalleles Examples

Programming Language: Python

Namespace/Package Name: aimseqtk.lib.clone

Class/Type: Clone

Method/Function: jalleles

Examples at hotexamples.com: 2

Python Clone.jalleles - 2 examples found. These are the top rated real world Python examples of aimseqtk.lib.clone.Clone.jalleles extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

aa(3)

cdr3aa(3)

productive(3)

lastvpos(3)

dgenes(3)

firstdpos(3)

firstjpos(3)

lastdpos(3)

valleles(2)

nuc(2)

jalleles(2)

jdel(2)

dalleles(2)

d5del(2)

cdr3nuc(2)

vdel(2)

id(1)

patient(1)

d3del(1)

samplename(1)

Example #1

Show file

File: mitcr.py Project: ngannguyen/aimseqtk

def _mitcr_split_names(value):
    """Split a ', '-separated MiTCR name list, dropping empty entries."""
    return [name for name in value.split(', ') if name]


def mitcr_parseline(line, index2col):
    """Parse one tab-separated MiTCR output line into a Clone.

    Args:
        line: A single data line of the MiTCR output.
        index2col: Dict mapping a zero-based column index to the column
            name taken from the header line.

    Returns:
        A Clone built from the line, or None when the number of columns
        differs from the header (a message is written to stderr) or when
        any of the required columns is missing or empty.

    Raises:
        ValueError: If "Read count", "Percentage" or a non-empty position
            column cannot be converted to a number.
    """
    # Strip only the line terminator: a plain strip() would also remove
    # trailing tabs, dropping empty trailing columns and making an otherwise
    # valid line look inconsistent with the header.
    items = line.rstrip('\r\n').split('\t')
    if len(items) != len(index2col):
        sys.stderr.write("Inconsistent number of columns between the "
                         "following line and the header line, skipped it:\n"
                         "Line:\n%s\n" % line)
        return None

    # Keep only the columns that mitcr_columns() recognises.
    valid_cols = mitcr_columns()
    col2val = {}
    for i, col in index2col.items():
        if col in valid_cols:
            col2val[col] = items[i]

    # Return None if line does not have minimum required fields.
    required_cols = ["Read count", "Percentage", "CDR3 nucleotide sequence",
                     "V segments", "J segments"]
    for c in required_cols:
        if not col2val.get(c):
            return None

    count = int(col2val['Read count'])
    freq = float(col2val['Percentage']) / 100.0  # percentage -> fraction
    nuc = col2val['CDR3 nucleotide sequence']
    vgenes = col2val['V segments'].split(', ')
    jgenes = col2val['J segments'].split(', ')

    clone = Clone(count, freq, nuc, vgenes, jgenes, cdr3nuc=nuc)

    clone.productive = True  # Assuming MiTCR only output productive clones

    # Optional gene/allele lists. The attribute is assigned whenever the
    # column is present; an empty column gives [] instead of [''].
    list_cols = (('D segments', 'dgenes'),
                 ('V alleles', 'valleles'),
                 ('J alleles', 'jalleles'),
                 ('D alleles', 'dalleles'))
    for col, attr in list_cols:
        if col in col2val:
            setattr(clone, attr, _mitcr_split_names(col2val[col]))

    if 'CDR3 amino acid sequence' in col2val:
        clone.aa = col2val['CDR3 amino acid sequence']
        clone.cdr3aa = col2val['CDR3 amino acid sequence']

    # Optional junction positions. An empty value is skipped (the attribute
    # is left untouched) rather than crashing in int('').
    position_cols = (('Last V nucleotide position', 'lastvpos'),
                     ('First D nucleotide position', 'firstdpos'),
                     ('Last D nucleotide position', 'lastdpos'),
                     ('First J nucleotide position', 'firstjpos'))
    for col, attr in position_cols:
        value = col2val.get(col)
        if value:
            setattr(clone, attr, int(value))

    return clone

Example #2

Show file

File: sequenta.py Project: ngannguyen/aimseqtk

def sequenta_parseline(line, index2col):
    """Parse one tab-separated Sequenta output line into a Clone.

    Args:
        line: A single data line of the Sequenta output.
        index2col: Dict mapping a zero-based column index to the column
            name taken from the header line.

    Returns:
        A Clone built from the line, or None when any of these holds:
          * the number of columns differs from the header (a message is
            written to stderr);
          * the "Patient" column equals "Water";
          * a required column is missing or is "NAN", "" or "-";
          * "Log10_Frequency" is not a usable number;
          * the CDR3 sequence cannot be translated, or cannot be located in
            the clone sequence or its reverse complement.

    Note:
        When no D gene is available and the J group is not exactly ["1"],
        the D gene is picked with random.choice, so the result is not
        deterministic for such lines.
    """
    missing = ("NAN", "", "-")

    items = line.strip("\n").split("\t")
    if len(items) != len(index2col):
        sys.stderr.write(
            "Inconsistent number of columns between the following "
            "line and the header line, skipped it:\n"
            "Line:\n%s\n" % line
        )
        return None

    # Keep only the columns that sequenta_columns() recognises.
    valid_cols = sequenta_columns()
    col2val = {}
    for i, col in index2col.items():
        if col in valid_cols:
            col2val[col] = items[i]

    # Return None if clone is "Water"
    if "Patient" in col2val and col2val["Patient"] == "Water":
        return None

    # Return None if line does not have minimum required fields.
    required_cols = [
        "Total_Read_Count",
        "Log10_Frequency",
        "Clone_Sequence",
        "V_Segment_Major_Gene",
        "J_Segment_Major_Gene",
    ]
    for c in required_cols:
        if c not in col2val or col2val[c] in missing:
            return None

    count = libcommon.soft_int(col2val["Total_Read_Count"])
    try:
        freq = 10 ** float(col2val["Log10_Frequency"])
    except (ValueError, OverflowError):
        # Return None if clone does not have a valid frequency
        return None
    nuc = col2val["Clone_Sequence"]
    vgenes = col2val["V_Segment_Major_Gene"].split("; ")
    jgenes = col2val["J_Segment_Major_Gene"].split("; ")
    # Clone with required fields
    clone = Clone(count, freq, nuc, vgenes, jgenes)

    # Additional information if available
    # Gene info:
    if "D_Segment_Major_Allele" in col2val:
        dstr = col2val["D_Segment_Major_Allele"]
        if dstr not in missing:
            dalleles = dstr.split("; ")
            # Gene name is the allele name up to the '*'; keep first-seen
            # order and drop duplicates.
            dgenes = []
            for d in dalleles:
                dgene = d.split("*")[0]
                if dgene not in dgenes:
                    dgenes.append(dgene)
            clone.dgenes = dgenes
            clone.dalleles = dalleles
    if not clone.dgenes:  # no dgenes info
        jgroups = get_j_groups(clone.jgenes)
        if ["1"] == jgroups:
            clone.dgenes = ["TRBD1"]
        else:
            clone.dgenes = [random.choice(["TRBD1", "TRBD2"])]

    if "V_Segment_Major_Allele" in col2val:
        clone.valleles = col2val["V_Segment_Major_Allele"].split("; ")
    if "J_Segment_Major_Allele" in col2val:
        clone.jalleles = col2val["J_Segment_Major_Allele"].split("; ")

    # Sequence ID, status and cdr3aa:
    if "Sample" in col2val:
        clone.samplename = col2val["Sample"]
    if "Patient" in col2val:
        clone.patient = col2val["Patient"]
    if "Clone_Index" in col2val:
        clone.id = col2val["Clone_Index"]
    if "Is_Good_Frame" in col2val:
        clone.productive = col2val["Is_Good_Frame"].lower() == "true"
    if "Clone_Protein_Sequence" in col2val:
        clone.aa = col2val["Clone_Protein_Sequence"].replace("*", "Z")

    offset = 0
    if "CDR3_Sense_Sequence" in col2val:
        clone.cdr3nuc = col2val["CDR3_Sense_Sequence"]
        # Literal substring test: the CDR3 is sequence data, not a regular
        # expression, so it must not be handed to re.search as a pattern.
        if clone.cdr3nuc not in clone.nuc:
            clone.nuc = libcommon.rc(clone.nuc)
        try:
            clone.cdr3aa = sequenta_getaa(clone.cdr3nuc)
        except Exception:  # return None if cannot translate cdr3nuc
            return None
        # Make sure nuc is in frame
        cdr3start = clone.nuc.find(clone.cdr3nuc)
        if cdr3start < 0:
            # CDR3 is in neither orientation of the clone sequence; reject
            # the line instead of failing on a missing match.
            return None
        offset = cdr3start % 3
        nuclen = len(clone.nuc)
        endoffset = (nuclen - offset) % 3
        # Trim both ends so nuc starts in the CDR3's frame and its length
        # is a multiple of three.
        clone.nuc = clone.nuc[offset : nuclen - endoffset]

    # Junctional info:
    if "V_Segment_Extension_Length" in col2val:
        vins = libcommon.soft_int(col2val["V_Segment_Extension_Length"])
        # Shift by the number of bases trimmed from the start of nuc.
        clone.lastvpos = vins - 1 - offset
        if "N_Bases_adjacent_V" in col2val:
            d5ins = col2val["N_Bases_adjacent_V"]
            if not d5ins.startswith("-") and d5ins not in ["", "NAN"]:
                clone.firstdpos = clone.lastvpos + int(d5ins) + 1
    if "J_Segment_Extension_Length" in col2val:
        jins = libcommon.soft_int(col2val["J_Segment_Extension_Length"])
        clone.firstjpos = len(clone.nuc) - jins
        if "N_Bases_adjacent_J" in col2val:
            d3ins = col2val["N_Bases_adjacent_J"]
            if not d3ins.startswith("-") and d3ins not in ["", "NAN"]:
                clone.lastdpos = clone.firstjpos - int(d3ins) - 1

    # Deletions:
    if "V_Segment_Deletion_Length" in col2val:
        vdel = col2val["V_Segment_Deletion_Length"]
        if not vdel.startswith("-") and vdel not in ["", "NAN"]:
            clone.vdel = libcommon.soft_int(vdel)
    if "J_Segment_Deletion_Length" in col2val:
        jdel = col2val["J_Segment_Deletion_Length"]
        if not jdel.startswith("-") and jdel not in ["", "NAN"]:
            clone.jdel = libcommon.soft_int(jdel)

    # Special treatment for D info:
    d2fulllen = {"TRBD1": 12, "TRBD2": 16}
    if "D_Segment_length" in col2val:
        dgene = clone.dgenes[0]
        dfulllen = d2fulllen[dgene]
        dlen = col2val["D_Segment_length"]
        if not dlen.startswith("-") and dlen not in ["", "NAN"]:
            ddel = dfulllen - int(dlen)
            clone.d5del, clone.d3del = get_ddels(ddel)
        else:  # all D was deleted
            clone.d5del, clone.d3del = get_ddels(dfulllen)
            # Place an empty D (lastdpos == firstdpos - 1) in the middle of
            # the V-J gap. Floor division keeps the position an integer on
            # both Python 2 and Python 3.
            # NOTE(review): assumes firstjpos and lastvpos are ints here,
            # i.e. both extension-length columns were present - confirm.
            ndn = clone.firstjpos - clone.lastvpos
            clone.firstdpos = clone.lastvpos + ndn // 2 + 1
            clone.lastdpos = clone.firstdpos - 1

    return clone