예제 #1
0
def _score_sequences(sequences, method_name):
    """Call ``SequenceParameters(str(seq)).<method_name>()`` for each item.

    Scoring is best-effort: any sequence for which construction or the
    method call raises is recorded as 0, so the returned list always has
    one entry per input item.
    """
    scores = []
    for sequence in sequences:
        try:
            params = SequenceParameters(str(sequence))
            scores.append(getattr(params, method_name)())
        except Exception:
            # Deliberate fallback: a sequence that cannot be scored
            # contributes 0 instead of aborting the whole analysis.
            scores.append(0)
    return scores


def aa_analysis(df, property):
    """Score both sides of each ``Amino_acids`` entry and their difference.

    Rows whose ``Amino_acids`` value is null are dropped. The remaining
    values are split on ``'/'`` into two parts (``AA1`` before the slash,
    ``AA2`` after it), each part is scored with a ``SequenceParameters``
    method selected by ``property``, and the per-row difference
    ``AA2 - AA1`` is computed.

    Args:
        df: DataFrame with an ``Amino_acids`` column of ``'X/Y'``-style
            strings. It is not modified; the work is done on a copy.
        property: ``"ncpr"`` (uses ``get_NCPR``) or
            ``"uversky_hydropathy"`` (uses ``get_uversky_hydropathy``).

    Returns:
        A DataFrame with columns ``AA1_Iso``, ``AA2_Iso`` and
        ``AA_Iso_Delta``, indexed like the non-null rows of ``df``. The
        ``*_Iso`` names are kept for every property for compatibility,
        whichever property was actually computed. For any other
        ``property`` value, ``df`` itself is returned unchanged.
    """
    if property == "ncpr":
        method_name = "get_NCPR"
    elif property == "uversky_hydropathy":
        method_name = "get_uversky_hydropathy"
    else:
        return df

    # Explicit copy so the column assignments below never write into a
    # view of the caller's frame (avoids chained-assignment warnings).
    work = df[df["Amino_acids"].notna()].copy()
    work[["AA1", "AA2"]] = work["Amino_acids"].str.split("/", expand=True)

    work["AA1_Iso"] = _score_sequences(work["AA1"], method_name)
    work["AA2_Iso"] = _score_sequences(work["AA2"], method_name)
    work["AA_Iso_Delta"] = work["AA2_Iso"] - work["AA1_Iso"]
    return work[["AA1_Iso", "AA2_Iso", "AA_Iso_Delta"]]
예제 #2
0
def get_features_physchem(seq):
    """Collect the physicochemical feature values of ``seq`` into one dict.

    The residue-class fractions and the isoelectric point come from helper
    functions called on ``seq``; hydropathy and PPII propensity come from a
    ``SequenceParameters`` object built from ``seq``; ``loglen`` is
    ``log2(len(seq))``.

    Returns:
        dict mapping feature name to value, with keys in the order they
        are assigned below.
    """
    params = SequenceParameters(seq)

    features = {}
    # Composition fractions.
    features['fraction_acidic'] = fraction_acidic(seq)
    features['fraction_basic'] = fraction_basic(seq)
    features['fraction_aliphatic'] = fraction_aliphatic(seq)
    features['fraction_aromatic'] = fraction_aromatic(seq)
    features['fraction_polar'] = fraction_polar(seq)
    features['fraction_disorder'] = fraction_disorder(seq)
    features['fraction_chainexp'] = fraction_chainexp(seq)
    # Whole-sequence scalar properties.
    features['hydropathy'] = params.get_uversky_hydropathy()
    features['isopoint'] = predict_isoelectric_point(seq)
    features['loglen'] = log2(len(seq))
    features['PPII_propensity'] = params.get_PPII_propensity()
    return features
예제 #3
0
def feat_physchem(seq):
    """Build a dict of physicochemical features computed from ``seq``.

    Combines the ``frac_*`` helper results, ``log2`` of the sequence
    length, the hydropathy and PPII propensity reported by a
    ``SequenceParameters`` object, and the isoelectric point reported by
    ``ProteinAnalysis``.

    Returns:
        dict mapping feature name to value, in the order listed below.
    """
    analyzer = SequenceParameters(seq)
    # Keyword form keeps both the key order and the evaluation order of
    # the individual feature computations.
    return dict(
        frac_acidic=frac_acidic(seq),
        frac_basic=frac_basic(seq),
        frac_aliphatic=frac_aliphatic(seq),
        frac_chainexp=frac_chainexp(seq),
        frac_polar=frac_polar(seq),
        frac_aromatic=frac_aromatic(seq),
        frac_disorder=frac_disorder(seq),
        loglen=log2(len(seq)),
        hydropathy=analyzer.get_uversky_hydropathy(),
        iso_point=ProteinAnalysis(seq).isoelectric_point(),
        PPII_prop=analyzer.get_PPII_propensity(),
    )