def _score_sequences(sequences, getter_name):
    """Score each sequence with the named ``SequenceParameters`` getter.

    Args:
        sequences: Iterable of sequence values (strings or missing values).
        getter_name: Name of a zero-argument ``SequenceParameters`` method,
            e.g. ``"get_NCPR"``.

    Returns:
        A list with one score per input, in input order. Missing values and
        sequences the scorer fails on contribute ``0``.
    """
    scores = []
    for sequence in sequences:
        # A missing allele scores 0 directly instead of being stringified
        # ("None"/"nan") and handed to the scorer.
        if pd.isnull(sequence):
            scores.append(0)
            continue
        try:
            params = SequenceParameters(str(sequence))
            scores.append(getattr(params, getter_name)())
        except Exception:
            # Best-effort by design: any sequence that cannot be scored
            # counts as 0 rather than aborting the whole analysis.
            scores.append(0)
    return scores


def aa_analysis(df, property):
    """Score the two alleles of each amino-acid change and their difference.

    Rows whose ``Amino_acids`` value is null are dropped. The remaining
    values are split on ``'/'`` into a first and second sequence, and each is
    scored with the ``SequenceParameters`` getter selected by ``property``.

    Args:
        df: DataFrame with an ``Amino_acids`` column holding strings of the
            form ``"<first>/<second>"``. The input frame is not modified.
        property: ``"ncpr"`` (uses ``get_NCPR``) or ``"uversky_hydropathy"``
            (uses ``get_uversky_hydropathy``).

    Returns:
        A new DataFrame indexed like the non-null rows of ``df`` with columns
        ``AA1_Iso``, ``AA2_Iso`` and ``AA_Iso_Delta`` (second minus first).
        The ``_Iso`` suffix is kept for compatibility; the columns hold the
        selected property, whichever it is. For any other ``property`` the
        input ``df`` is returned unchanged.
    """
    if property == "ncpr":
        getter_name = "get_NCPR"
    elif property == "uversky_hydropathy":
        getter_name = "get_uversky_hydropathy"
    else:
        return df

    result_columns = ["AA1_Iso", "AA2_Iso", "AA_Iso_Delta"]
    variants = df[pd.notnull(df["Amino_acids"])]
    if variants.empty:
        # Nothing to split or score; return the expected, empty shape.
        return pd.DataFrame(columns=result_columns, index=variants.index)

    # Normalise to exactly two allele columns: values without a '/' get a
    # missing second allele, and any parts beyond the second are ignored.
    alleles = (
        variants["Amino_acids"]
        .str.split("/", expand=True)
        .reindex(columns=[0, 1])
    )

    # Build the result in a fresh frame so no column is ever assigned onto a
    # filtered view of the caller's data.
    result = pd.DataFrame(index=variants.index)
    result["AA1_Iso"] = _score_sequences(alleles[0], getter_name)
    result["AA2_Iso"] = _score_sequences(alleles[1], getter_name)
    result["AA_Iso_Delta"] = result["AA2_Iso"] - result["AA1_Iso"]
    return result[result_columns]
def get_features_physchem(seq):
    """Collect the physicochemical feature values for ``seq`` into a dict.

    Each entry is produced by a helper called on ``seq`` itself or by a
    method of a ``SequenceParameters`` object built from ``seq``.

    Returns:
        dict mapping feature name to value, in the insertion order below.
    """
    params = SequenceParameters(seq)

    features = {}

    # Composition fractions, each computed by a dedicated helper.
    features['fraction_acidic'] = fraction_acidic(seq)
    features['fraction_basic'] = fraction_basic(seq)
    features['fraction_aliphatic'] = fraction_aliphatic(seq)
    features['fraction_aromatic'] = fraction_aromatic(seq)
    features['fraction_polar'] = fraction_polar(seq)
    features['fraction_disorder'] = fraction_disorder(seq)
    features['fraction_chainexp'] = fraction_chainexp(seq)

    # Whole-sequence descriptors.
    features['hydropathy'] = params.get_uversky_hydropathy()
    features['isopoint'] = predict_isoelectric_point(seq)
    features['loglen'] = log2(len(seq))
    features['PPII_propensity'] = params.get_PPII_propensity()

    return features
def feat_physchem(seq):
    """Return a dict of physicochemical features computed from ``seq``.

    Values come from the ``frac_*`` helpers, ``log2`` of the sequence
    length, ``ProteinAnalysis(seq).isoelectric_point()``, and two methods of
    a ``SequenceParameters`` object built from ``seq``.
    """
    cider = SequenceParameters(seq)

    # Evaluate in the same order the dictionary is populated.
    acidic = frac_acidic(seq)
    basic = frac_basic(seq)
    aliphatic = frac_aliphatic(seq)
    chainexp = frac_chainexp(seq)
    polar = frac_polar(seq)
    aromatic = frac_aromatic(seq)
    disorder = frac_disorder(seq)
    log_length = log2(len(seq))
    hydropathy = cider.get_uversky_hydropathy()
    iso_point = ProteinAnalysis(seq).isoelectric_point()
    ppii = cider.get_PPII_propensity()

    return {
        'frac_acidic': acidic,
        'frac_basic': basic,
        'frac_aliphatic': aliphatic,
        'frac_chainexp': chainexp,
        'frac_polar': polar,
        'frac_aromatic': aromatic,
        'frac_disorder': disorder,
        'loglen': log_length,
        'hydropathy': hydropathy,
        'iso_point': iso_point,
        'PPII_prop': ppii,
    }