def _score_sequences(sequences, method_name):
    """Score each sequence with the named ``SequenceParameters`` getter.

    Args:
        sequences: Iterable of amino-acid strings; entries may be missing
            (``None``/``NaN``).
        method_name: Name of a zero-argument ``SequenceParameters`` method,
            e.g. ``"get_NCPR"``.

    Returns:
        A list with one score per input entry. Entries that are missing or
        that cannot be scored get ``0``.
    """
    scores = []
    for sequence in sequences:
        if pd.isna(sequence):
            # Score missing alleles as 0 directly instead of stringifying
            # them: the text "nan" consists of valid residue letters and
            # would otherwise be scored as a real peptide.
            scores.append(0)
            continue
        try:
            score = getattr(SequenceParameters(str(sequence)), method_name)()
        except Exception:
            # Deliberate best-effort: an unscorable sequence counts as 0
            # rather than aborting the whole table. Unlike a bare
            # ``except``, this lets KeyboardInterrupt/SystemExit through.
            score = 0
        scores.append(score)
    return scores


def aa_analysis(df, property):
    """Compute a per-allele sequence property and its change for each row.

    The ``Amino_acids`` column is expected to hold ``"REF/ALT"`` strings.
    Rows where it is null are dropped, both sides are scored with the
    selected ``SequenceParameters`` getter, and the difference
    (ALT minus REF) is reported.

    Args:
        df: DataFrame with an ``Amino_acids`` column.
        property: ``"ncpr"`` (uses ``get_NCPR``) or ``"uversky_hydropathy"``
            (uses ``get_uversky_hydropathy``). The name shadows the builtin
            but is kept so keyword callers continue to work.

    Returns:
        A new DataFrame indexed like the non-null rows of ``df`` with the
        columns ``AA1_Iso``, ``AA2_Iso`` and ``AA_Iso_Delta``. The ``_Iso``
        column names are kept for compatibility even though they hold the
        selected property. For any other ``property`` value, ``df`` itself
        is returned unchanged.
    """
    getters = {
        "ncpr": "get_NCPR",
        "uversky_hydropathy": "get_uversky_hydropathy",
    }
    result_columns = ["AA1_Iso", "AA2_Iso", "AA_Iso_Delta"]

    method_name = getters.get(property)
    if method_name is None:
        # Unrecognised property: pass the input through untouched.
        return df

    alleles = df.loc[df["Amino_acids"].notna(), "Amino_acids"]
    if alleles.empty:
        return pd.DataFrame(columns=result_columns, index=alleles.index)

    # n=1 plus reindex guarantees exactly two columns, even when no row
    # contains "/" (second column all missing) or a row contains several.
    parts = alleles.str.split("/", n=1, expand=True).reindex(columns=[0, 1])

    result = pd.DataFrame(
        {
            "AA1_Iso": _score_sequences(parts[0], method_name),
            "AA2_Iso": _score_sequences(parts[1], method_name),
        },
        index=alleles.index,
    )
    result["AA_Iso_Delta"] = result["AA2_Iso"] - result["AA1_Iso"]
    return result[result_columns]
# SequenceParameters(seqfile=file) cannot be used here because it would
# append all the sequences in the file to each other, so each record is
# parsed and analysed individually.
#
# Writes one line per record to "<file>_charge":
#     header, pH, molecular weight, net charge
report_ph = 7.0  # single source for the pH used in the calculation and the report

# The context manager closes the output even if a record fails part-way.
with open(file + "_charge", 'w+') as output:
    for protein in contents:
        if not protein:
            continue  # skip empty records

        # The first line is the header; everything after it is the sequence.
        newline_pos = protein.index('\n')
        header = protein[0:newline_pos]
        # NOTE(review): this slice keeps the newline that follows the header
        # and drops the record's final character (presumably a trailing
        # newline). Confirm against how `contents` is built.
        seq = protein[newline_pos:-1]
        print(header)

        if 'X' in seq:
            print("Warning: unspecified protein encountered.")
            seq = seq.replace('X', '')

        seq_param = SequenceParameters(seq)
        # Per the original note, mean net charge is always positive whereas
        # net charge per residue is signed, so NCPR * length is used to get
        # the net charge.
        net_charge = seq_param.get_NCPR(pH=report_ph) * seq_param.get_length()
        print(net_charge)

        fields = [
            header,
            str(report_ph),
            str(seq_param.get_molecular_weight()),
            str(net_charge),
        ]
        output.write(", ".join(fields) + '\n')