def _sequence_parameter(sequence, getter_name):
    """Return ``SequenceParameters(sequence).<getter_name>()``, or 0 on failure.

    ``SequenceParameters`` is expected to be imported at file level
    (presumably from localCIDER -- confirm against the file's imports).
    """
    try:
        return getattr(SequenceParameters(str(sequence)), getter_name)()
    except Exception:
        # Deliberate best-effort: a sequence the analyser rejects scores 0
        # instead of aborting the whole table. Unlike a bare ``except:``,
        # KeyboardInterrupt/SystemExit still propagate.
        return 0


def aa_analysis(df, property):
    """Score both alleles of each ``Amino_acids`` entry and their difference.

    Rows whose ``Amino_acids`` value is null are dropped. Each remaining
    value is split at the first ``/`` into a first and a second allele,
    and the requested sequence parameter is computed for each one.

    Args:
        df: DataFrame with an ``Amino_acids`` column holding strings such
            as ``"A/V"``. It is not modified.
        property: ``"ncpr"`` (uses ``get_NCPR``) or ``"uversky_hydropathy"``
            (uses ``get_uversky_hydropathy``). The name shadows the
            builtin but is kept so keyword callers keep working.

    Returns:
        A new DataFrame indexed like the non-null rows of ``df`` with the
        columns ``AA1_Iso``, ``AA2_Iso`` and ``AA_Iso_Delta`` (second minus
        first). The ``Iso`` column names are historical; the values are the
        requested parameter. For any other ``property`` value, ``df`` itself
        is returned unchanged.

    Raises:
        KeyError: if ``df`` has no ``Amino_acids`` column.
    """
    getter_by_property = {
        "ncpr": "get_NCPR",
        "uversky_hydropathy": "get_uversky_hydropathy",
    }
    getter_name = getter_by_property.get(property)
    if getter_name is None:
        return df

    amino_acids = df["Amino_acids"]
    amino_acids = amino_acids[amino_acids.notna()]

    first_scores = []
    second_scores = []
    for entry in amino_acids:
        # partition() always yields exactly two alleles, so entries without
        # a "/" (or with several) no longer break a two-column assignment.
        first, separator, second = str(entry).partition("/")
        first_scores.append(_sequence_parameter(first, getter_name))
        # No "/" means there is no second allele: use the fallback score
        # rather than analysing a stringified missing value.
        second_scores.append(
            _sequence_parameter(second, getter_name) if separator else 0
        )

    # Build a fresh frame instead of assigning columns onto a filtered
    # slice, which avoids SettingWithCopyWarning and leaves ``df`` untouched.
    result = pd.DataFrame(
        {"AA1_Iso": first_scores, "AA2_Iso": second_scores},
        index=amino_acids.index,
    )
    result["AA_Iso_Delta"] = result["AA2_Iso"] - result["AA1_Iso"]
    return result
Beispiel #2
0
    #can't use SeqParam(seqfile=file)
    #because all the sequences are appended to each other.
    output = open(file + "_charge", 'w+')
    for protein in [x for x in contents if x]:

        header = protein[0:protein.index('\n')]
        seq = protein[protein.index('\n'):-1]

        print(header)
        if ('X' in seq):
            print("Warning: unspecified protein encountered.")
        seq = seq.replace('X', '')

        seq_param = SequenceParameters(seq)

        #mean_net_charge is always positive, whereas
        # net_charge_per_residue is alternating
        net_charge = seq_param.get_NCPR(pH=7.0) * seq_param.get_length()
        print(net_charge)

        output.write(header)
        output.write(", ")
        output.write(str(7.0))
        output.write(str(", "))
        output.write(str(seq_param.get_molecular_weight()))
        output.write(str(", "))
        output.write(str(net_charge))
        output.write('\n')

    output.close()