Exemple #1
0
    def Decriptor_generator(self, ps):
        """Build one merged descriptor dictionary for the sequence ``ps``.

        A ``PyPro`` object is loaded with ``ps`` and the results of
        ``GetAAComp``, ``GetTriad``, ``GetPAAC``, ``GetAPAAC``, ``GetCTD``,
        ``GetGearyAuto``, ``GetMoranAuto``, ``GetMoreauBrotoAuto``, ``GetQSO``
        and ``GetSOCN`` are merged, in that order, into a single dict.
        ``GetDPComp`` and ``GetTPComp`` are intentionally not included
        (``GetTPComp`` was noted as slow).

        :param ps: protein sequence handed to ``PyPro.ReadProteinSequence``.
        :return: dict holding the union of all descriptor entries; if two
            descriptor sets share a key, the one merged later wins.
        """
        pro = PyPro()
        pro.ReadProteinSequence(ps)

        # Every descriptor set is computed first, then merged in list order.
        descriptor_sets = [
            pro.GetAAComp(),
            pro.GetTriad(),
            pro.GetPAAC(lamda=5, weight=0.5),  # takes time
            pro.GetAPAAC(lamda=5, weight=0.5),  # takes time
            pro.GetCTD(),
            pro.GetGearyAuto(),
            pro.GetMoranAuto(),
            pro.GetMoreauBrotoAuto(),
            pro.GetQSO(),
            pro.GetSOCN(),
        ]

        merged = {}
        for descriptor_set in descriptor_sets:
            merged.update(descriptor_set)
        return merged
    def Decriptor_generator(self, ps):
        """Build one merged descriptor dictionary for the sequence ``ps``.

        A ``PyPro`` object is loaded with ``ps``; the results of ``GetAPAAC``,
        ``GetCTD``, ``GetDPComp``, ``GetGearyAuto``, ``GetMoranAuto``,
        ``GetMoreauBrotoAuto``, ``GetQSO``, ``GetSOCN``, ``GetTPComp`` and
        finally ``GetPAAC`` are merged, in that order, into a single dict.

        :param ps: protein sequence handed to ``PyPro.ReadProteinSequence``.
        :return: dict holding the union of all descriptor entries; if two
            descriptor sets share a key, the one merged later wins.
        """
        pro = PyPro()
        pro.ReadProteinSequence(ps)

        # GetPAAC is computed first but merged last.
        paac = pro.GetPAAC(lamda=5, weight=0.5)
        remaining = [
            pro.GetAPAAC(lamda=5, weight=0.5),
            pro.GetCTD(),
            pro.GetDPComp(),
            pro.GetGearyAuto(),
            pro.GetMoranAuto(),
            pro.GetMoreauBrotoAuto(),
            pro.GetQSO(),
            pro.GetSOCN(),
            pro.GetTPComp(),
        ]

        merged = {}
        for descriptor_set in remaining + [paac]:
            merged.update(descriptor_set)
        return merged
def _read_sequences(infile):
    """Return the stripped, non-blank lines of ``infile`` that contain no ``>``."""
    with open(infile) as handle:
        stripped_lines = (line.strip() for line in handle)
        return [seq for seq in stripped_lines if seq and ">" not in seq]


def _single_descriptor_getters(lamda, weight, maxlag):
    """Map each single-descriptor ``destype`` name to a callable taking a PyPro object.

    Insertion order is the order in which the "All" mode merges the results.
    The ``int``/``float`` conversions run only when the getter that needs
    them is actually called.
    """
    return {
        "GetAAComp": lambda protein: protein.GetAAComp(),
        "GetDPComp": lambda protein: protein.GetDPComp(),
        "GetTPComp": lambda protein: protein.GetTPComp(),
        "GetMoreauBrotoAuto": lambda protein: protein.GetMoreauBrotoAuto(),
        "GetMoranAuto": lambda protein: protein.GetMoranAuto(),
        "GetGearyAuto": lambda protein: protein.GetGearyAuto(),
        "GetCTD": lambda protein: protein.GetCTD(),
        "GetPAAC": lambda protein: protein.GetPAAC(
            lamda=int(lamda), weight=float(weight)),
        "GetAPAAC": lambda protein: protein.GetAPAAC(
            lamda=int(lamda), weight=float(weight)),
        "GetSOCN": lambda protein: protein.GetSOCN(maxlag=int(maxlag)),
        "GetQSO": lambda protein: protein.GetQSO(
            maxlag=int(maxlag), weight=float(weight)),
        "GetTriad": lambda protein: protein.GetTriad(),
    }


def Decriptor_generator(infile, lamda, weight, maxlag, destype, out_file):
    """Compute descriptors for every sequence in ``infile`` and write a TSV.

    Each non-blank line of ``infile`` that does not contain ``>`` is treated
    as one sequence (surrounding whitespace is stripped). One output row is
    produced per sequence, in file order.

    :param infile: path of the input sequence file.
    :param lamda: passed as ``int(lamda)`` to ``GetPAAC`` / ``GetAPAAC``.
    :param weight: passed as ``float(weight)`` to ``GetPAAC`` / ``GetAPAAC`` /
        ``GetQSO``.
    :param maxlag: passed as ``int(maxlag)`` to ``GetSOCN`` / ``GetQSO``.
    :param destype: name of a single PyPro descriptor method (for example
        ``"GetAAComp"``), ``"All"`` to merge every single descriptor, or
        ``"BinaryDescriptor"`` to delegate to ``BinaryDescriptor``.
    :param out_file: path of the tab-separated output file (written without
        the index column).
    :raises ValueError: if ``destype`` is not one of the supported names.
    """
    getters = _single_descriptor_getters(lamda, weight, maxlag)

    # Fail early and clearly instead of hitting an unbound local later on.
    if destype not in getters and destype not in ("All", "BinaryDescriptor"):
        raise ValueError(
            "Unknown destype {!r}; expected one of: {}".format(
                destype,
                ", ".join(list(getters) + ["All", "BinaryDescriptor"]),
            )
        )

    sequences = _read_sequences(infile)

    if destype == "BinaryDescriptor":
        # Computed once for the whole list; an empty input yields an empty table.
        out_df = BinaryDescriptor(sequences) if sequences else pd.DataFrame()
    else:
        if destype == "All":
            selected = list(getters.values())
        else:
            selected = [getters[destype]]

        frames = []
        for seq in sequences:
            protein = PyPro()
            protein.ReadProteinSequence(seq)

            values = {}
            for getter in selected:
                values.update(getter(protein))
            frames.append(pd.DataFrame(values, index=[0]))

        # Concatenate once; pd.concat rejects an empty list.
        out_df = pd.concat(frames, axis=0) if frames else pd.DataFrame()

    out_df.to_csv(out_file, index=False, sep='\t')