def Decriptor_generator(self, ps):
    """Compute a fixed set of PyPro descriptors for one sequence.

    A ``PyPro`` object is created, given ``ps`` through
    ``ReadProteinSequence``, and queried for ten descriptor dictionaries.
    The dictionaries are then folded into a single dict.

    Args:
        ps: The sequence handed to ``PyPro.ReadProteinSequence``.

    Returns:
        dict: The union of all descriptor dictionaries. They are merged in
        the order they are computed, so if two families share a key the
        later one wins.
    """
    analyser = PyPro()
    analyser.ReadProteinSequence(ps)

    # Every family is evaluated up front, in this exact order, before any
    # merging happens. GetDPComp and GetTPComp are deliberately not part of
    # this variant (the original author had them disabled; GetTPComp was
    # flagged as slow).
    families = (
        analyser.GetAAComp(),
        analyser.GetTriad(),
        analyser.GetPAAC(lamda=5, weight=0.5),   # noted as slow by the author
        analyser.GetAPAAC(lamda=5, weight=0.5),  # noted as slow by the author
        analyser.GetCTD(),
        analyser.GetGearyAuto(),
        analyser.GetMoranAuto(),
        analyser.GetMoreauBrotoAuto(),
        analyser.GetQSO(),
        analyser.GetSOCN(),
    )

    combined = {}
    for family in families:
        combined.update(family)
    return combined
def Decriptor_generator(self, ps):
    """Compute and merge ten PyPro descriptor dictionaries for one sequence.

    Args:
        ps: The sequence handed to ``PyPro.ReadProteinSequence``.

    Returns:
        dict: All descriptor dictionaries merged into one. The ``GetPAAC``
        result is computed first but merged last, so on a key collision its
        values take precedence over every other family.
    """
    analyser = PyPro()
    analyser.ReadProteinSequence(ps)

    # GetPAAC is evaluated before everything else ...
    paac = analyser.GetPAAC(lamda=5, weight=0.5)

    merge_order = [
        analyser.GetAPAAC(lamda=5, weight=0.5),
        analyser.GetCTD(),
        analyser.GetDPComp(),
        analyser.GetGearyAuto(),
        analyser.GetMoranAuto(),
        analyser.GetMoreauBrotoAuto(),
        analyser.GetQSO(),
        analyser.GetSOCN(),
        analyser.GetTPComp(),
    ]
    # ... but contributes to the merged dict after all the others.
    merge_order.append(paac)

    combined = {}
    for family in merge_order:
        combined.update(family)
    return combined
def _read_sequences(infile):
    """Return the non-header, non-blank lines of ``infile`` as sequences."""
    sequences = []
    # The context manager guarantees the handle is closed, even on error.
    with open(infile) as handle:
        for line in handle:
            # Any line containing '>' is treated as a header and skipped.
            if ">" in line:
                continue
            # strip() also removes '\r' and stray spaces; blank lines would
            # otherwise be passed on as empty sequences.
            sequence = line.strip()
            if sequence:
                sequences.append(sequence)
    return sequences


def _descriptor_getters(lamda, weight, maxlag):
    """Return ``(destype, getter)`` pairs in the merge order used by "All".

    Each getter takes a ``PyPro`` object. Parameter conversion happens inside
    the getter, so ``lamda``/``weight``/``maxlag`` are only parsed for
    descriptor types that actually use them.
    """
    return (
        ("GetAAComp", lambda p: p.GetAAComp()),
        ("GetDPComp", lambda p: p.GetDPComp()),
        ("GetTPComp", lambda p: p.GetTPComp()),
        ("GetMoreauBrotoAuto", lambda p: p.GetMoreauBrotoAuto()),
        ("GetMoranAuto", lambda p: p.GetMoranAuto()),
        ("GetGearyAuto", lambda p: p.GetGearyAuto()),
        ("GetCTD", lambda p: p.GetCTD()),
        ("GetPAAC",
         lambda p: p.GetPAAC(lamda=int(lamda), weight=float(weight))),
        ("GetAPAAC",
         lambda p: p.GetAPAAC(lamda=int(lamda), weight=float(weight))),
        ("GetSOCN", lambda p: p.GetSOCN(maxlag=int(maxlag))),
        ("GetQSO",
         lambda p: p.GetQSO(maxlag=int(maxlag), weight=float(weight))),
        ("GetTriad", lambda p: p.GetTriad()),
    )


def _compute_descriptors(sequence, destype, getters):
    """Return the descriptor dict of type ``destype`` for one sequence."""
    by_name = dict(getters)
    # Validate before touching PyPro so a bad destype yields a clear error
    # instead of an unbound-variable failure later on.
    if destype != "All" and destype not in by_name:
        raise ValueError(
            "Unknown destype {!r}; expected 'All', 'BinaryDescriptor' or one "
            "of: {}".format(destype, ", ".join(name for name, _ in getters))
        )

    protein = PyPro()
    protein.ReadProteinSequence(sequence)

    if destype != "All":
        return by_name[destype](protein)

    # Compute every family first, then merge in table order so later
    # families overwrite earlier ones on key collisions.
    parts = [getter(protein) for _, getter in getters]
    merged = {}
    for part in parts:
        print(part)  # kept: the function has always echoed each dict here
        merged.update(part)
    return merged


def Decriptor_generator(infile, lamda, weight, maxlag, destype, out_file):
    """Compute descriptors for every sequence in a file and write a TSV.

    ``infile`` is read line by line. Lines containing ``>`` are skipped as
    headers, blank lines are ignored, and every remaining line is treated as
    one complete sequence (a sequence wrapped over several lines is therefore
    seen as several sequences).

    Args:
        infile: Path of the input sequence file.
        lamda: Passed as ``int(lamda)`` to ``GetPAAC`` / ``GetAPAAC``.
        weight: Passed as ``float(weight)`` to ``GetPAAC`` / ``GetAPAAC`` /
            ``GetQSO``.
        maxlag: Passed as ``int(maxlag)`` to ``GetSOCN`` / ``GetQSO``.
        destype: Name of a single ``PyPro`` getter (e.g. ``"GetAAComp"``),
            ``"All"`` to merge all twelve getters, or ``"BinaryDescriptor"``
            to delegate to ``BinaryDescriptor`` with the full sequence list.
        out_file: Path of the tab-separated output file (written without the
            index column).

    Raises:
        ValueError: If ``destype`` is not recognised and there is at least
            one sequence to process.
    """
    sequences = _read_sequences(infile)

    if destype == 'BinaryDescriptor':
        # BinaryDescriptor works on the whole list, so call it once rather
        # than once per sequence. With no sequences an empty table is
        # written, as before.
        out_df = BinaryDescriptor(sequences) if sequences else pd.DataFrame()
    else:
        getters = _descriptor_getters(lamda, weight, maxlag)
        frames = [
            pd.DataFrame(_compute_descriptors(seq, destype, getters),
                         index=[0])
            for seq in sequences
        ]
        # One concat over the collected rows avoids re-copying the growing
        # table on every iteration; pd.concat rejects an empty list.
        out_df = pd.concat(frames, axis=0) if frames else pd.DataFrame()

    out_df.to_csv(out_file, index=False, sep='\t')