def main(Pt):
    """Filter one patient's ``.mupexi`` table and write scored candidates.

    Reads ``<Pt>.mupexi`` from the patient's ``Data2`` directory, keeps the
    rows that pass the filter below, and writes one tab-separated record per
    kept row (preceded by a header line) to ``<Pt>.filter.mupexi`` in the
    same directory.

    Args:
        Pt: Patient identifier. It names the directory and both files and is
            written verbatim into the ``Sample`` column.

    Note:
        ``isfloat``, ``cal_netctlpan``, ``Neoantigen``, ``getR``, ``getH``,
        ``cal_similarity``, ``iedb_seq`` and ``mymodel`` are not defined in
        this function and must be available at module level.
    """
    print(Pt)
    src_path = '/home/wzt/project/GeneFusion/Data2/{}/{}.mupexi'.format(Pt, Pt)
    dst_path = '/home/wzt/project/GeneFusion/Data2/{}/{}.filter.mupexi'.format(Pt, Pt)

    header = (
        'HLA\tSample\tmismatch\t'
        'MTpep\tMTpep_score\tMTpep_aff\tMTpep_rank\tMTpep_comb\t'
        'WTpep\tWTpep_score\tWTpep_aff\tWTpep_rank\tWTpep_comb\t'
        'Hydro_Model\tSelf_similar\tR\tA\n'
    )

    with open(src_path, 'r') as src, open(dst_path, 'w') as dst:
        dst.write(header)
        for row in src:
            # Comment lines and the table's own header row carry no data.
            if row.startswith(('#', 'HLA_allele')):
                continue

            cols = row.strip().split('\t')

            # Keep a row only when column 11 (the value later stored as the
            # mutant peptide rank) is <= 2, the product of columns 26 and 16
            # is >= 1, and column 25 is the literal 'No'.
            # NOTE(review): the meaning of columns 16, 25 and 26 is not
            # visible here -- confirm against the input format.
            keep = (
                isfloat(cols[11]) <= 2
                and isfloat(cols[26]) * isfloat(cols[16]) >= 1
                and cols[25] == 'No'
            )
            if not keep:
                continue

            hla = cols[0]
            mtpep = cols[7]
            mtpep_score = cols[12]
            mtpep_aff = cols[10]
            mtpep_rank = cols[11]
            wtpep = cols[1]
            wtpep_score = cols[6]
            wtpep_aff = cols[4]
            wtpep_rank = cols[5]

            # Only the third value returned by cal_netctlpan is written out.
            _, _, mtpep_comb = cal_netctlpan(hla, mtpep)
            _, _, wtpep_comb = cal_netctlpan(hla, wtpep)

            A = Neoantigen(mtpep, wtpep, mtpep_aff, wtpep_aff).getA()
            R = getR(mtpep, iedb_seq)
            H = getH(mtpep, mymodel)
            self_similar = cal_similarity(mut_pep=mtpep, wild_pep=wtpep)
            mismatch = cols[17]

            record = (
                hla, Pt, str(mismatch),
                mtpep, mtpep_score, mtpep_aff, mtpep_rank, mtpep_comb,
                wtpep, wtpep_score, wtpep_aff, wtpep_rank, wtpep_comb,
                str(H), str(self_similar), str(R), str(A),
            )
            dst.write('\t'.join(record) + '\n')
def parseMHC(SRR):
    """Score the binder lines of one sample and write ``fusion_score.tsv``.

    Changes the process working directory to the sample's ``Data1``
    directory, then reads ``<SRR>.netMHC.txt`` from it. If that file is
    missing or empty nothing is written. Otherwise every line containing
    ``=`` followed by whitespace and ``WB`` or ``SB`` is scored and written
    as one tab-separated record to ``fusion_score.tsv``.

    Args:
        SRR: Sample identifier. It names the directory and the input file
            and prefixes the ``Sample`` column.

    Note:
        ``getwildpep``, ``cal_netMHCpan``, ``cal_netctlpan``, ``Neoantigen``,
        ``getR``, ``getH``, ``cal_similarity``, ``iedb_seq`` and ``mymodel``
        are not defined in this function and must exist at module level.
    """
    os.chdir('/home/wzt/project/GeneFusion/Data1/{}'.format(SRR))
    fileout = 'fusion_score.tsv'
    filein = '{}.netMHC.txt'.format(SRR)

    if not (os.path.isfile(filein) and os.path.getsize(filein) > 0):
        return

    # Raw string: '\s' in a normal literal is an invalid escape sequence
    # (warning on current Python versions). One compiled alternation matches
    # exactly the lines the two separate WB / SB searches matched.
    binder_re = re.compile(r'=\s+(?:WB|SB)')

    header = (
        'HLA\tSample\tmismatch\t'
        'MTpep\tMTpep_score\tMTpep_aff\tMTpep_rank\tMTpep_comb\t'
        'WTpep\tWTpep_score\tWTpep_aff\tWTpep_rank\tWTpep_comb\t'
        'Hydro_Model\tSelf_similar\tR\tA\n'
    )

    with open(fileout, 'w') as fout, open(filein, 'r') as fin:
        fout.write(header)
        for line in fin:
            if not binder_re.search(line):
                continue

            fields = line.strip().split()
            mtpep = fields[2]
            mtpep_score, mtpep_aff, mtpep_rank = fields[11], fields[12], fields[13]
            wtpep, mismatch = getwildpep(mtpep)
            hla = fields[1].replace('*', '')

            # A peptide with zero mismatches against its wild-type
            # counterpart is skipped.
            if int(mismatch) == 0:
                continue

            sample = SRR + '_' + fields[10]
            wtpep_score, wtpep_aff, wtpep_rank = cal_netMHCpan(hla, wtpep)
            # Only the third value returned by cal_netctlpan is written out.
            _, _, mtpep_comb = cal_netctlpan(hla, mtpep)
            _, _, wtpep_comb = cal_netctlpan(hla, wtpep)

            A = Neoantigen(mtpep, wtpep, mtpep_aff, wtpep_aff).getA()
            R = getR(mtpep, iedb_seq)
            H = getH(mtpep, mymodel)
            self_similar = cal_similarity(mut_pep=mtpep, wild_pep=wtpep)

            record = (
                hla, sample, str(mismatch),
                mtpep, mtpep_score, mtpep_aff, mtpep_rank, mtpep_comb,
                wtpep, wtpep_score, wtpep_aff, wtpep_rank, wtpep_comb,
                str(H), str(self_similar), str(R), str(A),
            )
            fout.write('\t'.join(record) + '\n')
def parsernetMHCpan(SRR):
    """Score the binder lines of one cell-line sample and write ``NeoScore.txt``.

    Steps:
      1. Change the process working directory to the sample's
         ``Cell_line/Data2`` directory.
      2. Load the IEDB sequences and the model via ``getIEDBseq`` /
         ``getModel``, and take the first sequence of the reference peptide
         FASTA as ``ref``.
      3. For every line of ``netMHC.txt`` containing ``=`` followed by
         whitespace and ``WB`` or ``SB``, skip peptides that occur in ``ref``
         or have length 8, score the rest, and write them to
         ``/tmp/<SRR>NeoScore.txt``.
      4. If that temporary file exists and is non-empty, sort it by
         ``Score2`` (descending) and write it to ``NeoScore.txt`` in the
         working directory.

    Args:
        SRR: Sample identifier. It names the working directory and the
            temporary output file.

    Note:
        ``getIEDBseq``, ``getModel``, ``SimpleFastaParser``, ``getWildpep``,
        ``cal_netMHCpan``, ``cal_netctlpan``, ``Neoantigen``, ``getR``,
        ``getH``, ``cal``, ``f1``, ``f2`` and ``pd`` are not defined in this
        function and must be available at module level.
    """
    print('\n######## Running neoAntigen Scoring ############')
    os.chdir('/home/wzt/project/GeneFusion/Cell_line/Data2/{}'.format(SRR))
    filein = 'netMHC.txt'
    tmp_scores = '/tmp/{}NeoScore.txt'.format(SRR)

    iedb_seq = getIEDBseq()
    mymodel = getModel()

    # Only the first FASTA record is used; an empty file leaves ref as ''.
    with open('/home/wzt/database/STAR_Fusion/GRCh38/ctat/ref_annot_fusion.pep', 'r') as fin:
        ref = next((seq for _title, seq in SimpleFastaParser(fin)), '')

    # Raw string: '\s' in a normal literal is an invalid escape sequence
    # (warning on current Python versions). One compiled alternation matches
    # exactly the lines the two separate WB / SB searches matched.
    binder_re = re.compile(r'=\s+(?:WB|SB)')

    # The column labelled 'R' receives the product A * R computed below.
    header = (
        'HLA\tSample\tmismatch\t'
        'MTpep\tMTpep_score\tMTpep_aff\tMTpep_rank\tMTpep_comb\t'
        'WTpep\tWTpep_score\tWTpep_aff\tWTpep_rank\tWTpep_comb\t'
        'Hydro_Model\tR\tScore1\tScore2\n'
    )

    with open(filein, 'r') as fin, open(tmp_scores, 'w') as fout:
        fout.write(header)
        for line in fin:
            if not binder_re.search(line):
                continue

            fields = line.strip().split()
            mtpep = fields[2]
            # Skip peptides found in the reference sequence and all 8-mers.
            if mtpep in ref or len(mtpep) == 8:
                continue

            sample = fields[10]
            mtpep_score, mtpep_aff, mtpep_rank = fields[11], fields[12], fields[13]
            wtpep, mismatch = getWildpep(mtpep)
            hla = fields[1].replace('*', '')
            wtpep_score, wtpep_aff, wtpep_rank = cal_netMHCpan(hla, wtpep)
            # Only the third value returned by cal_netctlpan is used.
            _, _, mtpep_comb = cal_netctlpan(hla, mtpep)
            _, _, wtpep_comb = cal_netctlpan(hla, wtpep)

            A = Neoantigen(mtpep, wtpep, mtpep_aff, wtpep_aff).getA()
            R = getR(mtpep, iedb_seq)
            H = getH(mtpep, mymodel)

            Rm = cal(float(mtpep_rank))
            Rn = cal(float(wtpep_rank))
            RA = float(A) * float(R)

            # Negative scores are clamped to 0.
            score1 = max(
                f1(Rm=Rm, Rn=Rn, H=float(H), RA=RA,
                   mismatch=float(mismatch), comb=float(mtpep_comb)),
                0,
            )
            score2 = max(
                f2(Rm=Rm, Rn=Rn, mismatch=float(mismatch), comb=float(mtpep_comb)),
                0,
            )

            record = (
                hla, sample, str(mismatch),
                mtpep, mtpep_score, mtpep_aff, mtpep_rank, mtpep_comb,
                wtpep, wtpep_score, wtpep_aff, wtpep_rank, wtpep_comb,
                str(H), str(RA), str(score1), str(score2),
            )
            fout.write('\t'.join(record) + '\n')

    fileout = 'NeoScore.txt'
    if os.path.isfile(tmp_scores) and os.path.getsize(tmp_scores) > 0:
        dat = pd.read_csv(tmp_scores, sep='\t', header=0)
        dat.sort_values(by='Score2', ascending=False, inplace=True)
        dat.to_csv(fileout, sep='\t', header=True, index=False)

    # NOTE(review): printed whether or not NeoScore.txt was written --
    # confirm that is the intended placement.
    print('\n######## Complete Successfuly ############')
def main(line):
    """Build one tab-separated score record from a single input row.

    The row is split on tabs. Column 0 is the HLA allele (``*`` removed),
    column 1 the mutant peptide, columns 3-5 its score, affinity and rank,
    columns 2 and 7 are joined with ``_`` to form the sample name, and the
    last column is passed as ``index`` to ``cal_netMHCpan`` and
    ``cal_netctlpan``.

    Args:
        line: One tab-separated input row.

    Returns:
        The output record as a single string terminated by a newline.

    Note:
        ``getwildpep``, ``cal_netMHCpan``, ``cal_netctlpan``, ``Neoantigen``,
        ``getR``, ``getH``, ``cal_similarity``, ``cal_mismatch``,
        ``iedb_seq`` and ``mymodel`` are not defined in this function and
        must be available at module level.
    """
    cols = line.strip().split('\t')

    index = cols[-1]
    hla = cols[0].replace('*', '')
    sample = '_'.join((cols[2], cols[7]))
    mtpep = cols[1]
    mtpep_score = cols[3]
    mtpep_aff = cols[4]
    mtpep_rank = cols[5]

    wtpep = getwildpep(mtpep)
    wtpep_score, wtpep_aff, wtpep_rank = cal_netMHCpan(hla, wtpep, index)

    # Only the third value returned by cal_netctlpan ends up in the record.
    _, _, mtpep_comb = cal_netctlpan(hla, mtpep, index)
    _, _, wtpep_comb = cal_netctlpan(hla, wtpep, index)

    A = Neoantigen(mtpep, wtpep, mtpep_aff, wtpep_aff).getA()
    R = getR(mtpep, iedb_seq)
    H = getH(mtpep, mymodel)
    self_similar = cal_similarity(mut_pep=mtpep, wild_pep=wtpep)
    mismatch = cal_mismatch(mtpep, wtpep)

    record = (
        hla, sample, str(mismatch),
        mtpep, mtpep_score, mtpep_aff, mtpep_rank, mtpep_comb,
        wtpep, wtpep_score, wtpep_aff, wtpep_rank, wtpep_comb,
        str(H), str(self_similar), str(R), str(A),
    )
    return '\t'.join(record) + '\n'