def sheet6(self,ws, book): infolog("Start sheet6") col = 'B' for i in range(0, len(self.s1)): ws[col+'2'] = str(self.s1[i]) + '~' + str(self.s2[i]) +'%' # if self.s2[i] != 51.0 else str(self.s1[i]) + '%~' # get basepair between s1 and s2 mxr = [] for n in range(0, book.nsheets): _, _, minor_ = book.get_Number_of_GPS(book.BP35[n], self.s1[i], self.s2[i]) mxr.append(book.BP35[n].loc[minor_.index]) for x in range(-1, book.nsheets): ws[col+'3'] = book.sheet_list[x] if x != -1 else "Major/Minor" rows = 4 for a in range(0, len(book.col_basepair)): for b in range(0, len(book.col_basepair)): if a==b: continue ws[col+str(rows)] = len(mxr[x][logical_and( mxr[x]['major_idx']==a, mxr[x]['minor_idx']==b )]) if x != -1 else book.col_basepair[a] + '/' + book.col_basepair[b] rows = rows + 1 col = next_col(col) col = next_col(col) infolog("End sheet6")
def sheet2(self, ws, book): infolog("Start Sheet2") ws['A1'] = "strain" ws.merge_cells(start_row=1, start_column=1, end_row=2, end_column=2) ws['C1'] = "Genome length (bp)" ws.merge_cells(start_row=1, start_column=3, end_row=2, end_column=3) ws['D1'] = "Range of minor allele frequency(%)" ws.merge_cells(start_row=1, start_column=4, end_row=1, end_column=4+len(self.s1)) col = 'D' for ss1, ss2 in zip(self.s1, self.s2): ws[col+'2'] = str(ss1) + '≤f<' + str(ss2) col = next_col(col) for i in range(book.nsheets): infolog("Writing {0} sheet".format(book.sheet_list[i])) ws['A' + str(i+3)] = book.sheet_list[i] ws['C' + str(i+3)] = book.BPRawLength[i] ws.merge_cells(start_row=3+i, start_column=1, end_row=3+i, end_column=2) col = 'D' for r1, r2 in zip(self.s1, self.s2): s, l, _ = book.get_Number_of_GPS(book.BP35[i], r1, r2) if len(book.BPxMinor[i])!=0 else (0,0) ws[col + str(i+3)] = str(round(s/l, 3))+'%' if l is not 0 else '-' col = next_col(col) infolog("End Sheet2")
def sheet1(self,ws, book): infolog("Start Sheet1") ws.title = "Polymorphic site" ws['A1'] = "strain" ws['C1'] = "Genome length (bp)" ws['D1'] = "Average of MAF" ws['E1'] = "Number of polymorphic site" ws.merge_cells(start_row=1, start_column=5, end_row=1, end_column=5+len(self.s1)) ws.merge_cells(start_row=1, start_column=1, end_row=2, end_column=2) ws.merge_cells(start_row=1, start_column=3, end_row=2, end_column=3) ws.merge_cells(start_row=1, start_column=4, end_row=2, end_column=4) col = 'E' for ci in range(len(self.s1)): ws[col+'2'] = str(self.s1[ci]) + '≤n<' + str(self.s2[ci]) col = next_col(col) for ni in range(book.nsheets): infolog("Writing {} sheet".format(book.sheet_list[ni])) ws['A' + str(ni+3)] = book.sheet_list[ni] ws.merge_cells(start_row=3+ni, start_column=1, end_row=3+ni, end_column=2) ws['C' + str(ni+3)] = book.BPRawLength[ni] s, l, _ = book.get_Number_of_GPS(book.BP35[ni], 5.0, 51.0) ws['D' + str(ni+3)] = str(round(s / l, 3)) + '%' col = 'E' for ss1, ss2 in zip(self.s1, self.s2): ws[col + str(ni+3)] = book.get_Number_of_GPS(book.BP35[ni], ss1, ss2)[1] col = next_col(col) # ws['E2'] = "2.5≤f<5" # ws['F2'] = "5≤f<15" # ws['G2'] = "15≤f<25" # ws['H2'] = "25≤f" # ws['I2'] = "sum" # # A col # for i in range(book.nsheets): # infolog("{0} Writing {1} sheet".format(time.time(), book.sheet_list[i])) # ws['A' + str(i+3)] = book.sheet_list[i] # ws.merge_cells(start_row=3+i, start_column=1, end_row=3+i, end_column=2) # ws['C' + str(i+3)] = book.BPRawLength[i] # s, l, _ = book.get_Number_of_GPS(book.BP35[i], 5.0, 51.0) # ws['D' + str(i+3)] = str(round(s / l, 3)) + '%' # ws['E' + str(i+3)] = book.get_Number_of_GPS(book.BP35[i], 2.5, 5.0)[1] # ws["F" + str(i+3)] = book.get_Number_of_GPS(book.BP35[i], 5.0, 15.0)[1] # ws["G" + str(i+3)] = book.get_Number_of_GPS(book.BP35[i], 15.0, 25.0)[1] # ws["H" + str(i+3)] = book.get_Number_of_GPS(book.BP35[i], 25.0, 51.0)[1] # ws["I" + str(i+3)] = book.get_Number_of_GPS(book.BP35[i], 5.0, 51.0)[1] infolog("End Sheet1")
def BaseComp(self, ws, types_, book): """ major가 같은 것, minor도 같은 것만 추출 minor x -> 증가 포함 1. major_idx_x == major_idx_y - 치환 제거 2. (minor_idx_x == minor_idx_y) or ((minor_idx_x != minor_idx_y) and minor_x == 0) """ infolog("lowhigh BaseComp start") ws.merge_cells(start_row=1, start_column=1, end_row=2, end_column=1) ws['B1'], ws['B2'] = 'Major', 'Minor' ws['D1'], ws['H1'], ws['L1'], ws['P1'] = 'A', 'G', 'C', 'T' ws['G2'], ws['K2'], ws['O2'] = 'a', 'a', 'a' ws['C2'], ws['L2'], ws['P2'] = 'g', 'g', 'g' ws['D2'], ws['H2'], ws['Q2'] = 'c', 'c', 'c' ws['E2'], ws['I2'], ws['M2'] = 't', 't', 't' cols = ['A','G','C','T'] bp = self.BPmergedinc if types_ is "INC" else self.BPmergeddec for r in range(book.nsheets): ws['A'+str(r+3)] = book.sheet_list[r] for r in range(book.nsheets): col = 'C' cond1 = bp[r]['major_idx_x'] == bp[r]['major_idx_y'] cond2 = bp[r]['minor_idx_x'] == bp[r]['minor_idx_y'] cond3 = logical_and( bp[r]['minor_idx_x'] != bp[r]['minor_idx_y'], bp[r]['minor_x'] == 0 ) idx = logical_and(cond1, logical_or(cond2, cond3)) tmp = bp[r][ idx.values ] for a in range(len(cols)): for b in range(len(cols)): if a == b: continue # major는 _x, _y 가 이미 같고, minor는 _y기준으로 하면 _x에서 minor가 0이던값 무시됨 ws[col+str(r+3)] = len(tmp[ logical_and(tmp['major_idx_y'] == a, tmp['minor_idx_y'] == b).values ]) col = next_col(col) col = next_col(col) infolog("lowhigh BaseComp end")
def PolymorphicSite(self, ws, types_, book): infolog("lowhigh PolymorphicSite start") ws.title = "INC-Polymorphic site" # A col ws['A1'] = types_ ws.merge_cells(start_row=1, start_column=1, end_row=2, end_column=2) # C col ws['C1'] = "Increase in genetic polymrphism (%)" ws.merge_cells(start_row=1, start_column=3, end_row=1, end_column=6) from numpy import logical_and for i in range(book.nsheets): ws['A' + str(i+3)] = book.sheet_list[i] ws.merge_cells(start_row=i+3, start_column=1, end_row=i+3, end_column=2) col = 'C' bp = self.BPmergedinc[i][['diffofinc']] if types_ == 'INC' else self.BPmergeddec[i][['diffofdec']] for s1_, s2_ in zip(self.s1, self.s2): ws[col+'2'] = str(s1_) + '≤n<' + str(s2_) # 'Sum' if s1_ == 5.0 and s2_ == 51.0 else str(s1_) + '≤n<' + str(s2_) ws[col+str(i+3)] = len(bp[ logical_and(bp >= s1_, bp < s2_).values]) if len(bp) is not 0 else 0 col = next_col(col) infolog("lowhigh PolymorphicSite end")
def ORFNCR(self, ws, types_, title, book): infolog("lowhigh {0} start".format(title)) ws['A1'] = title ws.merge_cells(start_row=1, start_column=1, end_row=2, end_column=1) ws['B1'] = 'Length' ws.merge_cells(start_row=1, start_column=2, end_row=2, end_column=2) orfncr = book.ORF if title is 'ORF' else book.NCR col_diff = "diffofinc" if types_ == 'INC' else "diffofdec" bp = self.BPmergedinc if types_ == 'INC' else self.BPmergeddec sum_, col = 0, 'B' for si in range(-1, book.nsheets): sum_ = 0 if si != -1: ws[col+'2'] = book.sheet_list[si] for r in range(len(orfncr)): if si == -1: ws['A'+str(r+3)] = orfncr[r] cnt = len(book.Dumas[ book.Dumas[book.col_ORF] == orfncr[r] ]) ws[col+str(r+3)] = cnt sum_ += cnt else: cnt = len( bp[si][ bp[si][book.col_ORF] == orfncr[r] ] ) ws[col+str(r+3)] = cnt sum_ += cnt ws[col+str(3+len(orfncr))] = sum_ col = next_col(col) ws['A'+str(3+len(orfncr))] = 'Total' col = next_col(col) for i in range(len(self.s1)): # ORF or CNR 종류 ws[col+'1'] = str(self.s1[i])+'~'+str(self.s2[i]) #if self.s2[i]!=51.0 else str(self.s1[i])+'~' for r in range(len(orfncr)): ws[col+str(r+3)] = orfncr[r] col = next_col(col) col_GPS = col ws[col+'1'] = '{0} in genetic polymorphism'.format("Increase" if types_ == 'INC' else "Decrease") for s in range(book.nsheets): ws[col+'2'] = book.sheet_list[s] bpx = bp[s][ logical_and(bp[s][col_diff] >= self.s1[i], bp[s][col_diff] < self.s2[i]).values ] sum_ = 0 for r in range(3, 3+len(orfncr)): cnt = len(bpx[ bpx[book.col_ORF] == orfncr[r-3]]) ws[col+str(r)] = cnt sum_ += cnt ws[col+str(3+len(orfncr))] = sum_ col = next_col(col) ws[col+'1'] = 'Average {0}'.format("Increase" if types_ == 'INC' else "Decrease") for s in range(book.nsheets): ws[col+'2'] = book.sheet_list[s] bpx = bp[s][ logical_and(bp[s][col_diff] >= self.s1[i], bp[s][col_diff] < self.s2[i]).values ] sum_ = 0 for r in range(3, 3+len(orfncr)): cnt = bpx[ bpx[book.col_ORF] == orfncr[r-3]][[col_diff]] ws[col+str(r)] = str(round(cnt.sum()[0] / len(cnt), 3))+'%' if len(cnt) is not 0 else 'N/A' sum_ += cnt.sum()[0] col = next_col(col) ws[col+'1'] = 'Number of GPS / Length' for s in range(book.nsheets): ws[col+'2'] = book.sheet_list[s] for r in range(3, 3+len(orfncr)): s, l = float(ws[col_GPS+str(r)].value), float(ws['B'+str(r)].value) ws[col+str(r)] = str( round(s/l, 6) ) if l != 0 else 'N/A' col_GPS = next_col(col_GPS) col = next_col(col) col = next_col(col) infolog("lowhigh {0} end".format(title))
def GenomeStr(self, ws, types_, book): infolog("lowhigh GenomeStr start") bp = self.BPmergedinc if types_ == 'INC' else self.BPmergeddec col_diff = "diffofinc" if types_ == 'INC' else "diffofdec" # s1 ~ s2 for i in range(len(self.s1)): rows = i*(14+len(book.GenomeStructure) + len(book.RepeatRegion))+ 1 ws['A'+str(rows)] = 'Region' ws.merge_cells(start_row=rows, start_column=1, end_row=rows+1, end_column=1) ws['B'+str(rows)] = 'Dumas Length' ws.merge_cells(start_row=rows, start_column=2, end_row=rows+1, end_column=2) for r in range(2, 2+len(book.GenomeStructure)): ws['A' + str(rows+r)] = book.GenomeStructure[r-2] ws['B' + str(rows+r)] = len(book.Dumas[ book.Dumas[book.col_GenomeStructure] == book.GenomeStructure[r-2] ]) ws['A' + str(rows+2+len(book.GenomeStructure))] = 'Total' ws['B' + str(rows+2+len(book.GenomeStructure))] = len(book.Dumas) sum_ = 0 for r in range(len(book.RepeatRegion)): row = rows+r+3+len(book.GenomeStructure) ws['A' + str(row)] = book.RepeatRegion[r] cnt = len(book.Dumas[ book.Dumas[book.col_RepeatRegion] == book.RepeatRegion[r] ]) ws['B' + str(row)] = cnt sum_ += cnt leng = rows+ len(book.GenomeStructure) + len(book.RepeatRegion) ws['A' + str(leng+3)] = 'Total' ws['B' + str(leng+3)] = sum_ ws['A' + str(leng+4)] = 'ORF' ws['B' + str(leng+4)] = len(book.Dumas[ book.Dumas[book.col_ORF].isin(book.ORF)]) ws['A' + str(leng+5)] = 'NCR' ws['B' + str(leng+5)] = len(book.Dumas[ book.Dumas[book.col_ORF].isin(book.NCR)]) col = 'C' ws[col+str(rows)] = '{0} in genetic polymorphism'.format("Increase" if types_ is 'INC' else "Decrease") ws.merge_cells(start_row=rows, start_column=3, end_row=rows, end_column=2+book.nsheets) ws[chr(ord(col)+book.nsheets)+str(rows)] = "Average {0}".format("Increase" if types_ is 'INC' else "Decrease") ws.merge_cells(start_row=rows, start_column=3+book.nsheets, end_row=rows, end_column=2+2*book.nsheets) for s in range(book.nsheets): idx = logical_and( bp[s][col_diff] >= self.s1[i], bp[s][col_diff] < self.s2[i] ) bpx = bp[s][idx.values] ncol = chr(ord(col)+book.nsheets) ws[col+str(rows+1)] = book.sheet_list[s] ws[chr(ord(col)+book.nsheets)+str(rows+1)] = book.sheet_list[s] sum_, avg_ = 0, 0 for r in range(2, 2+len(book.GenomeStructure)): cnt = bpx[ bpx[book.col_GenomeStructure] == book.GenomeStructure[r-2]][[col_diff]] ws[col+str(rows+r)] = len(cnt) ws[ncol+str(rows+r)] = str(round((cnt.sum()[0] / len(cnt)), 3))+'%' if len(cnt) is not 0 else 'N/A' avg_ += cnt.sum()[0] sum_ += len(cnt) # Total ws[col+str(rows+2+len(book.GenomeStructure))] = sum_ ws[chr(ord(col)+book.nsheets)+str(rows+2+len(book.GenomeStructure))] = str(round(avg_/sum_, 3))+'%' if sum_ is not 0 else 'N/A' sum_, avg_ = 0, 0 for r in range(len(book.RepeatRegion)): row = rows+r+3+len(book.GenomeStructure) cnt = bpx[ bpx[book.col_RepeatRegion] == book.RepeatRegion[r]][[col_diff]] ws[col+str(row)] = len(cnt) ws[ncol+str(row)] = str(round((cnt.sum()[0] / len(cnt)),3))+'%' if len(cnt) is not 0 else 'N/A' sum_ += len(cnt) avg_ += cnt.sum()[0] ws[col+str(3+leng)] = sum_ ws[ncol+str(3+leng)] = str(round(avg_/sum_,3))+'%' if sum_ is not 0 else 'N/A' # ORF & NCR cnt = bpx[ bpx[book.col_ORF].isin(book.ORF)][[col_diff]] ws[col+str(4+leng)] = len(cnt) ws[ncol+str(4+leng)] = str(round(cnt.sum()[0] / len(cnt), 3))+'%' if len(cnt) is not 0 else 'N/A' cnt = bpx[ bpx[book.col_ORF].isin(book.NCR)][[col_diff]] ws[col+str(5+leng)] = len(cnt) ws[ncol+str(5+leng)] = str(round(cnt.sum()[0] / len(cnt), 3))+'%' if len(cnt) is not 0 else 'N/A' col = next_col(col) ncol = chr(ord(ncol)+book.nsheets) ws[ncol+str(rows+2)] = str(self.s1[i])+"~"+str(self.s2[i]) #if self.s2[i] != 51.0 else str(self.s1[i])+"~" infolog("lowhigh GenomeStr end")
def sheet7(self, ws, book): infolog("Start sheet7") for i in range(0, len(self.s1)): r = i * (book.nsheets + 12) + 2 ws["A"+str(r)] = str(self.s1[i]) + "~" + str(self.s2[i]) + "%" #if self.s2[i] != 51.0 else str(self.s1[i]) + "%~" ws["B"+str(r)] = "Virus" ws.merge_cells(start_row=r, start_column=2, end_row=r+2, end_column=2) ws["C"+str(r)] = "Number of GPS" ws.merge_cells(start_row=r, start_column=3, end_row=r+2, end_column=3) ws["D"+str(r)] = "GPS Mean" ws.merge_cells(start_row=r, start_column=4, end_row=r+2, end_column=4) ws["E"+str(r)] = "Major" ws.merge_cells(start_row=r, start_column=5, end_row=r+1, end_column=5) ws["E"+str(r+2)] = "Minor" ws['F'+str(r)] = 'A' ws.merge_cells(start_row=r, start_column=6, end_row=r+1, end_column=8) ws['I'+str(r)] = 'G' ws.merge_cells(start_row=r, start_column=9, end_row=r+1, end_column=11) ws['L'+str(r)] = 'C' ws.merge_cells(start_row=r, start_column=12, end_row=r+1, end_column=14) ws['O'+str(r)] = 'T' ws.merge_cells(start_row=r, start_column=15, end_row=r+1, end_column=17) ws['E'+str(r+3+book.nsheets)] = 'sum' for x in range(0, book.nsheets): row, mafrow = r+x+3, r+book.nsheets+x+5 s, l, minor_ = book.get_Number_of_GPS(book.BP35[x], self.s1[i], self.s2[i]) mxr = book.BP35[x].loc[minor_.index] # s, l = self.book.get_Number_of_GPS(self.PxMinor[x], self.PxSum[x], self.s1[i], self.s2[i]) if len(self.PxMinor[x])!=0 else (0,0) ws["B"+str(row)] = book.sheet_list[x] ws['C'+str(row)] = len(minor_) ws['D'+str(row)] = str(round(s/l, 3)) + '%' if l != 0 else '-' col = 'F' for a in range(0, 4): for b in range(0, 4): if a == b: continue else: ws[col+str(r+2)] = book.col_basepair[b].lower() ext_ = mxr[logical_and( mxr['major_idx']==a, mxr['minor_idx']==b )] ws[col+str(row)] = len(ext_) if len(ext_) is not 0: # maf들의 평균 maf_ = (divide( ext_[['minor']], ext_[['sum']]) * 100).sum()[0] ws[col+str(mafrow)] = str(round( maf_ / len(ext_), 3)) + '%' else: ws[col+str(mafrow)] = 'N/A' # 전체 개수의 maf if ws[col+str(r+3+book.nsheets)].value == None: ws[col+str(r+3+book.nsheets)] = len(ext_) else: ws[col+str(r+3+book.nsheets)].value += len(ext_) if ws[col+str(r+5+ 2*book.nsheets)].value == None: ws[col+str(r+5+ 2*book.nsheets)] = maf_ # ws[col+str(r+5+ 2*book.nsheets+1)] = ext_[['sum']].sum()[0] else: # print(i, "mj ", a, "mn ", b, ext_[['minor']].sum()[0]) ws[col+str(r+5+ 2*book.nsheets)].value += maf_ # ws[col+str(r+5+ 2*book.nsheets+1)].value += ext_[['sum']].sum()[0] col = next_col(col) # total maf col = 'F' numrow, mnrow = r+3+book.nsheets, r+5+2*book.nsheets for c in range(0, 12): ws[col+str(mnrow)] = str(round( float(ws[col+str(mnrow)].value) / float(ws[col+str(numrow)].value), 3))+'%' if ws[col+str(numrow)].value != 0 else 'N/A' col = next_col(col) infolog("End sheet7")
def sheet4_5(self, ws, title, book): infolog("Start sheet {0}".format(4 if title is "ORF" else 5)) ws['B2'] = book.filename ws["C2"] = "(BP_full)Length" ws.merge_cells(start_row=2, start_column=3, end_row=2, end_column=3+book.nsheets-1) ws["B3"] = title col = 'C' for i in range(0, book.nsheets): ws[col + str(3)] = book.sheet_list[i] col = next_col(col) # Full - 35이하 포함 col = 'B' orf_ = book.ORF if title is 'ORF' else book.NCR rows = list(range(4, len(orf_) + 4)) for c in range(0, book.nsheets+1): cnt_ = 0 for i in range(0, len(orf_)): if c is 0: ws[col+str(rows[i])] = orf_[i] else: tmp = len(book.BPRaw[c-1][ book.BPRaw[c-1][book.col_ORF] == orf_[i] ]) ws[col+str(rows[i])] = tmp cnt_ += tmp ws[col + str(4+len(orf_))] = cnt_ if c is not 0 else 'total' col = next_col(col) # s1~s2 범위 col = chr(ord('A')+3+book.nsheets) for i in range(0, len(self.s1)): co = (5+3*book.nsheets) * i + (5+book.nsheets) ws[col+str(2)] = str(self.s1[i]) + '~' + str(self.s2[i]) +'%' if self.s2[i] != 51.0 else str(self.s1[i]) + '%~' ws[col+str(3)] = title # NCR or ORF for ix in range(0, len(orf_)): ws[col+str(rows[ix])] = orf_[ix] ws[col+str(len(orf_) + 4)] = 'total' # Number of GPS of each sheets gps = list() col = next_col(col) ws[col+str(2)] = "Number of GPS" ws.merge_cells(start_row=2, start_column=co, end_row=2, end_column=co+book.nsheets) dcol = next_n_col(col, book.nsheets) for n in range(0, book.nsheets): ws[col+str(3)] = book.sheet_list[n] _, _, tx_minor = book.get_Number_of_GPS(book.BP35[n], self.s1[i], self.s2[i]) tx_rows = book.BP35[n].loc[ tx_minor.index ] g = [] cnt_ = 0 for ix in range(0, len(orf_)): mrows = tx_rows[ tx_rows[book.col_ORF] == orf_[ix] ] ws[col+str(rows[ix])] = len(mrows) cnt_ += len(mrows) g.append(len(mrows)) # 180612 ws[dcol+str(rows[ix])] = float(ws[dcol+str(rows[ix])].value) + len(mrows) if ws[dcol+str(rows[ix])].value != None else len(mrows) ws[col+str(len(orf_) + 4)] = cnt_ ws[dcol+str(len(orf_) + 4)] = float(ws[dcol+str(len(orf_) + 4)].value) + cnt_ if ws[dcol+str(len(orf_) + 4)].value != None else cnt_ g.append(cnt_) gps.append(g) col = next_col(col) col = next_col(col) gcol = next_n_col(col, book.nsheets) # Average MAF at GPS of each sheets ws[col+str(2)] = "Average MAF at GPS" ws.merge_cells(start_row=2, start_column=co+book.nsheets+1, end_row=2, end_column=co+2*book.nsheets+1) for n in range(0, book.nsheets): val, cnt_ = 0, 0 ws[col+str(3)] = book.sheet_list[n] _, _, tx_minor = book.get_Number_of_GPS(book.BP35[n], self.s1[i], self.s2[i]) tx_rows = book.BP35[n].loc[ tx_minor.index ] for ix in range(0, len(orf_)): mrows = tx_rows[ tx_rows[book.col_ORF] == orf_[ix] ] if len(mrows) is 0: ws[col+str(rows[ix])] = '-' # 180612 ws[gcol+str(rows[ix])] = ws[gcol+str(rows[ix])].value if ws[gcol+str(rows[ix])].value != None else 0 else: maf_ = divide(mrows[["minor"]], mrows[["sum"]]) * 100 val += maf_.sum()[0] cnt_ += len(maf_) ws[col+str(rows[ix])] = str(round(maf_.sum()[0] / len(maf_), 3)) + '%' # 180612 ws[gcol+str(rows[ix])] = float(ws[gcol+str(rows[ix])].value) + maf_.sum()[0] if ws[gcol+str(rows[ix])].value != None else maf_.sum()[0] # total ws[col+str(len(orf_)+4)] = str(round(val / cnt_, 3)) + '%' if cnt_ is not 0 else '-' ws[gcol+str(len(orf_)+4)] = float(ws[gcol+str(len(orf_)+4)].value) + val if ws[gcol+str(len(orf_)+4)].value != None else val col = next_col(col) col = next_col(col) # "Number of GPS / Length" of each sheets ws[col+str(2)] = "Number of GPS / Length * 100" ws.merge_cells(start_row=2, start_column=co+2*book.nsheets+2, end_row=2, end_column=co+3*book.nsheets+2) ncol = 'C' d2col = next_n_col(col, book.nsheets) for n in range(0, book.nsheets): ws[col+str(3)] = book.sheet_list[n] for ix in range(0, len(orf_)): l_ = int(str(ws[ncol+str(rows[ix])].value)) ws[col+str(rows[ix])] = (gps[n][ix] / l_)*100 if l_ is not 0 else '-' ws[d2col+str(rows[ix])] = float(ws[d2col+str(rows[ix])].value) + l_ if ws[d2col+str(rows[ix])].value != None else l_ # total l_ = int(str(ws[ncol+str(len(orf_)+4)].value)) ws[col+str(len(orf_)+4)] = (gps[n][-1] / l_)*100 if l_ is not 0 else '-' ws[d2col+str(len(orf_)+4)] = float(ws[d2col+str(len(orf_)+4)].value) + l_ if ws[d2col+str(len(orf_)+4)].value != None else l_ ncol = next_col(ncol) col = next_col(col) for row_ in range(4, len(orf_)+5): ws[dcol+str(row_)] = float(ws[dcol+str(row_)].value) / book.nsheets if float(ws[dcol+str(row_)].value) != 0 else 0 ws[gcol+str(row_)] = str(round(float(ws[gcol+str(row_)].value) / float(ws[dcol+str(row_)].value), 3)) + '%' if float(ws[dcol+str(row_)].value) != 0 else '-' ws[d2col+str(row_)] = float(ws[dcol+str(row_)].value) / (float(ws[d2col+str(row_)].value) / 3) * 100 if float(ws[d2col+str(row_)].value) != 0 else 0 # end for s1 col = next_col(next_col(col)) infolog("End sheet {0}".format( 4 if title is "ORF" else 5))
def sheet3(self,ws, book): infolog("Start sheet3") for i in range(0, len(self.s1)): infolog("Writing {0} ~ {1}".format(self.s1[i], self.s2[i])) # r : (s1,s2)의 범위 별 데이터의 row 변수 r = i*(15+len(book.GenomeStructure) + len(book.RepeatRegion))+ 1 ws['A'+str(r)] = "Region" ws.merge_cells(start_row=r, start_column=1, end_row=r+2, end_column=2) ws['C'+str(r)] = "Dumas Length (bp)" ws.merge_cells(start_row=r, start_column=3, end_row=r+2, end_column=3) ws['A'+str(r+3)] = "Genome Structure" ws.merge_cells(start_row=r+3, start_column=1, end_row=r+3+len(book.GenomeStructure), end_column=1) ws['A'+str(r+10)] = "Repeat region" ws.merge_cells(start_row=r+4+len(book.GenomeStructure), start_column=1, end_row=r+4+len(book.GenomeStructure)+len(book.RepeatRegion), end_column=1) # B, C 컬럼 데이터 : GenomeStructure, RepeatRegion 종류와 길이 # G_rows : GenomeStructure데이터를 넣기 위한 row 범위 # R_rows : RepeatRegion데이터를 넣기 위한 row 범위 G_rows = list(range(r+3, r+3+len(book.GenomeStructure))) cnt_num = 0 for ix in range(0, len(book.GenomeStructure)): ws['B' + str(G_rows[ix])] = book.GenomeStructure[ix] nlen = len(book.Dumas[ book.Dumas[ book.col_GenomeStructure ] == book.GenomeStructure[ix]]) # dumas length 기준 ws['C' + str(G_rows[ix])] = nlen cnt_num += nlen ws['B' + str(r+3+len(book.GenomeStructure))] = "Total" ws['C' + str(r+3+len(book.GenomeStructure))] = cnt_num cnt_num = 0 nr = r+4+len(book.GenomeStructure)+len(book.RepeatRegion) R_rows = list(range(r+4+len(book.GenomeStructure), nr)) for ix in range(0, len(book.RepeatRegion)): ws['B' + str(R_rows[ix])] = book.RepeatRegion[ix] nlen = len(book.Dumas[ book.Dumas[ book.col_RepeatRegion ] == book.RepeatRegion[ix]]) ws['C'+str(R_rows[ix])] = nlen cnt_num += nlen ws['B'+str(nr)] = 'Total' ws['C' + str(nr)] = cnt_num ## ORF NCR ws["A"+str(nr+1)] = "ORF" ws["A"+str(nr+2)] = "NCR" # ORF NCR Full Length ws['C'+str(nr+1)] = len(book.Dumas[ book.Dumas[book.col_ORF].isin(book.ORF)]) ws['C'+str(nr+2)] = len(book.Dumas[ book.Dumas[book.col_ORF].isin(book.NCR)]) # end B, C columns # ncol = 'D' # ncol : Column of 'Number of GPS' is need when calculating 'Number of GPS / Length' ncol = col = 'D' ws[col+str(r)] = "Number of GPS" gcol = next_n_col(col, book.nsheets) for c in range(0, book.nsheets): ws[col+str(r+2)] = book.sheet_list[c] # 35 이상 s, l, minor_ = book.get_Number_of_GPS(book.BP35[c], self.s1[i], self.s2[i]) # maf_ = str(round(s/l, 3)) + '%' if l is not 0 else '-' tx_rows = book.BP35[c].loc[ minor_.index ] tx_orf = tx_rows[ tx_rows[ book.col_ORF ].isin(book.ORF)] tx_ncr = tx_rows[ tx_rows[ book.col_ORF ].isin(book.NCR)] ws.merge_cells(start_row=r, start_column=4, end_row=r, end_column=4 + book.nsheets) ws[col+str(r+1)] = book.filename ws.merge_cells(start_row=r+1, start_column=4, end_row=r+1, end_column=4 + book.nsheets) self.insert_value_in_cell(ws, book, G_rows, col, gcol, book.BP35[c], minor_, book.col_GenomeStructure , "GPS") self.insert_value_in_cell(ws, book, R_rows, col, gcol, book.BP35[c], minor_, book.col_RepeatRegion , "GPS") ws[col+str(nr+1)] = len(tx_orf) ws[col+str(nr+2)] = len(tx_ncr) #180612 ws[gcol+str(nr+1)] = float(ws[gcol+str(nr+1)].value) + len(tx_orf) if ws[gcol+str(nr+1)].value != None else len(tx_orf) ws[gcol+str(nr+2)] = float(ws[gcol+str(nr+2)].value) + len(tx_ncr) if ws[gcol+str(nr+2)].value != None else len(tx_ncr) col = next_col(col) col = next_col(col) # "Average MAF at GPS" ws[col+str(r)] = "Average MAF at GPS" dcol = next_n_col(col, book.nsheets) for c in range(0, book.nsheets): ws[col+str(r+2)] = book.sheet_list[c] ws.merge_cells(start_row=r, start_column=5 + book.nsheets, end_row=r, end_column=5 + 2 * book.nsheets) ws[col+str(r+1)] = book.filename ws.merge_cells(start_row=r+1, start_column=5 + book.nsheets, end_row=r+1, end_column=5 + 2 * book.nsheets) s, l, minor_ = book.get_Number_of_GPS(book.BP35[c], self.s1[i], self.s2[i]) # maf_ = str(round(s/l, 3)) + '%' if l is not 0 else '-' tx_rows = book.BP35[c].loc[ minor_.index ] tx_orf = tx_rows[ tx_rows[ book.col_ORF ].isin(book.ORF)] tx_ncr = tx_rows[ tx_rows[ book.col_ORF ].isin(book.NCR)] self.insert_value_in_cell(ws, book, G_rows, col, dcol, book.BP35[c], minor_, book.col_GenomeStructure , "MAF") self.insert_value_in_cell(ws, book, R_rows, col, dcol, book.BP35[c], minor_, book.col_RepeatRegion , "MAF") # 180612 maf_ = (divide(tx_orf[["minor"]], book.BP35[c][['sum']].loc[ tx_orf.index]) * 100).sum()[0] ws[col+str(nr+1)] = str(round( maf_ /len(tx_orf), 3)) +'%' if len(tx_orf) is not 0 else '-' ws[dcol+str(nr+1)] = float(ws[dcol+str(nr+1)].value) + maf_ if ws[dcol+str(nr+1)].value != None else maf_ maf_ = (divide(tx_ncr[["minor"]], book.BP35[c][['sum']].loc[ tx_ncr.index]) * 100).sum()[0] ws[col+str(nr+2)] = str(round( maf_ /len(tx_ncr), 3)) +'%' if len(tx_ncr) is not 0 else '-' ws[dcol+str(nr+2)] = float(ws[dcol+str(nr+2)].value) + maf_ if ws[dcol+str(nr+2)].value != None else maf_ col = next_col(col) col = next_col(col) # "Number of GPS / length" d2col = next_n_col(col, book.nsheets) ws[col+str(r)] = "Number of GPS / length * 100" for c in range(0, book.nsheets): ws[col+str(r+2)] = book.sheet_list[c] ws.merge_cells(start_row=r, start_column=6 + 2 *book.nsheets, end_row=r, end_column=6 + 3 *book.nsheets) ws[col+str(r+1)] = book.filename ws.merge_cells(start_row=r+1, start_column=6 + 2 *book.nsheets, end_row=r+1, end_column=6 + 3 *book.nsheets) ridx = sum_ = 0 for rs in range(r+3, r+5+len(book.GenomeStructure)+len(book.RepeatRegion)): if ridx < len(book.GenomeStructure): llen_ = len(book.BP35[c][ book.BP35[c][ book.col_GenomeStructure] == book.GenomeStructure[ridx]]) sum_ += llen_ elif ridx == len(book.GenomeStructure): llen_ = sum_ sum_ = 0 elif len(book.GenomeStructure) < ridx and ridx < len(book.GenomeStructure)+1+len(book.RepeatRegion): llen_ = len(book.BP35[c][ book.BP35[c][ book.col_RepeatRegion] == book.RepeatRegion[ridx-1-len(book.GenomeStructure)]]) sum_ += llen_ else: llen_ = sum_ ws[col + str(rs)] = (ws[ncol+str(rs)].value / llen_) * 100 if llen_ is not 0 else '-' ridx = ridx + 1 # 180612 강제지정 ws[d2col + str(rs)] = ws[d2col + str(rs)].value + llen_ if ws[d2col + str(rs)].value != None else llen_ # ORF & NCR for rs in range(2): tmp, rs_ = 0, r+5+len(book.GenomeStructure)+len(book.RepeatRegion) + rs if rs == 0: tmp = len(book.BP35[c][ book.BP35[c][book.col_ORF].isin(book.ORF)]) ws[col+str(rs_)] = (ws[ncol+str(rs_)].value / tmp) * 100 if tmp != 0 else '-' else: tmp = len(book.BP35[c][ book.BP35[c][book.col_ORF].isin(book.NCR)]) ws[col+str(rs_)] = (ws[ncol+str(rs_)].value / tmp) * 100 if tmp != 0 else '-' # 180612 강제지정 ws[d2col + str(rs_)] = ws[d2col + str(rs_)].value + tmp if ws[d2col + str(rs_)].value != None else tmp ncol = next_col(ncol) col = next_col(col) # 180612 강제지정 for rs in range(r+3, r+7+len(book.GenomeStructure)+len(book.RepeatRegion)): # 수정 : 위치를 올림, 원래 위치 [수정2] ws[dcol+str(rs)] = str(round(float(ws[dcol+str(rs)].value) / float(ws[gcol+str(rs)].value), 3)) + '%' if float(ws[dcol+str(rs)].value) != 0 else '-' ws[gcol+str(rs)].value = float(ws[gcol+str(rs)].value) / book.nsheets # 수정2 : ws[dcol+str(rs)] = str(round(float(ws[dcol+str(rs)].value) / float(ws[gcol+str(rs)].value), 3)) + '%' if float(ws[dcol+str(rs)].value) != 0 else '-' ws[d2col + str(rs)] = (float(ws[gcol + str(rs)].value) / ( float(ws[d2col + str(rs)].value) / book.nsheets)) * 100 if float(ws[d2col + str(rs)].value) != 0.0 else 0 col = next_col(col) ws[col+str(r+3)] = str(self.s1[i]) + "~" + str(self.s2[i]) + "%" #if self.s2[i] != 51.0 else str(self.s1[i]) + "% 이상" infolog("finished writing GenomeStructure & RepeatRegion") #### Full Sequence col = next_col(next_col(next_col(col))) self.Full_Seq_in_sheet3(ws, col, book) infolog("End sheet3")
def Full_Seq_in_sheet3(self, ws, col, book): infolog("Start Full_Seq_in_sheet3") # Writing list of GenomeStructure & RepeatRegion, NCR & ORF for r in range(4, 4+len(book.GenomeStructure)): ws[col+str(r)] = book.GenomeStructure[r-4] ws[col+str(4+len(book.GenomeStructure))] = 'Total' for r in range(0, len(book.RepeatRegion)): row = r+5+len(book.GenomeStructure) ws[col+str(row)] = book.RepeatRegion[r] ws[col+str(5+len(book.GenomeStructure)+len(book.RepeatRegion))] = 'Total' rs_ = 5+len(book.GenomeStructure)+len(book.RepeatRegion) ws[col+str(rs_+1)], ws[col+str(rs_+2)] = 'ORF', 'NCR' # Writing the count of each sheets col = next_col(col) for i in range(0, book.nsheets): infolog("{0} Writing {1} sheet".format(time.time(), i)) ws[col+str(2)] = book.sheet_list[i] # 35 이하 포함 ws[col+str(3)] = '(BP_full)Length' sum_ = 0 for r in range(4, 4+len(book.GenomeStructure)): tmp = len(book.BPRaw[i][ book.BPRaw[i][ book.col_GenomeStructure] == book.GenomeStructure[r-4]]) if len(book.BPRaw[i])!=0 else 0 ws[col+str(r)] = tmp sum_ += tmp ws[col+str(4+len(book.GenomeStructure))] = sum_ sum_=0 for r in range(0, len(book.RepeatRegion)): row = r+5+len(book.GenomeStructure) tmp = len(book.BPRaw[i][ book.BPRaw[i][ book.col_RepeatRegion] == book.RepeatRegion[r]]) if len(book.BPRaw[i])!=0 else 0 ws[col+str(row)] = tmp sum_ += tmp ws[col+str(5+len(book.GenomeStructure)+len(book.RepeatRegion))] = sum_ # ORF & NCR ws[col+str(rs_+1)] = len(book.BPRaw[i][ book.BPRaw[i][ book.col_ORF].isin(book.ORF)]) if len(book.BPRaw[i])!=0 else 0 ws[col+str(rs_+2)] = len(book.BPRaw[i][ book.BPRaw[i][ book.col_ORF].isin(book.NCR)]) if len(book.BPRaw[i])!=0 else 0 # sum of basepair 35 이상 col = next_col(col) ws[col+str(3)] = '(BP_35이상)Length' sum_ = 0 for r in range(4, 4+len(book.GenomeStructure)): tmp = len(book.BP35[i][ book.BP35[i][ book.col_GenomeStructure] == book.GenomeStructure[r-4]]) if len(book.BPRaw[i])!=0 else 0 ws[col+str(r)] = tmp sum_ += tmp ws[col+str(4+len(book.GenomeStructure))] = sum_ sum_=0 for r in range(0, len(book.RepeatRegion)): row = r+5+len(book.GenomeStructure) tmp = len(book.BP35[i][ book.BP35[i][ book.col_RepeatRegion] == book.RepeatRegion[r]]) if len(book.BP35[i])!=0 else 0 ws[col+str(row)] = tmp sum_ += tmp ws[col+str(5+len(book.GenomeStructure)+len(book.RepeatRegion))] = sum_ ws[col+str(rs_+1)] = len(book.BP35[i][ book.BP35[i][ book.col_ORF ].isin(book.ORF)]) if len(book.BP35[i])!=0 else 0 ws[col+str(rs_+2)] = len(book.BP35[i][ book.BP35[i][ book.col_ORF ].isin(book.NCR)]) if len(book.BP35[i])!=0 else 0 col = next_col(col) infolog("End Full_Seq_in_sheet3")