def maf5list(self, ws, book): from analyzer import dataframe_to_rows from pandas import merge infolog("writing list of data") P0, merged = None, None dumascol = [book.col_GenomeStructure, book.col_RepeatRegion, book.col_ORF, book.col_DumaPosition, book.col_DumaSeq] for i in range(book.nsheets): book.BP35[i]['maf'] = divide(book.BP35[i][['minor']], book.BP35[i][['sum']]) * 100 if merged is not None: merged = merge(merged, book.BP35[i][ book.BP35[i]['maf']>=5.0], on=dumascol, how='outer', right_index=True, left_index=True) else: merged = book.BP35[i][ book.BP35[i]['maf']>= 5.0 ] merged = merged[book.col_DumaPosition].values.tolist() for i in range(book.nsheets): if P0 is not None: P0 = merge(P0, book.BP35[i][ book.BP35[i][book.col_DumaPosition].isin(merged) ], on=dumascol, how='outer', left_index=True, right_index=True) else: P0 = book.BP35[i][ book.BP35[i][book.col_DumaPosition].isin(merged) ] # P0.to_excel('a.xlsx') # bp = bp[ self.dumascol + []] rows = dataframe_to_rows(P0, index=False) for r_idx, row in enumerate(rows, 1): for c_idx, value in enumerate(row, 1): if str(value) != 'nan': ws.cell(row=r_idx, column=c_idx, value=value)
def sheet6(self,ws, book): infolog("Start sheet6") col = 'B' for i in range(0, len(self.s1)): ws[col+'2'] = str(self.s1[i]) + '~' + str(self.s2[i]) +'%' # if self.s2[i] != 51.0 else str(self.s1[i]) + '%~' # get basepair between s1 and s2 mxr = [] for n in range(0, book.nsheets): _, _, minor_ = book.get_Number_of_GPS(book.BP35[n], self.s1[i], self.s2[i]) mxr.append(book.BP35[n].loc[minor_.index]) for x in range(-1, book.nsheets): ws[col+'3'] = book.sheet_list[x] if x != -1 else "Major/Minor" rows = 4 for a in range(0, len(book.col_basepair)): for b in range(0, len(book.col_basepair)): if a==b: continue ws[col+str(rows)] = len(mxr[x][logical_and( mxr[x]['major_idx']==a, mxr[x]['minor_idx']==b )]) if x != -1 else book.col_basepair[a] + '/' + book.col_basepair[b] rows = rows + 1 col = next_col(col) col = next_col(col) infolog("End sheet6")
def mergesequence(self): from analyzer import infolog from pandas import merge infolog("mergesequence start") # print(type(self.sheet_list), len(self.sheet_list)) for sheet in self.sheets: # sheet = self.xls.parse(x) # print(type(sheet)) if self.P0 is None: self.P0 = sheet else: self.P0 = merge(self.P0, sheet, how='outer', on=[ self.col_GenomeStructure, self.col_RepeatRegion, self.col_ORF, self.col_DumaPosition, self.col_DumaSeq ]) sheet = sheet[sheet[self.col_Sequence] != '-'] self.BPRaw.append(sheet) self.BPRawLength.append(len(sheet)) self.Dumas = self.P0[self.P0[self.col_DumaSeq] != '-'] infolog("mergesequence end")
def BaseComp(self, ws, types_, book): """ major가 같은 것, minor도 같은 것만 추출 minor x -> 증가 포함 1. major_idx_x == major_idx_y - 치환 제거 2. (minor_idx_x == minor_idx_y) or ((minor_idx_x != minor_idx_y) and minor_x == 0) """ infolog("lowhigh BaseComp start") ws.merge_cells(start_row=1, start_column=1, end_row=2, end_column=1) ws['B1'], ws['B2'] = 'Major', 'Minor' ws['D1'], ws['H1'], ws['L1'], ws['P1'] = 'A', 'G', 'C', 'T' ws['G2'], ws['K2'], ws['O2'] = 'a', 'a', 'a' ws['C2'], ws['L2'], ws['P2'] = 'g', 'g', 'g' ws['D2'], ws['H2'], ws['Q2'] = 'c', 'c', 'c' ws['E2'], ws['I2'], ws['M2'] = 't', 't', 't' cols = ['A','G','C','T'] bp = self.BPmergedinc if types_ is "INC" else self.BPmergeddec for r in range(book.nsheets): ws['A'+str(r+3)] = book.sheet_list[r] for r in range(book.nsheets): col = 'C' cond1 = bp[r]['major_idx_x'] == bp[r]['major_idx_y'] cond2 = bp[r]['minor_idx_x'] == bp[r]['minor_idx_y'] cond3 = logical_and( bp[r]['minor_idx_x'] != bp[r]['minor_idx_y'], bp[r]['minor_x'] == 0 ) idx = logical_and(cond1, logical_or(cond2, cond3)) tmp = bp[r][ idx.values ] for a in range(len(cols)): for b in range(len(cols)): if a == b: continue # major는 _x, _y 가 이미 같고, minor는 _y기준으로 하면 _x에서 minor가 0이던값 무시됨 ws[col+str(r+3)] = len(tmp[ logical_and(tmp['major_idx_y'] == a, tmp['minor_idx_y'] == b).values ]) col = next_col(col) col = next_col(col) infolog("lowhigh BaseComp end")
def Init_Individual(self, book): infolog("lowhigh init start") sheet_names = [] BPmaf = [] for bp in book.BP35: #get maf 5% infolog("bp 5.0 ~ 50.0") s, l, minor_ = book.get_Number_of_GPS(bp, 5.0, 51.0) BPmaf.append(bp.loc[minor_.index]) for i, x in enumerate(BPmaf): for j, y in enumerate(BPmaf): if i >= j: continue infolog("{0} -> {1}".format(i,j)) sheet_names.append(book.sheet_list[i]+'->'+book.sheet_list[j]) # low의 maf5%이상과 high의 maf5%이상 값을 병합 # 둘 중 하나라도 sum >= 35.0 and maf 5% 이상인 값에서 분석을 진행 dumaspos = pd.merge(x,y, how='outer', on=self.dumascol, suffixes=('_x', '_y'), right_index=True, left_index=True)[[book.col_DumaPosition]] # dumas position 기준으로 양쪽 위치 추출 x1 = book.BP35[i][ book.BP35[i][book.col_DumaPosition].isin(dumaspos[book.col_DumaPosition].values.tolist())] if len(dumaspos) is not 0 else pd.DataFrame() y1 = book.BP35[j][ book.BP35[j][book.col_DumaPosition].isin(dumaspos[book.col_DumaPosition].values.tolist())] if len(dumaspos) is not 0 else pd.DataFrame() merged = pd.merge(x1,y1, how='outer', on=self.dumascol, suffixes=('_x', '_y'), right_index=True, left_index=True) if(len(x1) is 0 or len(y1) is 0): self.BPmergedinc.append(pd.DataFrame()) self.BPmergeddec.append(pd.DataFrame()) continue x_maf = divide(x1[['minor']], x1[['sum']]) * 100 y_maf = divide(y1[['minor']], y1[['sum']]) * 100 # 증가하는 위치와 감소하는 위치 ## 양쪽에 서로 없는 index가 있으면 Error - 한쪽에 값이 없으면 NaN : dropna함수로 제거 merged['diffofinc'] = y_maf - x_maf merged['diffofdec'] = x_maf - y_maf cond1 = merged['major_idx_x'] == merged['major_idx_y'] cond2 = merged['diffofinc'] >= 5.0 cond3 = merged['diffofdec'] >= 5.0 # 한쪽에 값이 없으면 drop되기 때문에 어디쪽에 해도 상관은 없음 self.BPmergedinc.append(pd.DataFrame.dropna(merged[ logical_and(cond1, cond2).values ], how='all')) self.BPmergeddec.append(pd.DataFrame.dropna(merged[ logical_and(cond1, cond3).values ], how='all')) book.sheet_list = sheet_names book.nsheets = len(sheet_names) infolog("lowhigh init end")
def sheet2(self, ws, book): infolog("Start Sheet2") ws['A1'] = "strain" ws.merge_cells(start_row=1, start_column=1, end_row=2, end_column=2) ws['C1'] = "Genome length (bp)" ws.merge_cells(start_row=1, start_column=3, end_row=2, end_column=3) ws['D1'] = "Range of minor allele frequency(%)" ws.merge_cells(start_row=1, start_column=4, end_row=1, end_column=4+len(self.s1)) col = 'D' for ss1, ss2 in zip(self.s1, self.s2): ws[col+'2'] = str(ss1) + '≤f<' + str(ss2) col = next_col(col) for i in range(book.nsheets): infolog("Writing {0} sheet".format(book.sheet_list[i])) ws['A' + str(i+3)] = book.sheet_list[i] ws['C' + str(i+3)] = book.BPRawLength[i] ws.merge_cells(start_row=3+i, start_column=1, end_row=3+i, end_column=2) col = 'D' for r1, r2 in zip(self.s1, self.s2): s, l, _ = book.get_Number_of_GPS(book.BP35[i], r1, r2) if len(book.BPxMinor[i])!=0 else (0,0) ws[col + str(i+3)] = str(round(s/l, 3))+'%' if l is not 0 else '-' col = next_col(col) infolog("End Sheet2")
def PolymorphicSite(self, ws, types_, book): infolog("lowhigh PolymorphicSite start") ws.title = "INC-Polymorphic site" # A col ws['A1'] = types_ ws.merge_cells(start_row=1, start_column=1, end_row=2, end_column=2) # C col ws['C1'] = "Increase in genetic polymrphism (%)" ws.merge_cells(start_row=1, start_column=3, end_row=1, end_column=6) from numpy import logical_and for i in range(book.nsheets): ws['A' + str(i+3)] = book.sheet_list[i] ws.merge_cells(start_row=i+3, start_column=1, end_row=i+3, end_column=2) col = 'C' bp = self.BPmergedinc[i][['diffofinc']] if types_ == 'INC' else self.BPmergeddec[i][['diffofdec']] for s1_, s2_ in zip(self.s1, self.s2): ws[col+'2'] = str(s1_) + '≤n<' + str(s2_) # 'Sum' if s1_ == 5.0 and s2_ == 51.0 else str(s1_) + '≤n<' + str(s2_) ws[col+str(i+3)] = len(bp[ logical_and(bp >= s1_, bp < s2_).values]) if len(bp) is not 0 else 0 col = next_col(col) infolog("lowhigh PolymorphicSite end")
def preprocessing(self): from analyzer import infolog from pandas import DataFrame infolog("preprocessing start") for bp in self.BPRaw: self.BPRawLength.append(len(bp)) # 염기 A, G, C, T의 합계 컬럼 추가 bp["sum"] = bp[self.col_basepair].sum(axis=1) # 35bp bp = bp[[ self.col_GenomeStructure, self.col_RepeatRegion, self.col_DumaPosition, self.col_DumaSeq, self.col_ORF, self.col_Sequence, "sum" ] + self.col_basepair] # sum 값이 35 이상인 데이터 추출 # constraions = 35.0 bp = bp[bp["sum"] >= self.constraints] pmajor, pminor = self.get_major_minor(bp[self.col_basepair]) # major, minor의 값을 추출 self.BPxMajor.append(pmajor) self.BPxMinor.append(pminor) # pminor, pmajor가 값을 가지지 않을 때 에러 발생 bp['minor'] = pminor if len(pminor) is not 0 else 0 bp['major'] = pmajor if len(pmajor) is not 0 else 0 self.BP35.append(bp) # major, minor의 인덱스 컬럼 추가 for bp in self.BP35: argx = -bp[self.col_basepair] # A, G, C, T의 순으로 우선순위를 위해 음수로 정렬 bps = argx.values.argsort(axis=1) bp['minor_idx'] = DataFrame(bps[:, 1], index=bp.index) bp['major_idx'] = DataFrame(bps[:, 0], index=bp.index) infolog("preprocessing end")
def sheet1(self,ws, book): infolog("Start Sheet1") ws.title = "Polymorphic site" ws['A1'] = "strain" ws['C1'] = "Genome length (bp)" ws['D1'] = "Average of MAF" ws['E1'] = "Number of polymorphic site" ws.merge_cells(start_row=1, start_column=5, end_row=1, end_column=5+len(self.s1)) ws.merge_cells(start_row=1, start_column=1, end_row=2, end_column=2) ws.merge_cells(start_row=1, start_column=3, end_row=2, end_column=3) ws.merge_cells(start_row=1, start_column=4, end_row=2, end_column=4) col = 'E' for ci in range(len(self.s1)): ws[col+'2'] = str(self.s1[ci]) + '≤n<' + str(self.s2[ci]) col = next_col(col) for ni in range(book.nsheets): infolog("Writing {} sheet".format(book.sheet_list[ni])) ws['A' + str(ni+3)] = book.sheet_list[ni] ws.merge_cells(start_row=3+ni, start_column=1, end_row=3+ni, end_column=2) ws['C' + str(ni+3)] = book.BPRawLength[ni] s, l, _ = book.get_Number_of_GPS(book.BP35[ni], 5.0, 51.0) ws['D' + str(ni+3)] = str(round(s / l, 3)) + '%' col = 'E' for ss1, ss2 in zip(self.s1, self.s2): ws[col + str(ni+3)] = book.get_Number_of_GPS(book.BP35[ni], ss1, ss2)[1] col = next_col(col) # ws['E2'] = "2.5≤f<5" # ws['F2'] = "5≤f<15" # ws['G2'] = "15≤f<25" # ws['H2'] = "25≤f" # ws['I2'] = "sum" # # A col # for i in range(book.nsheets): # infolog("{0} Writing {1} sheet".format(time.time(), book.sheet_list[i])) # ws['A' + str(i+3)] = book.sheet_list[i] # ws.merge_cells(start_row=3+i, start_column=1, end_row=3+i, end_column=2) # ws['C' + str(i+3)] = book.BPRawLength[i] # s, l, _ = book.get_Number_of_GPS(book.BP35[i], 5.0, 51.0) # ws['D' + str(i+3)] = str(round(s / l, 3)) + '%' # ws['E' + str(i+3)] = book.get_Number_of_GPS(book.BP35[i], 2.5, 5.0)[1] # ws["F" + str(i+3)] = book.get_Number_of_GPS(book.BP35[i], 5.0, 15.0)[1] # ws["G" + str(i+3)] = book.get_Number_of_GPS(book.BP35[i], 15.0, 25.0)[1] # ws["H" + str(i+3)] = book.get_Number_of_GPS(book.BP35[i], 25.0, 51.0)[1] # ws["I" + str(i+3)] = book.get_Number_of_GPS(book.BP35[i], 5.0, 51.0)[1] infolog("End Sheet1")
def sheet4_5(self, ws, title, book): infolog("Start sheet {0}".format(4 if title is "ORF" else 5)) ws['B2'] = book.filename ws["C2"] = "(BP_full)Length" ws.merge_cells(start_row=2, start_column=3, end_row=2, end_column=3+book.nsheets-1) ws["B3"] = title col = 'C' for i in range(0, book.nsheets): ws[col + str(3)] = book.sheet_list[i] col = next_col(col) # Full - 35이하 포함 col = 'B' orf_ = book.ORF if title is 'ORF' else book.NCR rows = list(range(4, len(orf_) + 4)) for c in range(0, book.nsheets+1): cnt_ = 0 for i in range(0, len(orf_)): if c is 0: ws[col+str(rows[i])] = orf_[i] else: tmp = len(book.BPRaw[c-1][ book.BPRaw[c-1][book.col_ORF] == orf_[i] ]) ws[col+str(rows[i])] = tmp cnt_ += tmp ws[col + str(4+len(orf_))] = cnt_ if c is not 0 else 'total' col = next_col(col) # s1~s2 범위 col = chr(ord('A')+3+book.nsheets) for i in range(0, len(self.s1)): co = (5+3*book.nsheets) * i + (5+book.nsheets) ws[col+str(2)] = str(self.s1[i]) + '~' + str(self.s2[i]) +'%' if self.s2[i] != 51.0 else str(self.s1[i]) + '%~' ws[col+str(3)] = title # NCR or ORF for ix in range(0, len(orf_)): ws[col+str(rows[ix])] = orf_[ix] ws[col+str(len(orf_) + 4)] = 'total' # Number of GPS of each sheets gps = list() col = next_col(col) ws[col+str(2)] = "Number of GPS" ws.merge_cells(start_row=2, start_column=co, end_row=2, end_column=co+book.nsheets) dcol = next_n_col(col, book.nsheets) for n in range(0, book.nsheets): ws[col+str(3)] = book.sheet_list[n] _, _, tx_minor = book.get_Number_of_GPS(book.BP35[n], self.s1[i], self.s2[i]) tx_rows = book.BP35[n].loc[ tx_minor.index ] g = [] cnt_ = 0 for ix in range(0, len(orf_)): mrows = tx_rows[ tx_rows[book.col_ORF] == orf_[ix] ] ws[col+str(rows[ix])] = len(mrows) cnt_ += len(mrows) g.append(len(mrows)) # 180612 ws[dcol+str(rows[ix])] = float(ws[dcol+str(rows[ix])].value) + len(mrows) if ws[dcol+str(rows[ix])].value != None else len(mrows) ws[col+str(len(orf_) + 4)] = cnt_ ws[dcol+str(len(orf_) + 4)] = float(ws[dcol+str(len(orf_) + 4)].value) + cnt_ if ws[dcol+str(len(orf_) + 4)].value != None else cnt_ g.append(cnt_) gps.append(g) col = next_col(col) col = next_col(col) gcol = next_n_col(col, book.nsheets) # Average MAF at GPS of each sheets ws[col+str(2)] = "Average MAF at GPS" ws.merge_cells(start_row=2, start_column=co+book.nsheets+1, end_row=2, end_column=co+2*book.nsheets+1) for n in range(0, book.nsheets): val, cnt_ = 0, 0 ws[col+str(3)] = book.sheet_list[n] _, _, tx_minor = book.get_Number_of_GPS(book.BP35[n], self.s1[i], self.s2[i]) tx_rows = book.BP35[n].loc[ tx_minor.index ] for ix in range(0, len(orf_)): mrows = tx_rows[ tx_rows[book.col_ORF] == orf_[ix] ] if len(mrows) is 0: ws[col+str(rows[ix])] = '-' # 180612 ws[gcol+str(rows[ix])] = ws[gcol+str(rows[ix])].value if ws[gcol+str(rows[ix])].value != None else 0 else: maf_ = divide(mrows[["minor"]], mrows[["sum"]]) * 100 val += maf_.sum()[0] cnt_ += len(maf_) ws[col+str(rows[ix])] = str(round(maf_.sum()[0] / len(maf_), 3)) + '%' # 180612 ws[gcol+str(rows[ix])] = float(ws[gcol+str(rows[ix])].value) + maf_.sum()[0] if ws[gcol+str(rows[ix])].value != None else maf_.sum()[0] # total ws[col+str(len(orf_)+4)] = str(round(val / cnt_, 3)) + '%' if cnt_ is not 0 else '-' ws[gcol+str(len(orf_)+4)] = float(ws[gcol+str(len(orf_)+4)].value) + val if ws[gcol+str(len(orf_)+4)].value != None else val col = next_col(col) col = next_col(col) # "Number of GPS / Length" of each sheets ws[col+str(2)] = "Number of GPS / Length * 100" ws.merge_cells(start_row=2, start_column=co+2*book.nsheets+2, end_row=2, end_column=co+3*book.nsheets+2) ncol = 'C' d2col = next_n_col(col, book.nsheets) for n in range(0, book.nsheets): ws[col+str(3)] = book.sheet_list[n] for ix in range(0, len(orf_)): l_ = int(str(ws[ncol+str(rows[ix])].value)) ws[col+str(rows[ix])] = (gps[n][ix] / l_)*100 if l_ is not 0 else '-' ws[d2col+str(rows[ix])] = float(ws[d2col+str(rows[ix])].value) + l_ if ws[d2col+str(rows[ix])].value != None else l_ # total l_ = int(str(ws[ncol+str(len(orf_)+4)].value)) ws[col+str(len(orf_)+4)] = (gps[n][-1] / l_)*100 if l_ is not 0 else '-' ws[d2col+str(len(orf_)+4)] = float(ws[d2col+str(len(orf_)+4)].value) + l_ if ws[d2col+str(len(orf_)+4)].value != None else l_ ncol = next_col(ncol) col = next_col(col) for row_ in range(4, len(orf_)+5): ws[dcol+str(row_)] = float(ws[dcol+str(row_)].value) / book.nsheets if float(ws[dcol+str(row_)].value) != 0 else 0 ws[gcol+str(row_)] = str(round(float(ws[gcol+str(row_)].value) / float(ws[dcol+str(row_)].value), 3)) + '%' if float(ws[dcol+str(row_)].value) != 0 else '-' ws[d2col+str(row_)] = float(ws[dcol+str(row_)].value) / (float(ws[d2col+str(row_)].value) / 3) * 100 if float(ws[d2col+str(row_)].value) != 0 else 0 # end for s1 col = next_col(next_col(col)) infolog("End sheet {0}".format( 4 if title is "ORF" else 5))
def Init_Average(self, book): # book.save_data(book, 'Average_book_vaccine.pkl') for sn in book.sheet_list: assert sn[0] == 'A' or sn[0] == 'B', "sheet name" # for i in range(0,len(book.nsheets)): # book.BPRaw[i]['sum_'+ book.sheet_list[i]] = book.BPRaw[i][book.basepair].sum(axis=1) # A & B 위치별 MAF 평균 # Avg(A) ~ Avg(B)의 변화 분석 infolog("Init_Average") # # 필요 최종 column : dumacol(5), diffofdec, diffofinc, minor_idx_x, minor_idx_y, ->major(2) / minor.. # # 필요 값 : any maf 5% 이상, all maf 5% 이상, 값이 있는 곳 ## 정한 값 : minor_idx_x -> A의 가장 마지막.. / minor_idx_y -> B의 가장 마지막 ## minor_x -> A의 minor 평균 / minor_y -> B의 minor 평균 ## for j, i in enumerate(book.BP35): i['maf_'+book.sheet_list[j]] = divide(i[['minor']], i[['sum']]) * 100 i[book.sheet_list[j][0].upper()+'_minor_idx'] = i[['minor_idx']] book.BP35[j] = i.drop(columns='minor_idx') merged = book.BP35[0].drop(columns=['seq', 'sum', 'A', 'G', 'C', 'T', 'major']) merged['maf_'+book.sheet_list[0][0].upper()] = book.BP35[0]['maf_'+book.sheet_list[0]] for i in range(1, book.nsheets): book.BP35[i] = book.BP35[i].drop(columns=['seq', 'sum', 'A', 'G', 'C', 'T', 'major']) grp = book.sheet_list[i][0].upper() col_maf = 'maf_' + grp col_minor_idx = grp+'_minor_idx' merged = pd.merge(merged, book.BP35[i], on=self.dumascol, how='outer') if not merged.columns.contains(col_maf): merged[col_maf] = merged['maf_' + book.sheet_list[i] ] else : merged[col_maf] = merged[[col_maf, 'maf_'+book.sheet_list[i]]].sum(axis=1) if merged.columns.contains(col_minor_idx+'_x'): merged[col_minor_idx] = merged[col_minor_idx+'_y'] merged = merged.drop(columns=col_minor_idx+'_x').rename(index=str, columns={col_minor_idx+'_y' : col_minor_idx}) merged = merged[ merged['major_idx_x'] == merged['major_idx_y'] ] merged = merged.drop(columns = 'major_idx_x').rename(index=str, columns={"major_idx_y": "major_idx"}) Acol, Bcol = [], [] for i in range(len(book.sheet_list)): if book.sheet_list[i][0].upper() == 'A': Acol.append('maf_'+book.sheet_list[i]) else: Bcol.append('maf_'+book.sheet_list[i]) merged['maf_A'] = merged['maf_A'] / len(Acol) merged['maf_B'] = merged['maf_B'] / len(Bcol) A_all, A_any = True, False B_all, B_any = True, False for i in Acol: A_all = logical_and( A_all, merged[i] >= 5.0 ) A_any = logical_or( A_any, merged[i] >= 5.0 ) for i in Bcol: B_all = logical_and( B_all, merged[i] >= 5.0 ) B_any = logical_or( B_any, merged[i] >= 5.0 ) # A, B = [], [] merged['minor_idx_x'] = merged['A_minor_idx'].T[-1:].T merged['minor_idx_y'] = merged['B_minor_idx'].T[-1:].T merged['major_idx_x'] = merged['major_idx'] merged['major_idx_y'] = merged['major_idx'] merged = merged.drop(columns = ['A_minor_idx', 'B_minor_idx']) tmpx = merged['minor_x'].sum(axis=1) / len(Acol) tmpy = merged['minor_y'].sum(axis=1) / len(Bcol) merged = merged.drop(columns=['minor_x', 'minor_y']) merged['minor_x'] = tmpx merged['minor_y'] = tmpy merged['diffofinc'] = merged['maf_B'] - merged['maf_A'] merged['diffofdec'] = merged['maf_A'] - merged['maf_B'] cond2 = merged['diffofinc'] >= 5.0 cond3 = merged['diffofdec'] >= 5.0 maf_5_all = merged[ logical_and(A_all, B_all).values ] maf_5_any = merged[ logical_or(A_any, B_any).values ] merged.to_excel('{}_merged.xlsx'.format(book.filename)) maf_5_all.to_excel('{}_maf_5_all.xlsx'.format(book.filename)) maf_5_any.to_excel('{}_maf_5_any.xlsx'.format(book.filename)) # print(len(merged), len(maf_5_all), len(maf_5_any)) self.BPmergedinc.append( merged) self.BPmergedinc.append(maf_5_all) self.BPmergedinc.append(maf_5_any) self.BPmergeddec.append( merged) self.BPmergeddec.append(maf_5_all) self.BPmergeddec.append(maf_5_any) book.sheet_list = ['All', 'All_5', 'Any_5'] book.nsheets = len(book.sheet_list) infolog("End Init_Average")
def ORFNCR(self, ws, types_, title, book): infolog("lowhigh {0} start".format(title)) ws['A1'] = title ws.merge_cells(start_row=1, start_column=1, end_row=2, end_column=1) ws['B1'] = 'Length' ws.merge_cells(start_row=1, start_column=2, end_row=2, end_column=2) orfncr = book.ORF if title is 'ORF' else book.NCR col_diff = "diffofinc" if types_ == 'INC' else "diffofdec" bp = self.BPmergedinc if types_ == 'INC' else self.BPmergeddec sum_, col = 0, 'B' for si in range(-1, book.nsheets): sum_ = 0 if si != -1: ws[col+'2'] = book.sheet_list[si] for r in range(len(orfncr)): if si == -1: ws['A'+str(r+3)] = orfncr[r] cnt = len(book.Dumas[ book.Dumas[book.col_ORF] == orfncr[r] ]) ws[col+str(r+3)] = cnt sum_ += cnt else: cnt = len( bp[si][ bp[si][book.col_ORF] == orfncr[r] ] ) ws[col+str(r+3)] = cnt sum_ += cnt ws[col+str(3+len(orfncr))] = sum_ col = next_col(col) ws['A'+str(3+len(orfncr))] = 'Total' col = next_col(col) for i in range(len(self.s1)): # ORF or CNR 종류 ws[col+'1'] = str(self.s1[i])+'~'+str(self.s2[i]) #if self.s2[i]!=51.0 else str(self.s1[i])+'~' for r in range(len(orfncr)): ws[col+str(r+3)] = orfncr[r] col = next_col(col) col_GPS = col ws[col+'1'] = '{0} in genetic polymorphism'.format("Increase" if types_ == 'INC' else "Decrease") for s in range(book.nsheets): ws[col+'2'] = book.sheet_list[s] bpx = bp[s][ logical_and(bp[s][col_diff] >= self.s1[i], bp[s][col_diff] < self.s2[i]).values ] sum_ = 0 for r in range(3, 3+len(orfncr)): cnt = len(bpx[ bpx[book.col_ORF] == orfncr[r-3]]) ws[col+str(r)] = cnt sum_ += cnt ws[col+str(3+len(orfncr))] = sum_ col = next_col(col) ws[col+'1'] = 'Average {0}'.format("Increase" if types_ == 'INC' else "Decrease") for s in range(book.nsheets): ws[col+'2'] = book.sheet_list[s] bpx = bp[s][ logical_and(bp[s][col_diff] >= self.s1[i], bp[s][col_diff] < self.s2[i]).values ] sum_ = 0 for r in range(3, 3+len(orfncr)): cnt = bpx[ bpx[book.col_ORF] == orfncr[r-3]][[col_diff]] ws[col+str(r)] = str(round(cnt.sum()[0] / len(cnt), 3))+'%' if len(cnt) is not 0 else 'N/A' sum_ += cnt.sum()[0] col = next_col(col) ws[col+'1'] = 'Number of GPS / Length' for s in range(book.nsheets): ws[col+'2'] = book.sheet_list[s] for r in range(3, 3+len(orfncr)): s, l = float(ws[col_GPS+str(r)].value), float(ws['B'+str(r)].value) ws[col+str(r)] = str( round(s/l, 6) ) if l != 0 else 'N/A' col_GPS = next_col(col_GPS) col = next_col(col) col = next_col(col) infolog("lowhigh {0} end".format(title))
def GenomeStr(self, ws, types_, book): infolog("lowhigh GenomeStr start") bp = self.BPmergedinc if types_ == 'INC' else self.BPmergeddec col_diff = "diffofinc" if types_ == 'INC' else "diffofdec" # s1 ~ s2 for i in range(len(self.s1)): rows = i*(14+len(book.GenomeStructure) + len(book.RepeatRegion))+ 1 ws['A'+str(rows)] = 'Region' ws.merge_cells(start_row=rows, start_column=1, end_row=rows+1, end_column=1) ws['B'+str(rows)] = 'Dumas Length' ws.merge_cells(start_row=rows, start_column=2, end_row=rows+1, end_column=2) for r in range(2, 2+len(book.GenomeStructure)): ws['A' + str(rows+r)] = book.GenomeStructure[r-2] ws['B' + str(rows+r)] = len(book.Dumas[ book.Dumas[book.col_GenomeStructure] == book.GenomeStructure[r-2] ]) ws['A' + str(rows+2+len(book.GenomeStructure))] = 'Total' ws['B' + str(rows+2+len(book.GenomeStructure))] = len(book.Dumas) sum_ = 0 for r in range(len(book.RepeatRegion)): row = rows+r+3+len(book.GenomeStructure) ws['A' + str(row)] = book.RepeatRegion[r] cnt = len(book.Dumas[ book.Dumas[book.col_RepeatRegion] == book.RepeatRegion[r] ]) ws['B' + str(row)] = cnt sum_ += cnt leng = rows+ len(book.GenomeStructure) + len(book.RepeatRegion) ws['A' + str(leng+3)] = 'Total' ws['B' + str(leng+3)] = sum_ ws['A' + str(leng+4)] = 'ORF' ws['B' + str(leng+4)] = len(book.Dumas[ book.Dumas[book.col_ORF].isin(book.ORF)]) ws['A' + str(leng+5)] = 'NCR' ws['B' + str(leng+5)] = len(book.Dumas[ book.Dumas[book.col_ORF].isin(book.NCR)]) col = 'C' ws[col+str(rows)] = '{0} in genetic polymorphism'.format("Increase" if types_ is 'INC' else "Decrease") ws.merge_cells(start_row=rows, start_column=3, end_row=rows, end_column=2+book.nsheets) ws[chr(ord(col)+book.nsheets)+str(rows)] = "Average {0}".format("Increase" if types_ is 'INC' else "Decrease") ws.merge_cells(start_row=rows, start_column=3+book.nsheets, end_row=rows, end_column=2+2*book.nsheets) for s in range(book.nsheets): idx = logical_and( bp[s][col_diff] >= self.s1[i], bp[s][col_diff] < self.s2[i] ) bpx = bp[s][idx.values] ncol = chr(ord(col)+book.nsheets) ws[col+str(rows+1)] = book.sheet_list[s] ws[chr(ord(col)+book.nsheets)+str(rows+1)] = book.sheet_list[s] sum_, avg_ = 0, 0 for r in range(2, 2+len(book.GenomeStructure)): cnt = bpx[ bpx[book.col_GenomeStructure] == book.GenomeStructure[r-2]][[col_diff]] ws[col+str(rows+r)] = len(cnt) ws[ncol+str(rows+r)] = str(round((cnt.sum()[0] / len(cnt)), 3))+'%' if len(cnt) is not 0 else 'N/A' avg_ += cnt.sum()[0] sum_ += len(cnt) # Total ws[col+str(rows+2+len(book.GenomeStructure))] = sum_ ws[chr(ord(col)+book.nsheets)+str(rows+2+len(book.GenomeStructure))] = str(round(avg_/sum_, 3))+'%' if sum_ is not 0 else 'N/A' sum_, avg_ = 0, 0 for r in range(len(book.RepeatRegion)): row = rows+r+3+len(book.GenomeStructure) cnt = bpx[ bpx[book.col_RepeatRegion] == book.RepeatRegion[r]][[col_diff]] ws[col+str(row)] = len(cnt) ws[ncol+str(row)] = str(round((cnt.sum()[0] / len(cnt)),3))+'%' if len(cnt) is not 0 else 'N/A' sum_ += len(cnt) avg_ += cnt.sum()[0] ws[col+str(3+leng)] = sum_ ws[ncol+str(3+leng)] = str(round(avg_/sum_,3))+'%' if sum_ is not 0 else 'N/A' # ORF & NCR cnt = bpx[ bpx[book.col_ORF].isin(book.ORF)][[col_diff]] ws[col+str(4+leng)] = len(cnt) ws[ncol+str(4+leng)] = str(round(cnt.sum()[0] / len(cnt), 3))+'%' if len(cnt) is not 0 else 'N/A' cnt = bpx[ bpx[book.col_ORF].isin(book.NCR)][[col_diff]] ws[col+str(5+leng)] = len(cnt) ws[ncol+str(5+leng)] = str(round(cnt.sum()[0] / len(cnt), 3))+'%' if len(cnt) is not 0 else 'N/A' col = next_col(col) ncol = chr(ord(ncol)+book.nsheets) ws[ncol+str(rows+2)] = str(self.s1[i])+"~"+str(self.s2[i]) #if self.s2[i] != 51.0 else str(self.s1[i])+"~" infolog("lowhigh GenomeStr end")
def sheet7(self, ws, book): infolog("Start sheet7") for i in range(0, len(self.s1)): r = i * (book.nsheets + 12) + 2 ws["A"+str(r)] = str(self.s1[i]) + "~" + str(self.s2[i]) + "%" #if self.s2[i] != 51.0 else str(self.s1[i]) + "%~" ws["B"+str(r)] = "Virus" ws.merge_cells(start_row=r, start_column=2, end_row=r+2, end_column=2) ws["C"+str(r)] = "Number of GPS" ws.merge_cells(start_row=r, start_column=3, end_row=r+2, end_column=3) ws["D"+str(r)] = "GPS Mean" ws.merge_cells(start_row=r, start_column=4, end_row=r+2, end_column=4) ws["E"+str(r)] = "Major" ws.merge_cells(start_row=r, start_column=5, end_row=r+1, end_column=5) ws["E"+str(r+2)] = "Minor" ws['F'+str(r)] = 'A' ws.merge_cells(start_row=r, start_column=6, end_row=r+1, end_column=8) ws['I'+str(r)] = 'G' ws.merge_cells(start_row=r, start_column=9, end_row=r+1, end_column=11) ws['L'+str(r)] = 'C' ws.merge_cells(start_row=r, start_column=12, end_row=r+1, end_column=14) ws['O'+str(r)] = 'T' ws.merge_cells(start_row=r, start_column=15, end_row=r+1, end_column=17) ws['E'+str(r+3+book.nsheets)] = 'sum' for x in range(0, book.nsheets): row, mafrow = r+x+3, r+book.nsheets+x+5 s, l, minor_ = book.get_Number_of_GPS(book.BP35[x], self.s1[i], self.s2[i]) mxr = book.BP35[x].loc[minor_.index] # s, l = self.book.get_Number_of_GPS(self.PxMinor[x], self.PxSum[x], self.s1[i], self.s2[i]) if len(self.PxMinor[x])!=0 else (0,0) ws["B"+str(row)] = book.sheet_list[x] ws['C'+str(row)] = len(minor_) ws['D'+str(row)] = str(round(s/l, 3)) + '%' if l != 0 else '-' col = 'F' for a in range(0, 4): for b in range(0, 4): if a == b: continue else: ws[col+str(r+2)] = book.col_basepair[b].lower() ext_ = mxr[logical_and( mxr['major_idx']==a, mxr['minor_idx']==b )] ws[col+str(row)] = len(ext_) if len(ext_) is not 0: # maf들의 평균 maf_ = (divide( ext_[['minor']], ext_[['sum']]) * 100).sum()[0] ws[col+str(mafrow)] = str(round( maf_ / len(ext_), 3)) + '%' else: ws[col+str(mafrow)] = 'N/A' # 전체 개수의 maf if ws[col+str(r+3+book.nsheets)].value == None: ws[col+str(r+3+book.nsheets)] = len(ext_) else: ws[col+str(r+3+book.nsheets)].value += len(ext_) if ws[col+str(r+5+ 2*book.nsheets)].value == None: ws[col+str(r+5+ 2*book.nsheets)] = maf_ # ws[col+str(r+5+ 2*book.nsheets+1)] = ext_[['sum']].sum()[0] else: # print(i, "mj ", a, "mn ", b, ext_[['minor']].sum()[0]) ws[col+str(r+5+ 2*book.nsheets)].value += maf_ # ws[col+str(r+5+ 2*book.nsheets+1)].value += ext_[['sum']].sum()[0] col = next_col(col) # total maf col = 'F' numrow, mnrow = r+3+book.nsheets, r+5+2*book.nsheets for c in range(0, 12): ws[col+str(mnrow)] = str(round( float(ws[col+str(mnrow)].value) / float(ws[col+str(numrow)].value), 3))+'%' if ws[col+str(numrow)].value != 0 else 'N/A' col = next_col(col) infolog("End sheet7")
def get_Number_of_GPS(self, BP, s1, s2): """ s1, s2 범위의 maf값을 갖는 base pair의 수, 길이, minor 값을 """ from analyzer import infolog infolog("before import") from numpy import divide, logical_and infolog("before divide") infolog(BP.columns) maf_ = divide(BP[['minor']], BP[['sum']]) * 100 infolog("before and") idx = logical_and(maf_ >= s1, maf_ < s2) idx = idx.values.tolist() infolog("before return") infolog(maf_[idx].sum()[0]) return maf_[idx].sum()[0], len(BP[idx]), BP[['minor']][idx]
def sheet3(self,ws, book): infolog("Start sheet3") for i in range(0, len(self.s1)): infolog("Writing {0} ~ {1}".format(self.s1[i], self.s2[i])) # r : (s1,s2)의 범위 별 데이터의 row 변수 r = i*(15+len(book.GenomeStructure) + len(book.RepeatRegion))+ 1 ws['A'+str(r)] = "Region" ws.merge_cells(start_row=r, start_column=1, end_row=r+2, end_column=2) ws['C'+str(r)] = "Dumas Length (bp)" ws.merge_cells(start_row=r, start_column=3, end_row=r+2, end_column=3) ws['A'+str(r+3)] = "Genome Structure" ws.merge_cells(start_row=r+3, start_column=1, end_row=r+3+len(book.GenomeStructure), end_column=1) ws['A'+str(r+10)] = "Repeat region" ws.merge_cells(start_row=r+4+len(book.GenomeStructure), start_column=1, end_row=r+4+len(book.GenomeStructure)+len(book.RepeatRegion), end_column=1) # B, C 컬럼 데이터 : GenomeStructure, RepeatRegion 종류와 길이 # G_rows : GenomeStructure데이터를 넣기 위한 row 범위 # R_rows : RepeatRegion데이터를 넣기 위한 row 범위 G_rows = list(range(r+3, r+3+len(book.GenomeStructure))) cnt_num = 0 for ix in range(0, len(book.GenomeStructure)): ws['B' + str(G_rows[ix])] = book.GenomeStructure[ix] nlen = len(book.Dumas[ book.Dumas[ book.col_GenomeStructure ] == book.GenomeStructure[ix]]) # dumas length 기준 ws['C' + str(G_rows[ix])] = nlen cnt_num += nlen ws['B' + str(r+3+len(book.GenomeStructure))] = "Total" ws['C' + str(r+3+len(book.GenomeStructure))] = cnt_num cnt_num = 0 nr = r+4+len(book.GenomeStructure)+len(book.RepeatRegion) R_rows = list(range(r+4+len(book.GenomeStructure), nr)) for ix in range(0, len(book.RepeatRegion)): ws['B' + str(R_rows[ix])] = book.RepeatRegion[ix] nlen = len(book.Dumas[ book.Dumas[ book.col_RepeatRegion ] == book.RepeatRegion[ix]]) ws['C'+str(R_rows[ix])] = nlen cnt_num += nlen ws['B'+str(nr)] = 'Total' ws['C' + str(nr)] = cnt_num ## ORF NCR ws["A"+str(nr+1)] = "ORF" ws["A"+str(nr+2)] = "NCR" # ORF NCR Full Length ws['C'+str(nr+1)] = len(book.Dumas[ book.Dumas[book.col_ORF].isin(book.ORF)]) ws['C'+str(nr+2)] = len(book.Dumas[ book.Dumas[book.col_ORF].isin(book.NCR)]) # end B, C columns # ncol = 'D' # ncol : Column of 'Number of GPS' is need when calculating 'Number of GPS / Length' ncol = col = 'D' ws[col+str(r)] = "Number of GPS" gcol = next_n_col(col, book.nsheets) for c in range(0, book.nsheets): ws[col+str(r+2)] = book.sheet_list[c] # 35 이상 s, l, minor_ = book.get_Number_of_GPS(book.BP35[c], self.s1[i], self.s2[i]) # maf_ = str(round(s/l, 3)) + '%' if l is not 0 else '-' tx_rows = book.BP35[c].loc[ minor_.index ] tx_orf = tx_rows[ tx_rows[ book.col_ORF ].isin(book.ORF)] tx_ncr = tx_rows[ tx_rows[ book.col_ORF ].isin(book.NCR)] ws.merge_cells(start_row=r, start_column=4, end_row=r, end_column=4 + book.nsheets) ws[col+str(r+1)] = book.filename ws.merge_cells(start_row=r+1, start_column=4, end_row=r+1, end_column=4 + book.nsheets) self.insert_value_in_cell(ws, book, G_rows, col, gcol, book.BP35[c], minor_, book.col_GenomeStructure , "GPS") self.insert_value_in_cell(ws, book, R_rows, col, gcol, book.BP35[c], minor_, book.col_RepeatRegion , "GPS") ws[col+str(nr+1)] = len(tx_orf) ws[col+str(nr+2)] = len(tx_ncr) #180612 ws[gcol+str(nr+1)] = float(ws[gcol+str(nr+1)].value) + len(tx_orf) if ws[gcol+str(nr+1)].value != None else len(tx_orf) ws[gcol+str(nr+2)] = float(ws[gcol+str(nr+2)].value) + len(tx_ncr) if ws[gcol+str(nr+2)].value != None else len(tx_ncr) col = next_col(col) col = next_col(col) # "Average MAF at GPS" ws[col+str(r)] = "Average MAF at GPS" dcol = next_n_col(col, book.nsheets) for c in range(0, book.nsheets): ws[col+str(r+2)] = book.sheet_list[c] ws.merge_cells(start_row=r, start_column=5 + book.nsheets, end_row=r, end_column=5 + 2 * book.nsheets) ws[col+str(r+1)] = book.filename ws.merge_cells(start_row=r+1, start_column=5 + book.nsheets, end_row=r+1, end_column=5 + 2 * book.nsheets) s, l, minor_ = book.get_Number_of_GPS(book.BP35[c], self.s1[i], self.s2[i]) # maf_ = str(round(s/l, 3)) + '%' if l is not 0 else '-' tx_rows = book.BP35[c].loc[ minor_.index ] tx_orf = tx_rows[ tx_rows[ book.col_ORF ].isin(book.ORF)] tx_ncr = tx_rows[ tx_rows[ book.col_ORF ].isin(book.NCR)] self.insert_value_in_cell(ws, book, G_rows, col, dcol, book.BP35[c], minor_, book.col_GenomeStructure , "MAF") self.insert_value_in_cell(ws, book, R_rows, col, dcol, book.BP35[c], minor_, book.col_RepeatRegion , "MAF") # 180612 maf_ = (divide(tx_orf[["minor"]], book.BP35[c][['sum']].loc[ tx_orf.index]) * 100).sum()[0] ws[col+str(nr+1)] = str(round( maf_ /len(tx_orf), 3)) +'%' if len(tx_orf) is not 0 else '-' ws[dcol+str(nr+1)] = float(ws[dcol+str(nr+1)].value) + maf_ if ws[dcol+str(nr+1)].value != None else maf_ maf_ = (divide(tx_ncr[["minor"]], book.BP35[c][['sum']].loc[ tx_ncr.index]) * 100).sum()[0] ws[col+str(nr+2)] = str(round( maf_ /len(tx_ncr), 3)) +'%' if len(tx_ncr) is not 0 else '-' ws[dcol+str(nr+2)] = float(ws[dcol+str(nr+2)].value) + maf_ if ws[dcol+str(nr+2)].value != None else maf_ col = next_col(col) col = next_col(col) # "Number of GPS / length" d2col = next_n_col(col, book.nsheets) ws[col+str(r)] = "Number of GPS / length * 100" for c in range(0, book.nsheets): ws[col+str(r+2)] = book.sheet_list[c] ws.merge_cells(start_row=r, start_column=6 + 2 *book.nsheets, end_row=r, end_column=6 + 3 *book.nsheets) ws[col+str(r+1)] = book.filename ws.merge_cells(start_row=r+1, start_column=6 + 2 *book.nsheets, end_row=r+1, end_column=6 + 3 *book.nsheets) ridx = sum_ = 0 for rs in range(r+3, r+5+len(book.GenomeStructure)+len(book.RepeatRegion)): if ridx < len(book.GenomeStructure): llen_ = len(book.BP35[c][ book.BP35[c][ book.col_GenomeStructure] == book.GenomeStructure[ridx]]) sum_ += llen_ elif ridx == len(book.GenomeStructure): llen_ = sum_ sum_ = 0 elif len(book.GenomeStructure) < ridx and ridx < len(book.GenomeStructure)+1+len(book.RepeatRegion): llen_ = len(book.BP35[c][ book.BP35[c][ book.col_RepeatRegion] == book.RepeatRegion[ridx-1-len(book.GenomeStructure)]]) sum_ += llen_ else: llen_ = sum_ ws[col + str(rs)] = (ws[ncol+str(rs)].value / llen_) * 100 if llen_ is not 0 else '-' ridx = ridx + 1 # 180612 강제지정 ws[d2col + str(rs)] = ws[d2col + str(rs)].value + llen_ if ws[d2col + str(rs)].value != None else llen_ # ORF & NCR for rs in range(2): tmp, rs_ = 0, r+5+len(book.GenomeStructure)+len(book.RepeatRegion) + rs if rs == 0: tmp = len(book.BP35[c][ book.BP35[c][book.col_ORF].isin(book.ORF)]) ws[col+str(rs_)] = (ws[ncol+str(rs_)].value / tmp) * 100 if tmp != 0 else '-' else: tmp = len(book.BP35[c][ book.BP35[c][book.col_ORF].isin(book.NCR)]) ws[col+str(rs_)] = (ws[ncol+str(rs_)].value / tmp) * 100 if tmp != 0 else '-' # 180612 강제지정 ws[d2col + str(rs_)] = ws[d2col + str(rs_)].value + tmp if ws[d2col + str(rs_)].value != None else tmp ncol = next_col(ncol) col = next_col(col) # 180612 강제지정 for rs in range(r+3, r+7+len(book.GenomeStructure)+len(book.RepeatRegion)): # 수정 : 위치를 올림, 원래 위치 [수정2] ws[dcol+str(rs)] = str(round(float(ws[dcol+str(rs)].value) / float(ws[gcol+str(rs)].value), 3)) + '%' if float(ws[dcol+str(rs)].value) != 0 else '-' ws[gcol+str(rs)].value = float(ws[gcol+str(rs)].value) / book.nsheets # 수정2 : ws[dcol+str(rs)] = str(round(float(ws[dcol+str(rs)].value) / float(ws[gcol+str(rs)].value), 3)) + '%' if float(ws[dcol+str(rs)].value) != 0 else '-' ws[d2col + str(rs)] = (float(ws[gcol + str(rs)].value) / ( float(ws[d2col + str(rs)].value) / book.nsheets)) * 100 if float(ws[d2col + str(rs)].value) != 0.0 else 0 col = next_col(col) ws[col+str(r+3)] = str(self.s1[i]) + "~" + str(self.s2[i]) + "%" #if self.s2[i] != 51.0 else str(self.s1[i]) + "% 이상" infolog("finished writing GenomeStructure & RepeatRegion") #### Full Sequence col = next_col(next_col(next_col(col))) self.Full_Seq_in_sheet3(ws, col, book) infolog("End sheet3")
def Full_Seq_in_sheet3(self, ws, col, book): infolog("Start Full_Seq_in_sheet3") # Writing list of GenomeStructure & RepeatRegion, NCR & ORF for r in range(4, 4+len(book.GenomeStructure)): ws[col+str(r)] = book.GenomeStructure[r-4] ws[col+str(4+len(book.GenomeStructure))] = 'Total' for r in range(0, len(book.RepeatRegion)): row = r+5+len(book.GenomeStructure) ws[col+str(row)] = book.RepeatRegion[r] ws[col+str(5+len(book.GenomeStructure)+len(book.RepeatRegion))] = 'Total' rs_ = 5+len(book.GenomeStructure)+len(book.RepeatRegion) ws[col+str(rs_+1)], ws[col+str(rs_+2)] = 'ORF', 'NCR' # Writing the count of each sheets col = next_col(col) for i in range(0, book.nsheets): infolog("{0} Writing {1} sheet".format(time.time(), i)) ws[col+str(2)] = book.sheet_list[i] # 35 이하 포함 ws[col+str(3)] = '(BP_full)Length' sum_ = 0 for r in range(4, 4+len(book.GenomeStructure)): tmp = len(book.BPRaw[i][ book.BPRaw[i][ book.col_GenomeStructure] == book.GenomeStructure[r-4]]) if len(book.BPRaw[i])!=0 else 0 ws[col+str(r)] = tmp sum_ += tmp ws[col+str(4+len(book.GenomeStructure))] = sum_ sum_=0 for r in range(0, len(book.RepeatRegion)): row = r+5+len(book.GenomeStructure) tmp = len(book.BPRaw[i][ book.BPRaw[i][ book.col_RepeatRegion] == book.RepeatRegion[r]]) if len(book.BPRaw[i])!=0 else 0 ws[col+str(row)] = tmp sum_ += tmp ws[col+str(5+len(book.GenomeStructure)+len(book.RepeatRegion))] = sum_ # ORF & NCR ws[col+str(rs_+1)] = len(book.BPRaw[i][ book.BPRaw[i][ book.col_ORF].isin(book.ORF)]) if len(book.BPRaw[i])!=0 else 0 ws[col+str(rs_+2)] = len(book.BPRaw[i][ book.BPRaw[i][ book.col_ORF].isin(book.NCR)]) if len(book.BPRaw[i])!=0 else 0 # sum of basepair 35 이상 col = next_col(col) ws[col+str(3)] = '(BP_35이상)Length' sum_ = 0 for r in range(4, 4+len(book.GenomeStructure)): tmp = len(book.BP35[i][ book.BP35[i][ book.col_GenomeStructure] == book.GenomeStructure[r-4]]) if len(book.BPRaw[i])!=0 else 0 ws[col+str(r)] = tmp sum_ += tmp ws[col+str(4+len(book.GenomeStructure))] = sum_ sum_=0 for r in range(0, len(book.RepeatRegion)): row = r+5+len(book.GenomeStructure) tmp = len(book.BP35[i][ book.BP35[i][ book.col_RepeatRegion] == book.RepeatRegion[r]]) if len(book.BP35[i])!=0 else 0 ws[col+str(row)] = tmp sum_ += tmp ws[col+str(5+len(book.GenomeStructure)+len(book.RepeatRegion))] = sum_ ws[col+str(rs_+1)] = len(book.BP35[i][ book.BP35[i][ book.col_ORF ].isin(book.ORF)]) if len(book.BP35[i])!=0 else 0 ws[col+str(rs_+2)] = len(book.BP35[i][ book.BP35[i][ book.col_ORF ].isin(book.NCR)]) if len(book.BP35[i])!=0 else 0 col = next_col(col) infolog("End Full_Seq_in_sheet3")