def CustomPivot():
    """Ask for two ``.deppep`` files and pivot each one's intensities.

    Two file dialogs are shown one after the other.  Each chosen file is
    read as a tab-separated table, the row labelled 0 is dropped, the
    ``Intensity`` column is converted to float, and the data is pivoted so
    that every ``Raw file`` becomes a column of (mean) intensities keyed by
    ``DP Cluster Index``, ``DP AA`` and ``DP Base Sequence``.

    Returns
    -------
    tuple
        ``(raw_tablea, raw_tableb)``: the pivoted tables for the first and
        the second chosen file, each with the key columns restored as
        regular columns.
    """
    index_cols = ['DP Cluster Index', 'DP AA', 'DP Base Sequence']

    # A hidden root window is needed so only the dialogs are shown; it is
    # destroyed afterwards so repeated calls do not pile up Tk instances.
    root = Tk()
    root.withdraw()
    try:
        paths = [
            filedialog.askopenfilename(
                # Raw string: the backslashes are literal path separators,
                # not escape sequences.
                initialdir=r"D:\PhytonSCripts\MQOutput",
                title="Choose a MQ DP_peptides.deppep file to plot",
                filetypes=(("deppep files", "*.deppep"), ("all files", "*.*")),
            )
            for _ in range(2)
        ]
    finally:
        root.destroy()

    tables = []
    for path in paths:
        frame = pd.read_table(path, low_memory=False)
        # NOTE(review): row 0 is presumably a non-data annotation row in the
        # MaxQuant output - confirm against a sample file.
        frame = frame.drop(0).reset_index(drop=True)
        frame['Intensity'] = frame['Intensity'].astype(float)
        pivoted = pd.pivot_table(
            frame,
            values='Intensity',
            index=index_cols,
            columns='Raw file',
        )
        tables.append(pivoted.reset_index())

    raw_tablea, raw_tableb = tables

    # NOTE(review): the return value of extractRows is discarded, so this
    # call only matters if extractRows works in place - confirm.
    extractRows(raw_tablea)
    return raw_tablea, raw_tableb
def CustomPivot():
    """Ask for one ``.deppep`` file, pivot it and post-process the result.

    The chosen file is read as a tab-separated table, the row labelled 0 is
    dropped and ``Intensity`` is converted to float.  Intensities are then
    pivoted into one column per ``Raw file`` keyed by ``DP Cluster Index``,
    ``DP AA``, ``DP Base Sequence`` and ``DP Probabilities``.  The pivoted
    table is handed to ``extractRowsandPivot`` and its result is sorted by
    ``DP Cluster Index``.

    The elapsed wall-clock time (including the time spent in the file
    dialog) is printed in minutes.

    Returns
    -------
    DataFrame
        The processed table, sorted by ``DP Cluster Index`` with a fresh
        0..n-1 index.
    """
    start_time = time.time()

    # Hidden root window so only the dialog is shown; destroyed afterwards
    # so repeated calls do not leak Tk instances.
    root = Tk()
    root.withdraw()
    try:
        path = filedialog.askopenfilename(
            # Raw string: the backslashes are literal path separators.
            initialdir=r"D:\Thomas\PhytonSCripts\MQOutput",
            title="Choose a MQ DP_peptides.deppep file to plot",
            filetypes=(("deppep files", "*.deppep"), ("all files", "*.*")),
        )
    finally:
        root.destroy()

    a = pd.read_table(path, low_memory=False)
    # NOTE(review): row 0 is presumably a non-data annotation row in the
    # MaxQuant output - confirm against a sample file.
    a = a.drop(0).reset_index(drop=True)
    a['Intensity'] = a['Intensity'].astype(float)

    table = pd.pivot_table(
        a,
        values='Intensity',
        index=['DP Cluster Index', 'DP AA', 'DP Base Sequence',
               'DP Probabilities'],
        columns='Raw file',
    ).reset_index()

    table = extractRowsandPivot(table)
    table = table.sort_values(by=['DP Cluster Index']).reset_index(drop=True)

    # time.time() differences are in seconds; convert so the printed unit
    # ("minutes") is actually true.
    elapsed_minutes = (time.time() - start_time) / 60
    print("---Runtime = %s minutes ---" % elapsed_minutes)
    return table
def janspivot(a, reg):
    """Pivot intensities per raw file and collapse them per cluster/sequence.

    Parameters
    ----------
    a : DataFrame
        Must contain the columns ``Intensity``, ``Raw file``,
        ``DP Cluster Index``, ``DP AA``, ``DP Base Sequence`` and
        ``DP Probabilities`` (plus ``DP Probabilities after Regression``
        when ``reg`` is true).
    reg : bool
        When true, ``DP Probabilities after Regression`` is carried through
        as an additional joined text column.

    Returns
    -------
    DataFrame
        One row per (``DP Cluster Index``, ``DP Base Sequence``) with the
        probability strings of the group joined by ``';'`` and, for every
        raw file, the mean of the pivoted intensities of the group.
    """
    index_cols = ['DP Cluster Index', 'DP AA', 'DP Base Sequence',
                  'DP Probabilities']
    if reg:
        index_cols.append('DP Probabilities after Regression')

    table = pd.pivot_table(
        a,
        values='Intensity',
        index=index_cols,
        columns='Raw file',
    ).reset_index()

    # Select the intensity columns by name rather than by position: with
    # ``reg`` there are five key columns, so a fixed ``columns[4:]`` slice
    # would drag the regression text column into the mean.
    raw_file_cols = [col for col in table.columns if col not in index_cols]

    grouped = table.groupby(['DP Cluster Index', 'DP Base Sequence'])

    parts = [
        grouped['DP Probabilities'].apply(lambda values: ';'.join(values)),
    ]
    if reg:
        parts.append(
            grouped['DP Probabilities after Regression'].apply(
                lambda values: ';'.join(values)
            )
        )
    parts.append(grouped[raw_file_cols].mean())

    return pd.concat(parts, axis=1).reset_index()
# -*- coding: utf-8 -*-
"""
Created on Fri Apr  6 13:56:14 2018

@author: heinzinger
"""
import time
from tkinter import Tk
from tkinter import filedialog

from perseuspy import pd


if __name__ == "__main__":
    # Script entry point: let the user pick a .deppep file, pivot its
    # intensities into one column per raw file and report the elapsed time.
    start_time = time.time()

    # Hidden root window so only the file dialog is shown; destroyed once
    # the dialog has returned.
    root = Tk()
    root.withdraw()
    try:
        path = filedialog.askopenfilename(
            # Raw string: the backslashes are literal path separators, not
            # escape sequences.
            initialdir=r"D:\PhytonSCripts\MQOutput",
            title="Choose a MQ DP_peptides.deppep file to plot",
            filetypes=(("deppep files", "*.deppep"), ("all files", "*.*")),
        )
    finally:
        root.destroy()

    a = pd.read_table(path, low_memory=False)
    # NOTE(review): row 0 is presumably a non-data annotation row in the
    # MaxQuant output - confirm against a sample file.
    a = a.drop(0).reset_index(drop=True)
    a['Intensity'] = a['Intensity'].astype(float)

    table = pd.pivot_table(
        a,
        values='Intensity',
        index=['DP Cluster Index', 'DP Base Sequence'],
        columns='Raw file',
    )
    table = table.reset_index()

    # The measured time includes the time spent in the file dialog.
    print("---Runtime = %s seconds ---" % (time.time() - start_time))
def testMQ():
    """Compare a "no match" and a "matching" ``.deppep`` file.

    Two files are chosen through file dialogs and read as tab-separated
    tables (the row labelled 0 is dropped from each).  For every
    ``DP Base Sequence`` that occurs in both files, the raw files in which
    it was seen are compared: raw files that contain the sequence in one
    file but not in the other are counted, in total and per raw file.

    Side effects: two bar charts are drawn (per-raw-file mismatch counts,
    and number of rows per raw file in each input) and a summary is
    printed.

    Returns
    -------
    tuple
        ``(raw_tablea, raw_tableb)``: for each input, ``Intensity`` pivoted
        into one column per ``Raw file``, keyed by ``DP Cluster Index``,
        ``DP AA``, ``DP Base Sequence`` and ``DP Probabilities``.
    """
    wanted_cols = ['Raw file', 'DP Base Raw File', 'DP Proteins',
                   'DP Base Sequence']
    pivot_index = ['DP Cluster Index', 'DP AA', 'DP Base Sequence',
                   'DP Probabilities']

    def pick_and_load(title):
        # Show one file dialog and load the chosen table.
        path = filedialog.askopenfilename(
            # Raw string: the backslashes are literal path separators.
            initialdir=r"D:\Thomas\PhytonSCripts\MQOutput",
            title=title,
            filetypes=(("deppep files", "*.deppep"), ("all files", "*.*")),
        )
        frame = pd.read_table(path, low_memory=False)
        # NOTE(review): row 0 is presumably a non-data annotation row in
        # the MaxQuant output - confirm against a sample file.
        return frame.drop(0).reset_index(drop=True)

    def raw_files_per_sequence(frame):
        # Map every base sequence to the set of raw files it occurs in.
        return (
            frame.groupby('DP Base Sequence')['Raw file'].apply(set).to_dict()
        )

    def pivot_intensity(frame):
        # Mean intensity per key combination, one column per raw file.
        frame['Intensity'] = frame['Intensity'].astype(float)
        pivoted = pd.pivot_table(
            frame,
            values='Intensity',
            index=pivot_index,
            columns='Raw file',
        )
        return pivoted.reset_index()

    # Hidden root window so only the dialogs are shown; destroyed afterwards
    # so repeated calls do not leak Tk instances.
    root = Tk()
    root.withdraw()
    try:
        a = pick_and_load("Choose a MQ nomatch.deppep file to plot")
        b = pick_and_load("Choose a MQ matching.deppep file to plot")
    finally:
        root.destroy()

    arg = a[wanted_cols]
    argb = b[wanted_cols]

    files_a = raw_files_per_sequence(arg)
    files_b = raw_files_per_sequence(argb)

    # Only sequences present in both inputs are compared.
    count = 0
    countb = 0
    per_file_a = {}
    per_file_b = {}
    for sequence in files_a.keys() & files_b.keys():
        only_in_a = files_a[sequence] - files_b[sequence]
        only_in_b = files_b[sequence] - files_a[sequence]
        count += len(only_in_a)
        countb += len(only_in_b)
        for raw_file in only_in_a:
            per_file_a[raw_file] = per_file_a.get(raw_file, 0) + 1
        for raw_file in only_in_b:
            per_file_b[raw_file] = per_file_b.get(raw_file, 0) + 1

    # The per-raw-file table lists the raw files of the first input only;
    # raw files that exist solely in the second input still contribute to
    # the printed total ``countb`` but have no row here.
    raw_files = arg['Raw file'].unique()
    df = pd.DataFrame({
        'Raw Files': raw_files,
        'counta': [per_file_a.get(name, 0) for name in raw_files],
        'countb': [per_file_b.get(name, 0) for name in raw_files],
    })
    df.plot.bar()

    print('Amount of peptide sequences found in the Raw Files of file 1 but not'
          ' found in the Raw Files of file 2 is ', count,
          '. For file 2: ', countb)
    print('!!!Both peptide sequences need to be present in both files!!!')
    print('Amount of dp. Peptides in file 1: ', len(a),
          ' in file 2: ', len(b))

    # Align the two row counts on the raw-file name.  Assigning the bare
    # ``.values`` would pair counts by frequency rank, not by raw file.
    values = pd.concat(
        [a['Raw file'].value_counts(), b['Raw file'].value_counts()],
        axis=1,
        keys=['No match', 'Matching'],
    )
    values = values.fillna(0).sort_index()
    values.plot.bar()

    raw_tablea = pivot_intensity(a)
    raw_tableb = pivot_intensity(b)
    return raw_tablea, raw_tableb
def MSFrgPivot(a):
    """Pivot ``Calculated M/Z`` per peptide/modification and raw file.

    Parameters
    ----------
    a : DataFrame
        Must contain the columns ``Spectrum``, ``Observed Modifications``,
        ``Peptide`` and ``Calculated M/Z``.  A ``Raw file`` column (the part
        of ``Spectrum`` before the first ``'.'``) is added to ``a`` in
        place.

    Returns
    -------
    DataFrame
        One row per (``Peptide``, ``Observed Modifications``) and one column
        per raw file holding the mean ``Calculated M/Z``.  Rows whose
        ``Observed Modifications`` is ``'Unknown'`` are excluded.
    """
    # The raw-file name is the text before the first dot of the spectrum id.
    a['Raw file'] = a['Spectrum'].apply(lambda spectrum: spectrum.split('.')[0])

    # Keep the filtered frame: evaluating the mask without assigning the
    # result would leave the 'Unknown' rows in the pivot.
    known = a[a['Observed Modifications'] != 'Unknown']

    table = pd.pivot_table(
        known,
        values='Calculated M/Z',
        index=['Peptide', 'Observed Modifications'],
        columns='Raw file',
    )
    pivot = table.reset_index()
    return pivot