Beispiel #1
0
def CustomPivot():
    """Prompt for two MaxQuant ``.deppep`` files and pivot each by raw file.

    Two file-open dialogs are shown one after the other.  Each chosen file is
    read as a tab-separated table and its first data row (label 0) is dropped
    (presumably a non-data annotation row - TODO confirm).  ``Intensity`` is
    cast to float and pivoted with ``pd.pivot_table`` (default aggregation),
    giving one column per distinct ``Raw file`` value.

    Returns:
        tuple: ``(raw_tablea, raw_tableb)``, one DataFrame per chosen file,
        with the columns ``DP Cluster Index``, ``DP AA`` and
        ``DP Base Sequence`` followed by the per-raw-file intensity columns.

    Raises:
        FileNotFoundError: if a dialog is closed without choosing a file.
    """
    index_cols = ['DP Cluster Index', 'DP AA', 'DP Base Sequence']

    def _load_deppep():
        # Ask for one file and read it; shared by both selections.
        path = filedialog.askopenfilename(
            # Raw string: the backslashes are literal path separators.
            initialdir=r"D:\PhytonSCripts\MQOutput",
            title="Choose a MQ DP_peptides.deppep file to plot",
            filetypes=(("deppep files", "*.deppep"), ("all files", "*.*")))
        if not path:
            # A cancelled dialog yields an empty value; fail with a clear
            # message instead of handing an empty path to pandas.
            raise FileNotFoundError("No .deppep file was selected")
        frame = pd.read_table(path, low_memory=False)
        return frame.drop(0).reset_index(drop=True)

    def _pivot_intensity(frame):
        # Pivot the intensities into one column per raw file.
        frame['Intensity'] = frame['Intensity'].astype(float)
        pivot = pd.pivot_table(frame, values='Intensity',
                               index=index_cols, columns='Raw file')
        return pivot.reset_index()

    # A hidden root window is needed so the dialogs can be shown without an
    # empty main window; it is destroyed again once both files are chosen.
    root = Tk()
    root.withdraw()
    try:
        a = _load_deppep()
        b = _load_deppep()
    finally:
        root.destroy()

    raw_tablea = _pivot_intensity(a)
    raw_tableb = _pivot_intensity(b)

    # NOTE(review): the return value of extractRows is discarded, so this
    # only has an effect if extractRows modifies raw_tablea in place - confirm.
    extractRows(raw_tablea)

    return raw_tablea, raw_tableb
Beispiel #2
0
def CustomPivot():
    """Prompt for a MaxQuant ``.deppep`` file and return its processed pivot.

    The chosen file is read as a tab-separated table and its first data row
    (label 0) is dropped (presumably a non-data annotation row - TODO
    confirm).  ``Intensity`` is cast to float and pivoted with
    ``pd.pivot_table`` into one column per distinct ``Raw file`` value.  The
    pivot is passed through ``extractRowsandPivot`` and sorted by
    ``DP Cluster Index``.  The elapsed wall-clock time, which includes the
    time spent in the file dialog, is printed.

    Returns:
        pandas.DataFrame: the result of ``extractRowsandPivot`` sorted by
        ``DP Cluster Index`` with a fresh 0..n-1 index.

    Raises:
        FileNotFoundError: if the dialog is closed without choosing a file.
    """
    start_time = time.time()

    # A hidden root window is needed so the dialog can be shown without an
    # empty main window; it is destroyed again once the file is chosen.
    root = Tk()
    root.withdraw()
    try:
        path = filedialog.askopenfilename(
            # Raw string: the backslashes are literal path separators.
            initialdir=r"D:\Thomas\PhytonSCripts\MQOutput",
            title="Choose a MQ DP_peptides.deppep file to plot",
            filetypes=(("deppep files", "*.deppep"), ("all files", "*.*")))
    finally:
        root.destroy()
    if not path:
        # A cancelled dialog yields an empty value; fail with a clear message
        # instead of handing an empty path to pandas.
        raise FileNotFoundError("No .deppep file was selected")

    a = pd.read_table(path, low_memory=False)
    a = a.drop(0).reset_index(drop=True)
    a['Intensity'] = a['Intensity'].astype(float)

    table = pd.pivot_table(
        a, values='Intensity',
        index=['DP Cluster Index', 'DP AA', 'DP Base Sequence',
               'DP Probabilities'],
        columns='Raw file')
    table = table.reset_index()

    # magic
    table = extractRowsandPivot(table)
    table = table.sort_values(by=['DP Cluster Index'])
    table = table.reset_index(drop=True)

    # time.time() differences are in seconds, so label the value as such.
    print("---Runtime =  %s seconds ---" % (time.time() - start_time))

    return table
Beispiel #3
0
def janspivot(a, reg):
    """Pivot intensities per raw file and collapse them per cluster/sequence.

    ``a`` is first pivoted (``pd.pivot_table``, default aggregation) on
    ``DP Cluster Index``, ``DP AA``, ``DP Base Sequence`` and
    ``DP Probabilities`` (plus ``DP Probabilities after Regression`` when
    ``reg`` is true) with one ``Intensity`` column per ``Raw file`` value.
    The rows are then grouped by ``DP Cluster Index`` and
    ``DP Base Sequence``: the probability strings of each group are joined
    with ``';'`` and the intensity columns are averaged.

    Args:
        a: DataFrame holding the columns named above plus ``Raw file`` and
            ``Intensity``.  The probability columns must contain strings.
        reg: when true, ``DP Probabilities after Regression`` is also used
            as a pivot key and returned as a joined column.

    Returns:
        pandas.DataFrame: columns ``DP Cluster Index``, ``DP Base Sequence``,
        ``DP Probabilities`` (and ``DP Probabilities after Regression`` when
        ``reg`` is true), followed by one mean-intensity column per raw file.
    """
    index_cols = ['DP Cluster Index', 'DP AA', 'DP Base Sequence',
                  'DP Probabilities']
    joined_cols = ['DP Probabilities']
    if reg:
        index_cols.append('DP Probabilities after Regression')
        joined_cols.append('DP Probabilities after Regression')

    table = pd.pivot_table(a, values='Intensity', index=index_cols,
                           columns='Raw file')
    table = table.reset_index()

    # Everything after the index columns is a per-raw-file intensity column.
    # The offset follows the number of index columns, so the extra string
    # column of the regression variant is never averaged.
    intensity_cols = list(table.columns[len(index_cols):])

    grouped = table.groupby(['DP Cluster Index', 'DP Base Sequence'])
    joined = [grouped[col].apply(lambda values: ';'.join(values))
              for col in joined_cols]
    means = grouped[intensity_cols].mean()

    pivot = pd.concat(joined + [means], axis=1)
    return pivot.reset_index()
Beispiel #4
0
# -*- coding: utf-8 -*-
"""
Created on Fri Apr  6 13:56:14 2018

@author: heinzinger
"""

import time as time
from perseuspy import pd
from tkinter import Tk
from tkinter import filedialog

if __name__ == "__main__":
    # Interactive run: pick a .deppep file, pivot its intensities per raw
    # file and report how long the whole run (including the dialog) took.
    start_time = time.time()

    # A hidden root window is needed so the dialog can be shown without an
    # empty main window; it is destroyed again once the file is chosen.
    root = Tk()
    root.withdraw()
    try:
        path = filedialog.askopenfilename(
            # Raw string: the backslashes are literal path separators.
            initialdir=r"D:\PhytonSCripts\MQOutput",
            title="Choose a MQ DP_peptides.deppep file to plot",
            filetypes=(("deppep files", "*.deppep"), ("all files", "*.*")))
    finally:
        root.destroy()
    if not path:
        # A cancelled dialog yields an empty value; fail with a clear message
        # instead of handing an empty path to pandas.
        raise FileNotFoundError("No .deppep file was selected")

    a = pd.read_table(path, low_memory=False)
    # Drop the first data row (label 0) and renumber the remaining rows.
    a = a.drop(0).reset_index(drop=True)

    a['Intensity'] = a['Intensity'].astype(float)

    table = pd.pivot_table(a, values='Intensity',
                           index=['DP Cluster Index', 'DP Base Sequence'],
                           columns='Raw file')

    table = table.reset_index()

    print("---Runtime =  %s seconds ---" % (time.time() - start_time))
Beispiel #5
0
def testMQ():
    """Compare two MaxQuant ``.deppep`` files chosen through file dialogs.

    The first dialog asks for the "nomatch" file (file 1), the second for the
    "matching" file (file 2).  Each file is read as a tab-separated table and
    its first data row (label 0) is dropped.

    For every ``DP Base Sequence`` present in both files, the raw files in
    which it occurs are compared.  Raw files that have the sequence in only
    one of the two inputs are counted, both in total and per raw file.  Only
    raw files that occur in file 1 get a per-file row.  Two bar charts are
    drawn (the per-raw-file mismatch counts and the number of rows per raw
    file in each input) and a short summary is printed.

    Returns:
        tuple: ``(raw_tablea, raw_tableb)``, the ``Intensity`` pivot of each
        file keyed by ``DP Cluster Index``, ``DP AA``, ``DP Base Sequence``
        and ``DP Probabilities`` with one column per ``Raw file`` value.

    Raises:
        FileNotFoundError: if a dialog is closed without choosing a file.
    """
    # Raw string: the backslashes are literal path separators.
    initial_dir = r"D:\Thomas\PhytonSCripts\MQOutput"
    file_types = (("deppep files", "*.deppep"), ("all files", "*.*"))

    def _load_deppep(title):
        # Ask for one file and read it; shared by both selections.
        path = filedialog.askopenfilename(initialdir=initial_dir,
                                          title=title,
                                          filetypes=file_types)
        if not path:
            # A cancelled dialog yields an empty value; fail with a clear
            # message instead of handing an empty path to pandas.
            raise FileNotFoundError("No .deppep file was selected")
        frame = pd.read_table(path, low_memory=False)
        return frame.drop(0).reset_index(drop=True)

    # A hidden root window is needed so the dialogs can be shown without an
    # empty main window; it is destroyed again once both files are chosen.
    root = Tk()
    root.withdraw()
    try:
        a = _load_deppep("Choose a MQ nomatch.deppep file to plot")
        b = _load_deppep("Choose a MQ matching.deppep file to plot")
    finally:
        root.destroy()

    key_cols = ['Raw file', 'DP Base Raw File', 'DP Proteins',
                'DP Base Sequence']
    arg = a[key_cols]
    argb = b[key_cols]

    # Sequences of file 1 that also occur in file 2.
    uniq = arg['DP Base Sequence'].unique()
    uniqb = argb['DP Base Sequence'].unique()
    out = uniq[np.isin(uniq, uniqb)]

    df = pd.DataFrame({'Raw Files': arg['Raw file'].unique()})
    df['counta'] = 0
    df['countb'] = 0
    count = 0
    countb = 0

    for seq in out:
        files_a = arg.loc[arg['DP Base Sequence'] == seq, 'Raw file'].unique()
        files_b = argb.loc[argb['DP Base Sequence'] == seq, 'Raw file'].unique()
        # Raw files that contain this sequence in one input but not the other.
        only_a = files_a[~np.isin(files_a, files_b)]
        only_b = files_b[~np.isin(files_b, files_a)]
        count += len(only_a)
        countb += len(only_b)

        # Update through a single .loc call: chained indexing such as
        # df['counta'][mask] = ... may write to a temporary copy.
        df.loc[df['Raw Files'].isin(only_a), 'counta'] += 1
        df.loc[df['Raw Files'].isin(only_b), 'countb'] += 1
    df.plot.bar()

    print('Amount of peptide sequences found in the Raw Files of file 1 but not'
          ' found in the Raw Files of file 2 is ', count, '. For file 2: ', countb)
    print('!!!Both peptide sequences need to be present in both files!!!')
    print('Amount of dp. Peptides in file 1: ', len(a), ' in file 2: ', len(b))

    # Build the table from the two Series so the counts are aligned on the
    # raw file name.  value_counts() orders each input by its own
    # frequencies, so pairing the values by position would mix up raw files.
    values = pd.DataFrame({
        'No match': a['Raw file'].value_counts(),
        'Matching': b['Raw file'].value_counts(),
    })
    # A raw file missing from one input simply has zero rows there.
    values = values.fillna(0).sort_index()
    values.plot.bar()

    a['Intensity'] = a['Intensity'].astype(float)
    b['Intensity'] = b['Intensity'].astype(float)

    pivot_index = ['DP Cluster Index', 'DP AA', 'DP Base Sequence',
                   'DP Probabilities']
    raw_tablea = pd.pivot_table(a, values='Intensity', index=pivot_index,
                                columns='Raw file')
    raw_tableb = pd.pivot_table(b, values='Intensity', index=pivot_index,
                                columns='Raw file')

    raw_tablea = raw_tablea.reset_index()
    raw_tableb = raw_tableb.reset_index()

    return raw_tablea, raw_tableb
Beispiel #6
0
def MSFrgPivot(a):
    """Pivot ``Calculated M/Z`` per peptide/modification and raw file.

    The raw file name is taken as the part of ``Spectrum`` before the first
    ``'.'`` and stored in a ``Raw file`` column that is added to ``a`` in
    place.  Rows whose ``Observed Modifications`` equals ``'Unknown'`` are
    left out of the pivot.

    Args:
        a: DataFrame with the columns ``Spectrum``, ``Peptide``,
            ``Observed Modifications`` and ``Calculated M/Z``.  It gains a
            ``Raw file`` column as a side effect.

    Returns:
        pandas.DataFrame: columns ``Peptide`` and ``Observed Modifications``
        followed by one ``Calculated M/Z`` column per raw file
        (``pd.pivot_table`` default aggregation).
    """
    # The .str accessor propagates missing Spectrum values instead of raising.
    a['Raw file'] = a['Spectrum'].str.split('.').str[0]

    # Keep the filtered frame: indexing returns a new object, so a bare
    # a[...] expression statement would leave the data unfiltered.
    known = a[a['Observed Modifications'] != 'Unknown']

    table = pd.pivot_table(known, values='Calculated M/Z',
                           index=['Peptide', 'Observed Modifications'],
                           columns='Raw file')
    pivot = table.reset_index()
    return pivot