def xls_tidy(xls, qvalue):
    """Filter a tab-separated PSM table by q-value and summarise it per peptide.

    Args:
        xls: Source handed to ``etl.fromtsv``. The table must provide the
            ``Peptide``, ``Protein`` and ``PepQValue`` columns.
        qvalue: Inclusive upper bound on ``PepQValue``; any value accepted
            by ``float()``.

    Returns:
        A ``(psmsummary, pepsummary)`` tuple of petl tables.
        ``psmsummary`` holds every input row whose ``PepQValue`` is at most
        ``qvalue``, with all original columns. ``pepsummary`` is keyed on the
        cleaned ``Peptide`` string and carries the ``;``-joined ``Protein``
        and ``PepQValue`` values of its distinct rows plus ``SpecCount``,
        the number of filtered rows for that peptide.

    Raises:
        ValueError, TypeError: if ``qvalue`` cannot be converted to float.
    """
    # Convert the threshold once, up front: the predicate below runs for
    # every row, and a bad threshold should fail here rather than when the
    # lazy table is first iterated.
    threshold = float(qvalue)

    rows = etl.fromtsv(xls)
    psmsummary = etl.select(
        rows, lambda row: float(row.PepQValue) <= threshold)

    peptides = etl.cut(psmsummary, 'Peptide', 'Protein', 'PepQValue')

    # Remove the modification info from the peptide string.
    # Step 1: drop the single flanking character and dot on each side,
    # e.g. "K.PEPTIDE.R" -> "PEPTIDE".
    peptides = etl.transform.regex.sub(
        peptides, 'Peptide', r'^[\w-]\.(.+)\.[\w-]$', r'\1')
    # Step 2: drop every run of digits, dots and plus signs.
    # NOTE(review): '-' is not in this character class, so a negative mass
    # shift would leave a stray '-' behind; confirm against real input.
    peptides = etl.transform.regex.sub(
        peptides, 'Peptide', r'[\d\.\+]+', r'')

    # Count of filtered rows per cleaned peptide.
    count_spec = OrderedDict()
    count_spec['SpecCount'] = len
    spec_counts = etl.aggregate(peptides, 'Peptide', count_spec)

    # Keep one row per distinct (Peptide, Protein, PepQValue) combination so
    # the joined strings below do not repeat identical entries.
    distinct = etl.groupselectfirst(
        peptides, key=('Peptide', 'Protein', "PepQValue"))

    join_spec = OrderedDict()
    join_spec['Protein'] = 'Protein', etl.strjoin(';')
    join_spec['PepQValue'] = 'PepQValue', etl.strjoin(';')
    joined = etl.aggregate(distinct, 'Peptide', join_spec)

    pepsummary = etl.join(joined, spec_counts, key='Peptide')
    return (psmsummary, pepsummary)
# single field aggregation, positional form: key 'foo', function sum,
# value field 'bar'
table3 = aggregate(table1, 'foo', sum, 'bar')
look(table3)

# alternative signature for single field aggregation using keyword args
table4 = aggregate(
    table1,
    key=('foo', 'bar'),
    aggregation=list,
    value=('bar', 'baz'),
)
look(table4)

# aggregate multiple fields: one entry per output field
from collections import OrderedDict
from petl import strjoin

aggregation = OrderedDict([
    ('count', len),
    ('minbar', ('bar', min)),
    ('maxbar', ('bar', max)),
    ('sumbar', ('bar', sum)),
    ('listbar', 'bar'),  # default aggregation function is list
    ('listbarbaz', (('bar', 'baz'), list)),
    ('bars', ('bar', strjoin(', '))),
])
table5 = aggregate(table1, 'foo', aggregation)
look(table5)

# can also use list or tuple to specify multiple field aggregation
aggregation = [
    ('count', len),
    ('minbar', 'bar', min),
    ('maxbar', 'bar', max),
    ('sumbar', 'bar', sum),
    ('listbar', 'bar'),  # default aggregation function is list
    ('listbarbaz', ('bar', 'baz'), list),
    ('bars', 'bar', strjoin(', ')),
]
table6 = aggregate(table1, 'foo', aggregation)
look(table6)

# can also use suffix notation: assign aggregations onto the returned object
table7 = aggregate(table1, 'foo')
table7['count'] = len
from petl import rangeaggregate, look, strjoin

look(table1)

# aggregate whole rows: key 'bar', width 2, function len
table2 = rangeaggregate(table1, 'bar', 2, len)
look(table2)

# aggregate single field: collect 'foo' values with list
table3 = rangeaggregate(table1, 'bar', 2, list, 'foo')
look(table3)

# aggregate single field - alternative signature using keyword args
table4 = rangeaggregate(
    table1,
    key='bar',
    width=2,
    aggregation=list,
    value='foo',
)
look(table4)

# aggregate multiple fields: one entry per output field
from collections import OrderedDict

aggregation = OrderedDict([
    ('foocount', len),
    ('foojoin', ('foo', strjoin(''))),
    ('foolist', 'foo'),  # default is list
])
table5 = rangeaggregate(table1, 'bar', 2, aggregation)
look(table5)


# rowmap

table1 = [
    ['id', 'sex', 'age', 'height', 'weight'],
    [1, 'male', 16, 1.45, 62.0],
    [2, 'female', 19, 1.34, 55.4],
    [3, 'female', 17, 1.78, 74.4],
    [4, 'male', 21, 1.33, 45.2],
    [5, '-', 25, 1.65, 51.9],
]

from petl import rowmap, look
# single field aggregation via keyword args: composite key ('foo', 'bar'),
# function list, value fields ('bar', 'baz')
table4 = aggregate(
    table1,
    key=('foo', 'bar'),
    aggregation=list,
    value=('bar', 'baz'),
)
look(table4)

# aggregate multiple fields: one entry per output field
from collections import OrderedDict
from petl import strjoin

aggregation = OrderedDict([
    ('count', len),
    ('minbar', ('bar', min)),
    ('maxbar', ('bar', max)),
    ('sumbar', ('bar', sum)),
    ('listbar', 'bar'),  # default aggregation function is list
    ('bars', ('bar', strjoin(', '))),
])
table5 = aggregate(table1, 'foo', aggregation)
look(table5)

# can also use list or tuple to specify multiple field aggregation
aggregation = [
    ('count', len),
    ('minbar', 'bar', min),
    ('maxbar', 'bar', max),
    ('sumbar', 'bar', sum),
    ('listbar', 'bar'),  # default aggregation function is list
    ('bars', 'bar', strjoin(', ')),
]
table6 = aggregate(table1, 'foo', aggregation)
look(table6)

# can also use suffix notation
table7 = aggregate(table1, 'foo')
# aggregate single field: key 'foo', function sum, value field 'bar'
table3 = aggregate(table1, 'foo', sum, 'bar')
look(table3)

# alternative signature for single field aggregation
table4 = aggregate(
    table1,
    key=('foo', 'bar'),
    aggregation=list,
    value=('bar', 'baz'),
)
look(table4)

# aggregate multiple fields: one entry per output field
from collections import OrderedDict
from petl import strjoin

aggregation = OrderedDict([
    ('count', len),
    ('minbar', ('bar', min)),
    ('maxbar', ('bar', max)),
    ('sumbar', ('bar', sum)),
    ('listbar', 'bar'),  # default aggregation function is list
    ('bars', ('bar', strjoin(', '))),
])
table5 = aggregate(table1, 'foo', aggregation)
look(table5)

# can also use list or tuple to specify multiple field aggregation
aggregation = [
    ('count', len),
    ('minbar', 'bar', min),
    ('maxbar', 'bar', max),
    ('sumbar', 'bar', sum),
    ('listbar', 'bar'),  # default aggregation function is list
    ('bars', 'bar', strjoin(', ')),
]
table6 = aggregate(table1, 'foo', aggregation)
look(table6)

# can also use suffix notation: assign aggregations onto the returned object
table7 = aggregate(table1, 'foo')
table7['count'] = len
table7['minbar'] = ('bar', min)
# NOTE(review): the next line is the tail of a call whose opening lies
# outside this excerpt; it is kept verbatim.
aggregation=list, value=('bar', 'baz'))
# Bare expression; presumably echoes the table in an interactive session.
table4

# aggregate multiple fields
# Each key of the ordered mapping names an output field; the value is a
# function, a source field name, or a (source, function) pair.
from collections import OrderedDict
import petl as etl
aggregation = OrderedDict()
aggregation['count'] = len
aggregation['minbar'] = 'bar', min
aggregation['maxbar'] = 'bar', max
aggregation['sumbar'] = 'bar', sum
# default aggregation function is list
aggregation['listbar'] = 'bar'
aggregation['listbarbaz'] = ('bar', 'baz'), list
aggregation['bars'] = 'bar', etl.strjoin(', ')
# Apply the whole specification in one call, keyed on 'foo'.
table5 = etl.aggregate(table1, 'foo', aggregation)
table5


# mergeduplicates()
###################

import petl as etl
# Sample input: a header row followed by data rows; 'foo' values repeat
# and some cells are None.
table1 = [['foo', 'bar', 'baz'],
          ['A', 1, 2.7],
          ['B', 2, None],
          ['D', 3, 9.4],
          ['B', None, 7.8],
          ['E', None, 42.],
          ['D', 3, 12.3],
          ['A', 2, None]]
# Call mergeduplicates on the sample table with 'foo' as the key.
table2 = etl.mergeduplicates(table1, 'foo')
table2


# merge()
#########
# Single field aggregation via keyword args: composite key ('foo', 'bar'),
# function list, value fields ('bar', 'baz').
table4 = etl.aggregate(table1, key=('foo', 'bar'),
                       aggregation=list, value=('bar', 'baz'))
# Bare expression; presumably echoes the table in an interactive session.
table4

# aggregate multiple fields
# Each key of the ordered mapping names an output field; the value is a
# function, a source field name, or a (source, function) pair.
from collections import OrderedDict
import petl as etl
aggregation = OrderedDict()
aggregation['count'] = len
aggregation['minbar'] = 'bar', min
aggregation['maxbar'] = 'bar', max
aggregation['sumbar'] = 'bar', sum
# default aggregation function is list
aggregation['listbar'] = 'bar'
aggregation['listbarbaz'] = ('bar', 'baz'), list
aggregation['bars'] = 'bar', etl.strjoin(', ')
# Apply the whole specification in one call, keyed on 'foo'.
table5 = etl.aggregate(table1, 'foo', aggregation)
table5


# mergeduplicates()
###################

import petl as etl
# Sample input: a header row followed by data rows; 'foo' values repeat
# and some cells are None.
# NOTE(review): the list literal is not closed within this excerpt; it
# continues past the last visible line.
table1 = [['foo', 'bar', 'baz'],
          ['A', 1, 2.7],
          ['B', 2, None],
          ['D', 3, 9.4],
          ['B', None, 7.8],
          ['E', None, 42.],
          ['D', 3, 12.3],