def main():
    """Write the PSM/peptide summary tables and a FASTA of matched proteins.

    Reads options from ``init_opt()``, builds the two summary tables with
    ``xls_tidy(args.input, args.qvalue)`` and writes them as
    ``<outprefix>-psmSummary.tsv`` and ``<outprefix>-pepSummary.tsv``.
    Then writes ``<outprefix>-sequence.fa`` containing every record yielded
    by ``fasta_iter(args.db)`` whose header equals the first ``;``-separated
    id of some entry in the peptide summary's ``'Protein'`` column.
    """
    args = init_opt()
    psmsummary, pepsummary = xls_tidy(args.input, args.qvalue)
    etl.totsv(psmsummary, args.outprefix + "-psmSummary.tsv")
    etl.totsv(pepsummary, args.outprefix + "-pepSummary.tsv")

    # Keep only the first protein id of each entry; a set is all that is
    # needed for the membership test below.
    proid = {x.split(";")[0] for x in pepsummary['Protein']}

    # The context manager closes the FASTA file even if reading the
    # database or writing a record raises.
    with open(args.outprefix + "-sequence.fa", 'w') as out:
        for header, seq in fasta_iter(args.db):
            if header in proid:
                out.write(">%s\n%s\n" % (header, seq))
def test_ZipSource():
    """Check that a TSV stored as a zip member reads back through ZipSource.

    Writes a small table to ``tmp/issue_241.tsv``, stores it in
    ``tmp/issue_241.zip`` under the member name ``data.tsv``, then reads
    that member with ``fromtsv(ZipSource(...))`` and compares it with the
    original table.
    """
    # setup
    table = [('foo', 'bar'), ('a', '1'), ('b', '2')]
    totsv(table, 'tmp/issue_241.tsv')
    # The context manager closes the archive even if adding the member
    # fails, so no half-written zip handle is left open.
    with zipfile.ZipFile('tmp/issue_241.zip', mode='w') as z:
        z.write('tmp/issue_241.tsv', 'data.tsv')

    # test
    actual = fromtsv(ZipSource('tmp/issue_241.zip', 'data.tsv'))
    ieq(table, actual)
def test_issue_231():
    """Round-trip a single-column cut through TSV, CSV and pickle files.

    The ``'foo'`` column is cut from a two-column table; the result is
    written and read back in each format and compared with itself.
    """
    source = [['foo', 'bar'], ['a', '1'], ['b', '2']]
    expected = cut(source, 'foo')

    # (writer, reader, path) for each format, in the order they are tested.
    round_trips = (
        (totsv, fromtsv, 'tmp/issue_231.tsv'),
        (tocsv, fromcsv, 'tmp/issue_231.csv'),
        (topickle, frompickle, 'tmp/issue_231.pickle'),
    )
    for write, read, path in round_trips:
        write(expected, path)
        ieq(expected, read(path))
def test_zipsource():
    """Check that a TSV stored as a zip member reads back through ZipSource.

    Writes a small table to a temporary TSV path, stores it in a temporary
    zip archive under the member name ``data.tsv``, then reads that member
    with ``etl.fromtsv(ZipSource(...))`` and compares it with the original.
    """
    # setup
    tbl = [('foo', 'bar'), ('a', '1'), ('b', '2')]
    # NOTE(review): only the *name* of the temporary file is used; the
    # NamedTemporaryFile object itself is discarded straight away. This
    # presumably relies on the file being removed when the object is
    # collected -- confirm that is acceptable on all target platforms.
    fn_tsv = NamedTemporaryFile().name
    etl.totsv(tbl, fn_tsv)
    fn_zip = NamedTemporaryFile().name
    # The context manager closes the archive even if adding the member
    # fails.
    with zipfile.ZipFile(fn_zip, mode='w') as z:
        z.write(fn_tsv, 'data.tsv')

    # test
    actual = etl.fromtsv(ZipSource(fn_zip, 'data.tsv'))
    ieq(tbl, actual)
def test_totsv_appendtsv():
    """Check that totsv writes a table and appendtsv adds rows to it.

    After ``totsv`` the file must hold the header plus the three data rows
    (numbers read back as strings). After ``appendtsv`` the three rows of
    the second table must follow, without its header being repeated.
    """
    # exercise function
    table = (("foo", "bar"), ("a", 1), ("b", 2), ("c", 2))
    f = NamedTemporaryFile(delete=False)
    # Only the path is needed; release the handle so it is not leaked and
    # so the file can be re-opened by name (delete=False keeps it on disk).
    f.close()
    totsv(table, f.name)

    # check what it did
    # NOTE(review): binary mode matches the Python 2 csv module; under
    # Python 3 csv.reader needs a text-mode file -- confirm target version.
    with open(f.name, "rb") as o:
        actual = csv.reader(o, delimiter="\t")
        expect = [["foo", "bar"], ["a", "1"], ["b", "2"], ["c", "2"]]
        ieq(expect, actual)

    # check appending
    table2 = (("foo", "bar"), ("d", 7), ("e", 9), ("f", 1))
    appendtsv(table2, f.name)

    # check what it did
    with open(f.name, "rb") as o:
        actual = csv.reader(o, delimiter="\t")
        expect = [
            ["foo", "bar"],
            ["a", "1"],
            ["b", "2"],
            ["c", "2"],
            ["d", "7"],
            ["e", "9"],
            ["f", "1"],
        ]
        ieq(expect, actual)
def test_totsv_appendtsv():
    """Check that totsv writes a table and appendtsv adds rows to it.

    After ``totsv`` the file must hold the header plus the three data rows
    (numbers read back as strings). After ``appendtsv`` the three rows of
    the second table must follow, without its header being repeated.
    """
    # exercise function
    table = (('foo', 'bar'), ('a', 1), ('b', 2), ('c', 2))
    f = NamedTemporaryFile(delete=False)
    # Only the path is needed; release the handle so it is not leaked and
    # so the file can be re-opened by name (delete=False keeps it on disk).
    f.close()
    totsv(table, f.name)

    # check what it did
    # NOTE(review): binary mode matches the Python 2 csv module; under
    # Python 3 csv.reader needs a text-mode file -- confirm target version.
    with open(f.name, 'rb') as o:
        actual = csv.reader(o, delimiter='\t')
        expect = [['foo', 'bar'], ['a', '1'], ['b', '2'], ['c', '2']]
        ieq(expect, actual)

    # check appending
    table2 = (('foo', 'bar'), ('d', 7), ('e', 9), ('f', 1))
    appendtsv(table2, f.name)

    # check what it did
    with open(f.name, 'rb') as o:
        actual = csv.reader(o, delimiter='\t')
        expect = [
            ['foo', 'bar'],
            ['a', '1'],
            ['b', '2'],
            ['c', '2'],
            ['d', '7'],
            ['e', '9'],
            ['f', '1'],
        ]
        ieq(expect, actual)
import petl as etl


def _mileage_of(rec):
    """Return the row's 'mileage' value, used to seed the new column."""
    return rec['mileage']


def _scale_miles(value):
    """Scale a mileage value by 1.6 and truncate it to an int."""
    return int(float(value) * 1.6)


def _is_in_miles(row):
    """Tell whether the row's mileageType is exactly 'miles'."""
    return row.mileageType == 'miles'


# Read the sample listings.
listings = etl.fromtsv("donedeal_data_sample.tsv")

# Add an 'InKms' column that starts out as a copy of 'mileage'.
with_in_kms = etl.addfield(listings, 'InKms', _mileage_of)

# Rescale 'InKms' only on rows recorded in miles.
converted = etl.convert(
    with_in_kms,
    'InKms',
    _scale_miles,
    where=_is_in_miles,
)

etl.totsv(converted, 'donedeal_inKms.tsv')
import petl as etl

# Raw strings keep the Windows backslashes literal; sequences such as "\J"
# and "\d" are not valid escapes and trigger warnings in plain literals.
table1 = etl.fromtsv(r"D:\JOB\BI_Developer_Challenge\donedeal_data_sample.tsv")

# Parse 'mileage' as a number so it can be scaled.
table2 = etl.convert(table1, 'mileage', float)

# Convert mileage recorded in miles to kilometres.
table3 = etl.convert(
    table2,
    'mileage',
    lambda v: v * 1.60934,
    where=lambda r: r.mileageType == 'miles',
)

# Every row that was in miles (now converted) or already in kilometres is
# labelled with the single unit 'km'.
table4 = etl.convert(
    table3,
    'mileageType',
    lambda v: 'km',
    where=lambda r: r.mileageType in ('miles', 'kilometres'),
)

# Any remaining unit label becomes 'NA'. This step builds on the normalised
# table above (not on table3), so rows just relabelled 'km' are kept, and
# the comparison is an exact match rather than a substring test against the
# string 'km'.
table4 = etl.convert(
    table4,
    'mileageType',
    lambda v: 'NA',
    where=lambda r: r.mileageType != 'km',
)

etl.totsv(table4, r"D:\JOB\BI_Developer_Challenge\donedeal_data_etl.tsv")