def tab_reorder(fname, column_order, delim='\t'):
    """Write the columns of a delimited file to stdout in a new order.

    Lines starting with '#' are copied to stdout unchanged.  Every other
    line is split on `delim`, its columns are re-arranged according to
    `column_order`, and the result is written joined with tabs.

    Arguments:
        fname: input name, opened with gzip_opener(fname).open().
        column_order: iterable of column indexes.  Negative indexes count
            from the end of the row, as with normal list indexing.  The
            special value '*' emits every column that has not been selected
            so far (in their original order) and ends processing for the
            row: entries listed after '*' are ignored.
        delim: delimiter used to split the input lines.  The output is
            always tab-joined, regardless of this value.

    Raises:
        IndexError: if an index in `column_order` is out of range for a row.
    """
    f = gzip_opener(fname).open()
    try:
        for line in f:
            if line[0] == '#':
                sys.stdout.write(line)
                continue

            cols = line.rstrip('\n').split(delim)
            cols_used = set()
            outcols = []
            for col_idx in column_order:
                if col_idx == '*':
                    outcols.extend(
                        val for i, val in enumerate(cols)
                        if i not in cols_used)
                    break

                outcols.append(cols[col_idx])
                # Store negative indexes as their positive equivalent so
                # that the '*' expansion recognises them as already used.
                if col_idx < 0:
                    cols_used.add(len(cols) + col_idx)
                else:
                    cols_used.add(col_idx)

            sys.stdout.write('%s\n' % '\t'.join(outcols))
    finally:
        # Release the handle even when a row fails.  Sibling functions in
        # this file skip the close when the handle is sys.stdin; do the same.
        if f is not sys.stdin:
            f.close()
def _padded_colsizes(colsizes, max_size=None, min_size=0):
    """Return display widths: observed widths padded by 20%, then bounded.

    When `max_size` is truthy it caps every width and `min_size` is not
    applied; otherwise every width is raised to at least `min_size`.
    """
    padded = [int(math.ceil(size * 1.2)) for size in colsizes]
    if max_size:
        return [min(max_size, size) for size in padded]
    return [max(min_size, size) for size in padded]


def tab_view(fname, preview_lines=100, delim='\t', max_size=None, min_size=0):
    """Print a delimited file through _write_cols using measured column widths.

    The first `preview_lines` data lines (plus any '#' lines seen before the
    preview ends) are buffered to measure the widest value in each column
    and to classify each column.  A column is typed 'i' while every
    previewed value parses with int(), and 't' once any value does not.
    The buffered lines are then written with _write_cols, followed by the
    remaining lines as they are read.

    Arguments:
        fname: input name, opened with gzip_opener(fname).open().
        preview_lines: number of data lines used to measure the columns.
        delim: delimiter used to split the previewed lines.
        max_size: if truthy, upper bound for every column width.
        min_size: lower bound for every column width; only applied when
            `max_size` is not set.

    KeyboardInterrupt and IOError are swallowed and a blank line is printed
    instead (presumably so that an interrupted or closed pipe ends quietly
    -- confirm before relying on it).
    """
    colsizes = []
    coltypes = []
    preview_buf = []
    prev_count = 0
    inpreview = True

    try:
        f = gzip_opener(fname).open()
        try:
            for line in f:
                if not inpreview:
                    _write_cols(line, colsizes, coltypes)
                    continue

                if line[0] == '#':
                    preview_buf.append(line)
                    continue

                cols = line.rstrip().split(delim)
                for i, col in enumerate(cols):
                    if len(colsizes) <= i:
                        # first time this column position is seen
                        colsizes.append(len(col))
                        coltypes.append('i')
                    elif len(col) > colsizes[i]:
                        colsizes[i] = len(col)

                    try:
                        int(col)
                    except ValueError:
                        coltypes[i] = 't'

                preview_buf.append(line)
                prev_count += 1

                if prev_count >= preview_lines:
                    colsizes = _padded_colsizes(colsizes, max_size, min_size)
                    for preview in preview_buf:
                        _write_cols(preview, colsizes, coltypes)
                    preview_buf = []
                    inpreview = False
        finally:
            if f is not sys.stdin:
                f.close()

        if preview_buf:
            # The input ended before the preview filled up.  Use the same
            # width rules as the normal path so that max_size / min_size
            # are honoured for short files as well.
            colsizes = _padded_colsizes(colsizes, max_size, min_size)
            for preview in preview_buf:
                _write_cols(preview, colsizes, coltypes)
    except (KeyboardInterrupt, IOError):
        sys.stdout.write('\n')
def _sheet_label(fname):
    """Return `fname` without a trailing '.gz' and then a trailing '.txt'."""
    label = fname
    for suffix in ('.gz', '.txt'):
        # str.rstrip() removes a *set of characters*, not a suffix, and
        # would eat into the name itself ("result.txt" -> "resul").
        if label.endswith(suffix):
            label = label[:-len(suffix)]
    return label


def tab_combine(outfile, fnames, delim='\t'):
    """Combine several delimited text files into one XLSX workbook.

    One worksheet is added per input file.  The worksheet is named after the
    file name with any '.gz' / '.txt' suffix removed, truncated to 31
    characters.  Every line is split on `delim` and written cell by cell;
    the workbook is created with 'strings_to_numbers' enabled.  Progress
    ("<name>... done") is reported on stderr.

    Arguments:
        outfile: name of the workbook, passed to xlsxwriter.Workbook.
        fnames: iterable of input names, each opened with
            gzip_opener(fname).open().
        delim: delimiter used to split the input lines.
    """
    workbook = xlsxwriter.Workbook(outfile, {'strings_to_numbers': True})

    for fname in fnames:
        label = _sheet_label(fname)
        sys.stderr.write(label)
        sys.stderr.write("... ")

        worksheet = workbook.add_worksheet(label[:31])
        f = gzip_opener(fname).open()
        try:
            for row, line in enumerate(f):
                for col, val in enumerate(line.rstrip().split(delim)):
                    worksheet.write(row, col, val)
        finally:
            # Sibling functions in this file skip the close when the handle
            # is sys.stdin; do the same here.
            if f is not sys.stdin:
                f.close()

        sys.stderr.write("done\n")

    workbook.close()
def filter_file(fname, criteria, header):
    """Write to stdout the lines of a tab-delimited file that pass `criteria`.

    Lines starting with '#' are always copied through.  When `header` is
    true, the first non-comment line is also copied through without being
    tested.  Every other line is split on tabs and written only if
    criteria.filter(cols) returns a true value.

    Arguments:
        fname: input name, opened with gzip_opener(fname).open().
        criteria: object with a filter(cols) method that takes the list of
            column strings for one line.
        header: whether the first non-comment line is a header row.
    """
    f = gzip_opener(fname).open()
    try:
        header_pending = bool(header)
        for line in f:
            if line[0] == '#':
                sys.stdout.write(line)
                continue

            if header_pending:
                sys.stdout.write(line)
                header_pending = False
                continue

            # Strip only the line terminator.  A plain strip() would also
            # drop leading/trailing empty fields and shift the column
            # indexes seen by the criteria.
            cols = line.rstrip('\r\n').split('\t')
            if criteria.filter(cols):
                sys.stdout.write(line)
    finally:
        if f is not sys.stdin:
            f.close()
def tab_tag(fname, colname, colvalue, colidx, delim='\t', noheader=False):
    """Insert a constant-valued column into a delimited file, writing to stdout.

    Lines starting with '#' are copied to stdout unchanged.  Unless
    `noheader` is set, the first non-comment line is treated as the header
    and receives `colname`; every other line receives `colvalue`.  The new
    field is placed with list.insert(colidx, ...), and rows are written
    joined with tabs.

    Arguments:
        fname: input name, opened with gzip_opener(fname).open().
        colname: text inserted into the header row.
        colvalue: text inserted into every data row.
        colidx: position passed to list.insert() for the new column.
        delim: delimiter used to split the input lines.  The output is
            always tab-joined, regardless of this value.
        noheader: set to True when the input has no header row.
    """
    f = gzip_opener(fname).open()
    try:
        header_pending = not noheader
        for line in f:
            if line[0] == '#':
                sys.stdout.write(line)
                continue

            cols = line.rstrip('\n').split(delim)
            if header_pending:
                cols.insert(colidx, colname)
                header_pending = False
            else:
                cols.insert(colidx, colvalue)

            sys.stdout.write('%s\n' % '\t'.join(cols))
    finally:
        # Release the handle even when a row fails.  Sibling functions in
        # this file skip the close when the handle is sys.stdin; do the same.
        if f is not sys.stdin:
            f.close()