예제 #1
0
def tab_reorder(fname, column_order, delim='\t'):
    """Write the rows of *fname* to stdout with their columns rearranged.

    Lines whose first character is ``#`` are copied through unchanged.  Every
    other line is split on *delim* and the selected columns are written
    joined by tabs (the output separator is always a tab, whatever *delim*
    is).

    fname        -- input name, opened via ``gzip_opener(fname).open()``.
    column_order -- sequence of integer column indexes; negative values count
                    from the end of the row.  The string ``'*'`` expands to
                    every column not selected by an earlier entry, and any
                    entries after ``'*'`` are ignored.
    delim        -- input column separator.

    Raises IndexError when an index falls outside a row.
    """
    f = gzip_opener(fname).open()
    try:
        for line in f:
            if line[0] == '#':
                sys.stdout.write(line)
                continue

            cols = line.rstrip('\n').split(delim)
            cols_used = set()
            outcols = []

            for col_idx in column_order:
                if col_idx == '*':
                    outcols.extend(
                        val for i, val in enumerate(cols)
                        if i not in cols_used)
                    break

                outcols.append(cols[col_idx])
                # Store the non-negative position so that '*' recognises
                # columns that were picked with a negative index.
                if col_idx < 0:
                    col_idx += len(cols)
                cols_used.add(col_idx)

            sys.stdout.write('%s\n' % '\t'.join(outcols))
    finally:
        # Release the handle even when a lookup or write fails.  The handle
        # may be sys.stdin (other readers in this file guard for that), which
        # is left open.
        if f is not sys.stdin:
            f.close()
예제 #2
0
def _pad_colsizes(colsizes, max_size=None, min_size=0):
    """Return display widths: each measured width plus 20%, then clamped.

    min_size is applied first and max_size second, so max_size wins if the
    two conflict.  A falsy limit disables that clamp.
    """
    widths = [int(math.ceil(size * 1.2)) for size in colsizes]
    if min_size:
        widths = [max(min_size, width) for width in widths]
    if max_size:
        widths = [min(max_size, width) for width in widths]
    return widths


def tab_view(fname,preview_lines=100,delim='\t',max_size=None, min_size=0):
    """Print *fname* through ``_write_cols`` using widths measured on a preview.

    The first *preview_lines* non-comment lines are buffered (together with
    any ``#`` lines seen in between) to measure the widest value of each
    column and to decide whether every value in the column parses as an
    integer (type ``'i'``) or not (type ``'t'``).  Once the preview is full
    the buffered lines are emitted and every later line, comment or not, is
    passed straight to ``_write_cols``.

    fname         -- input name, opened via ``gzip_opener(fname).open()``.
    preview_lines -- number of data lines used for measuring.
    delim         -- input column separator.
    max_size      -- upper bound for a column width (falsy: no bound).
    min_size      -- lower bound for a column width.

    The same width limits are applied whether or not the input is longer
    than the preview.  KeyboardInterrupt and IOError end the output quietly
    with a single newline.
    """
    colsizes = []
    coltypes = []
    preview_buf = []
    prev_count = 0
    inpreview = True

    try:
        f = gzip_opener(fname).open()
        try:
            for line in f:
                if not inpreview:
                    _write_cols(line, colsizes, coltypes)
                    continue

                if line[0] == '#':
                    preview_buf.append(line)
                    continue

                for i, col in enumerate(line.rstrip().split(delim)):
                    if len(colsizes) <= i:
                        colsizes.append(len(col))
                        coltypes.append('i')
                    elif len(col) > colsizes[i]:
                        colsizes[i] = len(col)
                    try:
                        int(col)
                    except ValueError:
                        # Only a failed parse demotes the column to text; an
                        # interrupt must not be mistaken for one.
                        coltypes[i] = 't'

                preview_buf.append(line)
                prev_count += 1
                if prev_count >= preview_lines:
                    colsizes = _pad_colsizes(colsizes, max_size, min_size)
                    for preview in preview_buf:
                        _write_cols(preview, colsizes, coltypes)
                    preview_buf = None
                    inpreview = False

            # Input ended before the preview filled up: flush what was
            # buffered, with the same width limits as the normal path.
            if preview_buf:
                colsizes = _pad_colsizes(colsizes, max_size, min_size)
                for preview in preview_buf:
                    _write_cols(preview, colsizes, coltypes)
        finally:
            # Close on every exit path, but never close stdin.
            if f is not sys.stdin:
                f.close()
    except (KeyboardInterrupt, IOError):
        # Ctrl-C or an I/O failure (NOTE(review): presumably a closed pipe —
        # confirm); finish the current output line and return quietly.
        print("")
예제 #3
0
def tab_combine(outfile, fnames ,delim='\t'):
    """Combine delimited text files into one workbook, one worksheet per file.

    outfile -- path of the workbook to create.
    fnames  -- input names, each opened via ``gzip_opener(fname).open()``.
    delim   -- input column separator.

    Each worksheet is named after its input file with a trailing ``.gz`` and
    then a trailing ``.txt`` removed, without any directory part, truncated
    to 31 characters.  Progress is reported on stderr.  The workbook is
    created with ``strings_to_numbers`` enabled and is closed only after
    every input has been read without error.
    """
    import os  # local import: only needed to derive the worksheet name

    workbook = xlsxwriter.Workbook(outfile, {'strings_to_numbers': True})

    for fname in fnames:
        # Remove the extensions as whole suffixes.  str.rstrip() would treat
        # its argument as a set of characters and eat into the name itself
        # (e.g. "input.txt" -> "inpu").
        label = fname
        for suffix in ('.gz', '.txt'):
            if label.endswith(suffix):
                label = label[:-len(suffix)]

        sys.stderr.write(label)
        sys.stderr.write("... ")

        # Path separators are not allowed in worksheet names, so only the
        # final path component is used.
        worksheet = workbook.add_worksheet(os.path.basename(label)[:31])

        f = gzip_opener(fname).open()
        try:
            for row, line in enumerate(f):
                for col, val in enumerate(line.rstrip().split(delim)):
                    worksheet.write(row, col, val)
        finally:
            # Release the handle even if a cell write fails; never close
            # stdin.
            if f is not sys.stdin:
                f.close()
        sys.stderr.write("done\n")

    workbook.close()
예제 #4
0
def filter_file(fname,criteria,header):
    """Copy to stdout the lines of *fname* accepted by *criteria*.

    fname    -- input name, opened via ``gzip_opener(fname).open()``.
    criteria -- object whose ``filter(cols)`` method receives the list of
                tab-separated column values of a line and returns a truthy
                value to keep that line.
    header   -- when truthy, the first non-comment line is treated as a
                header and written without being filtered.

    Lines whose first character is ``#`` are always written unchanged.
    """
    f = gzip_opener(fname).open()
    try:
        header_pending = bool(header)
        for line in f:
            if line[0] == '#':
                sys.stdout.write(line)
            elif header_pending:
                sys.stdout.write(line)
                header_pending = False
            else:
                # Strip only the line terminator: stripping all surrounding
                # whitespace would drop leading/trailing empty columns and
                # shift the column positions seen by the criteria.
                cols = line.rstrip('\r\n').split('\t')
                if criteria.filter(cols):
                    sys.stdout.write(line)
    finally:
        # Close on every exit path, but never close stdin.
        if f is not sys.stdin:
            f.close()
예제 #5
0
파일: tab_tag.py 프로젝트: mbreese/tabutils
def tab_tag(fname, colname, colvalue, colidx, delim='\t', noheader=False):
    """Write *fname* to stdout with one extra column inserted in every row.

    fname    -- input name, opened via ``gzip_opener(fname).open()``.
    colname  -- value inserted into the header row.
    colvalue -- value inserted into every other row.
    colidx   -- position passed to ``list.insert`` for the new column.
    delim    -- input column separator.
    noheader -- when true, no row is treated as a header and every row
                receives *colvalue*.

    Lines whose first character is ``#`` are copied through unchanged and do
    not count as the header.  Output rows are always joined with tabs,
    whatever *delim* is.
    """
    f = gzip_opener(fname).open()
    try:
        header = not noheader
        for line in f:
            if line[0] == '#':
                sys.stdout.write(line)
                continue

            cols = line.rstrip('\n').split(delim)
            # Only the first non-comment line can be the header row.
            cols.insert(colidx, colname if header else colvalue)
            header = False

            sys.stdout.write('%s\n' % '\t'.join(cols))
    finally:
        # Release the handle even when a write fails.  The handle may be
        # sys.stdin (other readers in this file guard for that), which is
        # left open.
        if f is not sys.stdin:
            f.close()