def readAndCollapseTable(infile, options, missing_value=""):
    '''collapse a table.

    Collapse a table of two columns with row names in the first
    column. Outputs a table with multiple columns for each row name.
    '''

    fields, table = CSV.readTable(
        infile, with_header=options.has_headers, as_rows=True)

    if len(fields) != 2:
        raise NotImplementedError("can only work on tables with two columns")

    values = collections.defaultdict(list)

    # the row name that signals the start of a new column block
    separator = table[0][0]
    row_names = set([x[0] for x in table])

    row_name, value = table[0]
    values[row_name].append(value)
    added = set([row_name])

    for row_name, value in table[1:]:
        if row_name == separator:
            # pad rows that were missing from the previous block
            for r in row_names:
                if r not in added:
                    values[r].append(missing_value)
            added = set()

        values[row_name].append(value)
        added.add(row_name)

    # pad rows missing from the final block
    for r in row_names:
        if r not in added:
            values[r].append(missing_value)

    sizes = set([len(x) for x in list(values.values())])
    assert len(sizes) == 1, "unequal number of row_names"
    size = list(sizes)[0]

    options.stdout.write(
        "row\t%s\n" % ("\t".join(["column_%i" % x for x in range(size)])))

    for key, row in list(values.items()):
        options.stdout.write("%s\t%s\n" % (key, "\t".join(row)))
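
# A minimal, self-contained sketch (not part of the original script) of the
# collapse logic above: a long two-column table is turned into one row per
# name with one column per block. The gene names and values are made up for
# illustration.
def _example_collapse():
    import collections

    # two blocks, each starting at the separator row name "geneA";
    # "geneC" is missing from the first block
    rows = [("geneA", "1"), ("geneB", "2"),
            ("geneA", "3"), ("geneB", "4"), ("geneC", "5")]

    separator = rows[0][0]
    row_names = set(x[0] for x in rows)
    values = collections.defaultdict(list)
    added = set()

    for row_name, value in rows:
        if row_name == separator and added:
            # pad rows missing from the previous block
            for r in row_names - added:
                values[r].append("")
            added = set()
        values[row_name].append(value)
        added.add(row_name)
    for r in row_names - added:
        values[r].append("")

    # values == {"geneA": ["1", "3"], "geneB": ["2", "4"],
    #            "geneC": ["", "5"]}
    return values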
def getGODescriptions(infile):
    '''build dictionary mapping GO ids to types and descriptions.

    Arguments
    ---------
    infile : string
        Filename of table with GO assignments

    Returns
    -------
    mapping : dict
        Dictionary mapping GOid to GOtype and GOdescription.
    '''

    with IOTools.open_file(infile) as inf:
        fields, table = CSV.readTable(inf, as_rows=False)

    return dict([(y, (x, z)) for x, y, z in zip(
        table[fields.index("go_type")],
        table[fields.index("go_id")],
        table[fields.index("description")])])
def readAndExpandTable(infile, options):
    '''expand a table.

    Splits multi-valued fields at the separator and outputs one row
    per value, padding shorter fields with empty strings. (This is the
    function called for --expand in main; it performs no FDR
    computation.)
    '''

    fields, table = CSV.readTable(
        infile, with_header=options.has_headers, as_rows=True)

    options.stdout.write("\t".join(fields) + "\n")

    for row in table:

        data = []
        for x in range(len(fields)):
            data.append(row[x].split(options.separator))

        nrows = max([len(d) for d in data])

        # pad all fields to the same number of values
        for d in data:
            d += [""] * (nrows - len(d))

        for n in range(nrows):
            options.stdout.write("\t".join([d[n] for d in data]) + "\n")
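
# A small illustration (hypothetical data, not part of the original script)
# of the expansion above: multi-valued cells are split at the separator and
# padded so that every value gets its own output row.
def _example_expand():
    row = ["a;b;c", "1;2", "x"]
    data = [cell.split(";") for cell in row]
    nrows = max(len(d) for d in data)
    for d in data:
        d += [""] * (nrows - len(d))
    # yields ("a", "1", "x"), ("b", "2", ""), ("c", "", "")
    return [tuple(d[n] for d in data) for n in range(nrows)]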
def main(argv=None): """script main. parses command line options in sys.argv, unless *argv* is given. """ if argv is None: argv = sys.argv parser = E.OptionParser(version="%prog version: $Id$", usage=globals()["__doc__"]) parser.add_option( "-m", "--method", dest="methods", type="choice", action="append", choices=("transpose", "normalize-by-max", "normalize-by-value", "multiply-by-value", "percentile", "remove-header", "normalize-by-table", "upper-bound", "lower-bound", "kullback-leibler", "expand", "compress", "fdr", "grep"), help="""actions to perform on table.""") parser.add_option("-s", "--scale", dest="scale", type="float", help="factor to scale matrix by.") parser.add_option("-f", "--format", dest="format", type="string", help="output number format [default]") parser.add_option("-p", "--parameters", dest="parameters", type="string", help="Parameters for various functions.") parser.add_option("-t", "--header-names", dest="has_headers", action="store_true", help="matrix has row/column headers.") parser.add_option("--transpose", dest="transpose", action="store_true", help="transpose table.") parser.add_option( "--set-transpose-field", dest="set_transpose_field", type="string", help="set first field (row 1 and col 1) to this value [%default].") parser.add_option("--transpose-format", dest="transpose_format", type="choice", choices=( "default", "separated", ), help="input format of un-transposed table") parser.add_option( "--expand", dest="expand_table", action="store_true", help="expand table - multi-value cells with be expanded over " "several rows.") parser.add_option("--no-headers", dest="has_headers", action="store_false", help="matrix has no row/column headers.") parser.add_option("--columns", dest="columns", type="string", help="columns to use.") parser.add_option("--file", dest="file", type="string", help="columns to test from table.", metavar="FILE") parser.add_option("-d", "--delimiter", dest="delimiter", type="string", help="delimiter of columns.", metavar="DELIM") parser.add_option("-V", "--invert-match", dest="invert_match", action="store_true", help="invert match.") parser.add_option("--sort-by-rows", dest="sort_rows", type="string", help="output order for rows.") parser.add_option("-a", "--value", dest="value", type="float", help="value to use for various algorithms.") parser.add_option("--group", dest="group_column", type="int", help="group values by column. Supply an integer column " "[default=%default]") parser.add_option("--group-function", dest="group_function", type="choice", choices=("min", "max", "sum", "mean", "stats", "cat", "uniq"), help="function to group values by.") parser.add_option("--join-table", dest="join_column", type="int", help="join rows in a table by columns.") parser.add_option( "--collapse-table", dest="collapse_table", type="string", help="collapse a table. 
Value determines the missing variable " "[%default].") parser.add_option("--join-column-name", dest="join_column_name", type="int", help="use this column as a prefix.") parser.add_option("--flatten-table", dest="flatten_table", action="store_true", help="flatten a table [%default].") parser.add_option("--as-column", dest="as_column", action="store_true", help="output table as a single column.") parser.add_option("--split-fields", dest="split_fields", action="store_true", help="split fields.") parser.add_option( "--separator", dest="separator", type="string", help="separator for multi-valued fields [default=%default].") parser.add_option( "--fdr-method", dest="fdr_method", type="choice", choices=("BH", "bonferroni", "holm", "hommel", "hochberg", "BY"), help="method to perform multiple testing correction by controlling " "the fdr [default=%default].") parser.add_option( "--fdr-add-column", dest="fdr_add_column", type="string", help="add new column instead of replacing existing columns. " "The value of the option will be used as prefix if there are " "multiple columns [%default]") # IMS: add option to use a column as the row id in flatten parser.add_option( "--id-column", dest="id_column", type="string", help="list of column(s) to use as the row id when flattening " "the table. If None, then row number is used. [default=%default].") parser.add_option( "--variable-name", dest="variable_name", type="string", help="the column header for the 'variable' column when flattening " "[default=%default].") parser.add_option( "--value-name", dest="value_name", type="string", help="the column header for the 'value' column when flattening " "[default=%default].") parser.set_defaults( methods=[], scale=1.0, has_headers=True, format=None, value=0.0, parameters="", columns="all", transpose=False, set_transpose_field=None, transpose_format="default", group=False, group_column=0, group_function="mean", missing_value="na", sort_rows=None, flatten_table=False, collapse_table=None, separator=";", expand=False, join_column=None, join_column_name=None, compute_fdr=None, as_column=False, fdr_method="BH", fdr_add_column=None, id_column=None, variable_name="column", value_name="value", file=None, delimiter="\t", invert_match=False, ) (options, args) = E.start(parser, add_pipe_options=True) options.parameters = options.parameters.split(",") if options.group_column: options.group = True options.group_column -= 1 ###################################################################### ###################################################################### ###################################################################### # if only to remove header, do this quickly if options.methods == ["remove-header"]: first = True for line in options.stdin: if line[0] == "#": continue if first: first = False continue options.stdout.write(line) elif options.transpose or "transpose" in options.methods: readAndTransposeTable(options.stdin, options) elif options.flatten_table: # IMS: bug fixed to make work. 
Also added options for keying # on a particular and adding custom column headings fields, table = CSV.readTable(options.stdin, with_header=options.has_headers, as_rows=True) options.columns = getColumns(fields, options.columns) if options.id_column: id_columns = [int(x) - 1 for x in options.id_column.split(",")] id_header = "\t".join( [fields[id_column] for id_column in id_columns]) options.columns = [ x for x in options.columns if x not in id_columns ] else: id_header = "row" options.stdout.write( "%s\t%s\t%s\n" % (id_header, options.variable_name, options.value_name)) for x, row in enumerate(table): if options.id_column: row_id = "\t".join( [row[int(x) - 1] for x in options.id_column.split(",")]) else: row_id = str(x) for y in options.columns: options.stdout.write("%s\t%s\t%s\n" % (row_id, fields[y], row[y])) elif options.as_column: fields, table = CSV.readTable(options.stdin, with_header=options.has_headers, as_rows=True) options.columns = getColumns(fields, options.columns) table = list(zip(*table)) options.stdout.write("value\n") for column in options.columns: options.stdout.write("\n".join(table[column]) + "\n") elif options.split_fields: # split comma separated fields fields, table = CSV.readTable(options.stdin, with_header=options.has_headers, as_rows=True) options.stdout.write("%s\n" % ("\t".join(fields))) for row in table: row = [x.split(options.separator) for x in row] for d in itertools.product(*row): options.stdout.write("%s\n" % "\t".join(d)) elif options.group: readAndGroupTable(options.stdin, options) elif options.join_column: readAndJoinTable(options.stdin, options) elif options.expand_table: readAndExpandTable(options.stdin, options) elif options.collapse_table is not None: readAndCollapseTable(options.stdin, options, options.collapse_table) elif "grep" in options.methods: options.columns = [int(x) - 1 for x in options.columns.split(",")] patterns = [] if options.file: infile = IOTools.open_file(options.file, "r") for line in infile: if line[0] == "#": continue patterns.append(line[:-1].split(options.delimiter)[0]) else: patterns = args for line in options.stdin: data = line[:-1].split(options.delimiter) found = False for c in options.columns: if data[c] in patterns: found = True break if (not found and options.invert_match) or ( found and not options.invert_match): print(line[:-1]) else: ###################################################################### ###################################################################### ###################################################################### # Apply remainder of transformations fields, table = CSV.readTable(options.stdin, with_header=options.has_headers, as_rows=False) # convert columns to list table = [list(x) for x in table] ncols = len(fields) if len(table) == 0: raise ValueError("table is empty") nrows = len(table[0]) E.info("processing table with %i rows and %i columns" % (nrows, ncols)) options.columns = getColumns(fields, options.columns) # convert all values to float for c in options.columns: for r in range(nrows): try: table[c][r] = float(table[c][r]) except ValueError: continue for method in options.methods: if method == "normalize-by-value": value = float(options.parameters[0]) del options.parameters[0] for c in options.columns: table[c] = [x / value for x in table[c]] elif method == "multiply-by-value": value = float(options.parameters[0]) del options.parameters[0] for c in options.columns: table[c] = [x * value for x in table[c]] elif method == "normalize-by-max": for c in options.columns: m = 
max(table[c]) table[c] = [x / m for x in table[c]] elif method == "kullback-leibler": options.stdout.write("category1\tcategory2\tkl1\tkl2\tmean\n") format = options.format if format is None: format = "%f" for x in range(0, len(options.columns) - 1): for y in range(x + 1, len(options.columns)): c1 = options.columns[x] c2 = options.columns[y] e1 = 0 e2 = 0 for z in range(nrows): p = table[c1][z] q = table[c2][z] e1 += p * math.log(p / q) e2 += q * math.log(q / p) options.stdout.write( "%s\t%s\t%s\t%s\t%s\n" % (fields[c1], fields[c2], format % e1, format % e2, format % ((e1 + e2) / 2))) E.stop() sys.exit(0) elif method == "rank": for c in options.columns: tt = table[c] t = list(zip(tt, list(range(nrows)))) t.sort() for i, n in zip([x[1] for x in t], list(range(nrows))): tt[i] = n elif method in ("lower-bound", "upper-bound"): boundary = float(options.parameters[0]) del options.parameters[0] new_value = float(options.parameters[0]) del options.parameters[0] if method == "upper-bound": for c in options.columns: for r in range(nrows): if isinstance(table[c][r], float) and \ table[c][r] > boundary: table[c][r] = new_value else: for c in options.columns: for r in range(nrows): if isinstance(table[c][r], float) and \ table[c][r] < boundary: table[c][r] = new_value elif method == "fdr": pvalues = [] for c in options.columns: pvalues.extend(table[c]) assert max(pvalues) <= 1.0, "pvalues > 1 in table: max=%s" % \ str(max(pvalues)) assert min(pvalues) >= 0, "pvalue < 0 in table: min=%s" % \ str(min(pvalues)) # convert to str to avoid test for float downstream qvalues = list( map( str, Stats.adjustPValues(pvalues, method=options.fdr_method))) if options.fdr_add_column is None: x = 0 for c in options.columns: table[c] = qvalues[x:x + nrows] x += nrows else: # add new column headers if len(options.columns) == 1: fields.append(options.fdr_add_column) else: for co in options.columns: fields.append(options.fdr_add_column + fields[c]) x = 0 for c in options.columns: # add a new column table.append(qvalues[x:x + nrows]) x += nrows ncols += len(options.columns) elif method == "normalize-by-table": other_table_name = options.parameters[0] del options.parameters[0] other_fields, other_table = CSV.readTable( IOTools.open_file(other_table_name, "r"), with_header=options.has_headers, as_rows=False) # convert all values to float for c in options.columns: for r in range(nrows): try: other_table[c][r] = float(other_table[c][r]) except ValueError: continue # set 0s to 1 in the other matrix for c in options.columns: for r in range(nrows): if isinstance(table[c][r], float) and \ isinstance(other_table[c][r], float) and \ other_table[c][r] != 0: table[c][r] /= other_table[c][r] else: table[c][r] = options.missing_value # convert back if options.format is not None: for c in options.columns: for r in range(nrows): if isinstance(table[c][r], float): table[c][r] = format % table[c][r] options.stdout.write("\t".join(fields) + "\n") if options.sort_rows: old2new = {} for r in range(nrows): old2new[table[0][r]] = r for x in options.sort_rows.split(","): if x not in old2new: continue r = old2new[x] options.stdout.write( "\t".join(map(str, [table[c][r] for c in range(ncols)])) + "\n") else: for r in range(nrows): options.stdout.write( "\t".join(map(str, [table[c][r] for c in range(ncols)])) + "\n") E.stop()
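
# A minimal sketch (assumption, not the library's code) of the
# Benjamini-Hochberg adjustment that Stats.adjustPValues is assumed to
# perform for method="BH" above; the real implementation may differ in
# detail.
def _example_adjust_pvalues_bh(pvalues):
    n = len(pvalues)
    order = sorted(range(n), key=lambda i: pvalues[i])
    qvalues = [0.0] * n
    minq = 1.0
    # walk from the largest p-value down, enforcing monotonicity
    for rank in range(n, 0, -1):
        i = order[rank - 1]
        minq = min(minq, pvalues[i] * n / rank)
        qvalues[i] = minq
    # e.g. [0.01, 0.04, 0.03, 0.5] -> [0.04, 0.0533..., 0.0533..., 0.5]
    return qvalues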
def readAndGroupTable(infile, options):
    """read table from infile and group.
    """
    fields, table = CSV.readTable(
        infile, with_header=options.has_headers, as_rows=True)
    options.columns = getColumns(fields, options.columns)
    assert options.group_column not in options.columns

    converter = float
    new_fields = [fields[options.group_column]] + \
        [fields[x] for x in options.columns]

    if options.group_function == "min":
        f = min
    elif options.group_function == "max":
        f = max
    elif options.group_function == "sum":
        f = sum
    elif options.group_function == "mean":
        f = scipy.mean
    elif options.group_function == "cat":
        f = lambda x: ";".join([y for y in x if y != ""])
        converter = str
    elif options.group_function == "uniq":
        f = lambda x: ";".join([y for y in set(x) if y != ""])
        converter = str
    elif options.group_function == "stats":
        f = lambda x: str(Stats.DistributionalParameters(x))
        # update headers
        new_fields = [fields[options.group_column]]
        for c in options.columns:
            new_fields += list(
                ["%s_%s" % (fields[c], x) for x in
                 Stats.DistributionalParameters().getHeaders()])

    # convert values to floats (except for group_column)
    # Delete rows with unconvertable values and not in options.columns
    new_table = []
    for row in table:
        skip = False
        new_row = [row[options.group_column]]

        for c in options.columns:
            if row[c] == options.missing_value:
                new_row.append(row[c])
            else:
                try:
                    new_row.append(converter(row[c]))
                except ValueError:
                    skip = True
                    break
        if not skip:
            new_table.append(new_row)
    table = new_table

    new_rows = CSV.groupTable(table, group_column=0, group_function=f)

    options.stdout.write("\t".join(new_fields) + "\n")
    for row in new_rows:
        options.stdout.write("\t".join(map(str, row)) + "\n")
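
# An illustrative stand-in (hypothetical data, not CSV.groupTable itself)
# for the grouping performed above: rows sharing a key in column 0 are
# collected and reduced with the chosen aggregation function.
def _example_group_rows():
    import collections

    table = [["a", 1.0], ["b", 2.0], ["a", 3.0]]
    groups = collections.OrderedDict()
    for row in table:
        groups.setdefault(row[0], []).append(row[1])

    # with group_function=max this yields [("a", 3.0), ("b", 2.0)]
    return [(key, max(values)) for key, values in groups.items()]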
def main(argv=None):

    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option(
        "--output-filename-pattern", dest="output_filename_pattern",
        type="string",
        help="pattern for additional output files [%default].")

    parser.set_defaults(
        length=1000,
        minimum_coverage=0.90,
        maximum_reads=[1, 10, 20, 50, 100],
        output_filename_pattern="%s",
        normalize=True,
    )

    (options, args) = E.start(parser, add_csv_options=True)

    fields, table = CSV.readTable(sys.stdin, dictreader=CSV.DictReaderLarge)

    map_fields2column = {}
    for x in fields:
        map_fields2column[x] = len(map_fields2column)

    coverage_5prime = numpy.zeros(options.length, numpy.float64)
    coverage_3prime = numpy.zeros(options.length, numpy.float64)

    coverage_maxreads5prime = numpy.zeros(options.length, numpy.float64)
    coverage_maxreads3prime = numpy.zeros(options.length, numpy.float64)

    coverage_full5prime = numpy.zeros(options.length, numpy.float64)
    coverage_full3prime = numpy.zeros(options.length, numpy.float64)

    coverage_min5prime = numpy.zeros(options.length, numpy.float64)
    coverage_min3prime = numpy.zeros(options.length, numpy.float64)

    histograms = []
    for x in range(len(options.maximum_reads)):
        histograms.append([numpy.zeros(options.length, numpy.float64),
                           numpy.zeros(options.length, numpy.float64),
                           0])

    ninput, noutput, nfull, nmincov, nskipped, nlength, nmaxreads = (
        0, 0, 0, 0, 0, 0, 0)

    for row in table:
        length, covered, meancov, data, nreads = (
            int(row["cov_nval"]),
            float(row["cov_covered"]),
            float(row["cov_mean"]),
            row["cov_values"],
            int(row["nover2"]))

        ninput += 1

        if length < options.length:
            nlength += 1
            continue

        if data == "na":
            nskipped += 1
            continue

        noutput += 1
        mincov = covered / length
        values = list(map(float, data.split(";")))
        m = max(values)
        values = [x / m for x in values]

        coverage_5prime += values[0:1000]
        coverage_3prime += values[-1000:]

        if mincov >= 1.0:
            coverage_full5prime += values[0:1000]
            coverage_full3prime += values[-1000:]
            nfull += 1

        if meancov >= options.minimum_coverage:
            coverage_min5prime += values[0:1000]
            coverage_min3prime += values[-1000:]
            nmincov += 1

        for maxreads in range(len(options.maximum_reads)):
            if nreads <= options.maximum_reads[maxreads]:
                histograms[maxreads][0] += values[0:1000]
                histograms[maxreads][1] += values[-1000:]
                histograms[maxreads][2] += 1

    if options.normalize:
        for x5, x3 in ((coverage_5prime, coverage_3prime),
                       (coverage_min5prime, coverage_min3prime),
                       (coverage_full5prime, coverage_full3prime)):
            m = max((max(x5), max(x3)))
            x3 /= m
            x5 /= m

        for x5, x3, c in histograms:
            m = max((max(x5), max(x3)))
            x5 /= m
            x3 /= m

    outfile = options.stdout
    outfile.write("\t".join(("distance",
                             "minlen-5'", "minlen-3'",
                             "mincov-5'", "mincov-3'",
                             "full-5'", "full-3'")) + "\n")

    for x in range(0, options.length):
        outfile.write("\t".join(
            ["%6.4f" % x for x in
             (x,
              coverage_5prime[x], coverage_3prime[x],
              coverage_min5prime[x], coverage_min3prime[x],
              coverage_full5prime[x], coverage_full3prime[x])]) + "\n")

    outfile5 = IOTools.open_file(
        options.output_filename_pattern % "reads5", "w")
    outfile3 = IOTools.open_file(
        options.output_filename_pattern % "reads3", "w")

    outfile5.write("\t".join(["distance", ] + [
        "reads%i" % options.maximum_reads[y]
        for y in range(len(options.maximum_reads))]) + "\n")
    outfile3.write("\t".join(["distance", ] + [
        "reads%i" % options.maximum_reads[y]
        for y in range(len(options.maximum_reads))]) + "\n")

    for x in range(0, options.length):
        outfile5.write("%i\t%s\n" % (x, "\t".join(
            ["%6.4f" % histograms[y][0][x]
             for y in range(len(options.maximum_reads))])))
        outfile3.write("%i\t%s\n" % (x, "\t".join(
            ["%6.4f" % histograms[y][1][x]
             for y in range(len(options.maximum_reads))])))

    E.info("ninput=%i, noutput=%i, nmaxreads=%i, nfull=%i, nmincov=%i, "
           "nskipped=%i, nlength=%i" %
           (ninput, noutput, nmaxreads, nfull, nmincov, nskipped, nlength))

    E.stop()
def main(argv=None): """script main. parses command line options in sys.argv, unless *argv* is given. """ if argv is None: argv = sys.argv parser = E.OptionParser(version="%prog version: $Id: data2bins.py 2782 2009-09-10 11:40:29Z andreas $", usage=globals()["__doc__"]) parser.add_option("--column", dest="column", type="int", help="column to split on.") parser.add_option("--num-bins", dest="num_bins", type="int", help="number of bins to create.") parser.add_option("--method", dest="method", type="choice", choices=("equal-sized-bins",), help="method to use to bin data.") parser.add_option("--no-headers", dest="has_headers", action="store_false", help="matrix has no row/column headers.") parser.add_option("-p", "--output-filename-pattern", dest="output_filename_pattern", type="string", help="OUTPUT filename with histogram information on aggregate coverages [%default].") parser.set_defaults( has_headers=True, method="equal-sized-bins", column=1, num_bins=4, output_filename_pattern="bin%i", ) (options, args) = E.start(parser) options.column -= 1 if args: if args[0] == "-": infile = sys.stdin else: infile = IOTools.open_file(args[0], "r") else: infile = sys.stdin fields, data = CSV.readTable(infile) c = options.column values = [float(x[c]) for x in data] bins = [] if options.method == "equal-sized-bins": increment = int(math.floor(float(len(values)) / options.num_bins)) indices = list(range(0, len(values))) indices.sort(key=lambda x: values[x]) for x in range(len(values)): values[indices[x]] = x bins = list(range(0, len(values) - increment, increment)) elif options.method == "pass": pass E.debug("bins=%s" % str(bins)) outputters = [] for x in range(0, len(bins)): outputters.append( Outputter(options.output_filename_pattern % x, fields)) # output tables for x in range(0, len(data)): bin = bisect.bisect(bins, values[x]) - 1 outputters[bin].write(data[x]) # stats if options.loglevel >= 1: options.stdlog.write("# bin\tstart\tcounts\tfilename\n") for x in range(0, len(bins)): options.stdlog.write("# %i\t%f\t%i\t%s\n" % ( x, bins[x], outputters[x].mCounts, outputters[x].mFilename)) E.info("ninput=%i, noutput=%i" % (len(data), sum((x.mCounts for x in outputters)))) E.stop()
def main(argv=None): """script main. parses command line options in sys.argv, unless *argv* is given. """ if argv is None: argv = sys.argv parser = E.OptionParser( version= "%prog version: $Id: csv_set.py 2782 2009-09-10 11:40:29Z andreas $") parser.add_option("-u", "--unique", dest="unique", action="store_true", help="output rows are uniq.") parser.add_option("-1", "--join-fields1", dest="join_fields1", type="string", help="join fields in first table.") parser.add_option("-2", "--join-fields2", dest="join_fields2", type="string", help="join fields in second table.") parser.add_option("-m", "--method", dest="method", type="choice", help="set operation to perform.", choices=("intersection", "rest", "union")) parser.set_defaults( remove=False, unique=False, join_fields1=None, join_fields2=None, method="intersection", ) (options, args) = E.start(parser, add_csv_options=True) if len(args) != 2: raise ValueError("please specify two files to join") if not options.join_fields1 or not options.join_fields2: raise ValueError("please specify at least one join field per table") options.join_fields1 = options.join_fields1.split(",") options.join_fields2 = options.join_fields2.split(",") options.filename1, options.filename2 = args fields1, table1 = CSV.readTable(open(options.filename1, "r")) fields2, table2 = CSV.readTable(open(options.filename2, "r")) if options.unique: outfile = UniqueBuffer(sys.stdout) else: outfile = options.stdout nfields1 = [] for x in range(len(fields1)): if fields1[x] in options.join_fields1: nfields1.append(x) nfields2 = [] for x in range(len(fields2)): if fields2[x] in options.join_fields2: nfields2.append(x) # calculate row indices: double keys are not taken care of here keys = {} for row1 in table1: v = [row1[x] for x in nfields1] key = hashlib.md5("".join(v)).digest() keys[key] = row1 if options.method == "intersection": # build new field list take = list(range(len(fields1))) c = len(take) for x in fields2: if x not in options.join_fields2: take.append(c) c += 1 t = fields1 + fields2 new_fields = [t[x] for x in take] print("\t".join(new_fields)) for row2 in table2: v = [row2[x] for x in nfields2] key = hashlib.md5("".join(v)).digest() if key in keys: new_row = keys[key] + row2 outfile.write("\t".join([new_row[x] for x in take]) + "\n") elif options.method == "rest": new_fields = fields2 print("\t".join(new_fields)) for row2 in table2: v = [row2[x] for x in nfields2] key = hashlib.md5("".join(v)).digest() if key not in keys: outfile.write("\t".join(row2) + "\n") E.stop()
def main(argv=None): """script main. parses command line options in sys.argv, unless *argv* is given. """ if argv is None: argv = sys.argv parser = E.OptionParser( version="%prog version: $Id: csv_intersection.py 2782 2009-09-10 11:40:29Z andreas $") parser.add_option("-u", "--unique", dest="unique", action="store_true", help="output rows are uniq.") parser.set_defaults( remove=False, unique=False, ) (options, args) = E.start(parser, add_csv_options=True) if len(args) != 2: raise ValueError("please specify two files to join") options.filename1, options.filename2 = args table1 = CSV.readTable(IOTools.open_file(options.filename1, "r")) table2 = CSV.readTable(IOTools.open_file(options.filename2, "r")) if options.unique: outfile = UniqueBuffer(sys.stdout) else: outfile = options.stdout # build new field list new_fields = [] for x in options.join_fields1: new_fields.append(x) for x in fields1: if x not in options.join_fields1: new_fields.append(x) if x not in options.join_fields2: new_fields.append(x) writer = csv.DictWriter(outfile, fields, dialect=options.csv_dialect, lineterminator=options.csv_lineterminator, extrasaction='ignore') if len(lines) > 0: old_fields = lines[0][:-1].split("\t") if options.remove: fields = [] for x in old_fields: if x not in input_fields: fields.append(x) else: fields = input_fields reader = csv.DictReader(lines, dialect=options.csv_dialect) print("\t".join(fields)) first_row = True for row in reader: row = IOTools.convertDictionary(row) writer.writerow(row) E.stop()