def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    The workspace file is taken from the last command line argument and
    processed according to ``--method``:

    * ``compensation`` - the table returned by
      ``P52.get_compensation_matrix`` is written tab-separated to stdout.
    * ``parse_gating`` - one dummy file per name returned by
      ``P52.parse_gating_file`` is touched in ``--gating-directory``.

    Any other method value does nothing.
    """

    if argv is None:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("-t", "--test", dest="test", type="string",
                      help="supply help")

    parser.add_option("--method", dest="method", type="choice",
                      choices=["compensation", "parse_gating"],
                      help="select method to perform on workspace "
                      "file.")

    parser.add_option("--gating-directory", dest="gate_dir", type="string",
                      help="directory to store gating dummy files")

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    infile = argv[-1]

    if options.method == "compensation":
        # Split on the LAST "/" only.  The previous list.remove() call
        # dropped the first path component equal to the file name, which
        # corrupted the directory for paths such as "x/y/x".
        path, _, infile = infile.rpartition("/")

        out_df = P52.get_compensation_matrix(path=path,
                                             infile=infile)
        out_df.to_csv(options.stdout, sep="\t")

    elif options.method == "parse_gating":
        for dfile in P52.parse_gating_file(infile):
            outfile = options.gate_dir + "/" + dfile
            P.touch(outfile)

    # write footer and output benchmark information.
    # This must come after all output has been written: it finalises the
    # run (NOTE(review): E.Stop() is understood to close a redirected
    # stdout - confirm against the Experiment module).
    E.Stop()
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    The last command line argument names a text file listing one input
    file per line.  The listed files are passed to ``P52.makeMatrixList``
    and the result to ``P52.getMatrixDistances``; the returned table is
    written tab-separated to stdout.
    """

    if argv is None:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("-t", "--test", dest="test", type="string",
                      help="supply help")

    parser.add_option("--id-column", dest="id_col", type="string",
                      help="column header for sample IDs")

    parser.add_option("--matrix-distance", dest="dist", type="choice",
                      choices=["Euclid"], help="distance metric to use "
                      "for distance between matrices")

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    # A set_defaults(filter_zero=..., filt_gate=...) call used to follow
    # here.  It ran after parsing and named options this parser does not
    # define, so it had no effect and has been removed.

    infile = argv[-1]

    # the input file is a list of files, one per line.  Build a real list:
    # map() returns a one-shot iterator under Python 3, which breaks any
    # consumer that iterates twice or asks for a length.
    with open(infile, "r") as ifile:
        list_of_files = [line.rstrip("\n") for line in ifile]

    matrix_list = P52.makeMatrixList(list_of_files=list_of_files,
                                     id_col=options.id_col)

    distance_matrix = P52.getMatrixDistances(list_of_matrices=matrix_list,
                                             distance=options.dist)

    distance_matrix.to_csv(options.stdout, sep="\t", index_label=None)

    # write footer and output benchmark information.
    E.Stop()
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    The workspace file is taken from the last command line argument and
    processed according to ``--method``:

    * ``compensation`` - the table returned by
      ``P52.get_compensation_matrix`` is written tab-separated to stdout.
    * ``parse_gating`` - one dummy file per name returned by
      ``P52.parse_gating_file`` is touched in ``--gating-directory``.

    Any other method value does nothing.
    """

    if argv is None:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("-t", "--test", dest="test", type="string",
                      help="supply help")

    parser.add_option("--method", dest="method", type="choice",
                      choices=["compensation", "parse_gating"],
                      help="select method to perform on workspace "
                      "file.")

    parser.add_option("--gating-directory", dest="gate_dir", type="string",
                      help="directory to store gating dummy files")

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    infile = argv[-1]

    if options.method == "compensation":
        # Split on the LAST "/" only.  The previous list.remove() call
        # dropped the first path component equal to the file name, which
        # corrupted the directory for paths such as "x/y/x".
        path, _, infile = infile.rpartition("/")

        out_df = P52.get_compensation_matrix(path=path,
                                             infile=infile)
        out_df.to_csv(options.stdout, sep="\t")

    elif options.method == "parse_gating":
        for dfile in P52.parse_gating_file(infile):
            outfile = options.gate_dir + "/" + dfile
            P.touch(outfile)

    # write footer and output benchmark information.
    # This must come after all output has been written: it finalises the
    # run (NOTE(review): E.Stop() is understood to close a redirected
    # stdout - confirm against the Experiment module).
    E.Stop()
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    The last command line argument names a text file listing one input
    file per line.  The listed files are passed to ``P52.makeMatrixList``
    and the result to ``P52.getMatrixDistances``; the returned table is
    written tab-separated to stdout.
    """

    if argv is None:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("-t", "--test", dest="test", type="string",
                      help="supply help")

    parser.add_option("--id-column", dest="id_col", type="string",
                      help="column header for sample IDs")

    parser.add_option("--matrix-distance", dest="dist", type="choice",
                      choices=["Euclid"], help="distance metric to use "
                      "for distance between matrices")

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    # A set_defaults(filter_zero=..., filt_gate=...) call used to follow
    # here.  It ran after parsing and named options this parser does not
    # define, so it had no effect and has been removed.

    infile = argv[-1]

    # the input file is a list of files, one per line.  Build a real list:
    # map() returns a one-shot iterator under Python 3, which breaks any
    # consumer that iterates twice or asks for a length.
    with open(infile, "r") as ifile:
        list_of_files = [line.rstrip("\n") for line in ifile]

    matrix_list = P52.makeMatrixList(list_of_files=list_of_files,
                                     id_col=options.id_col)

    distance_matrix = P52.getMatrixDistances(list_of_matrices=matrix_list,
                                             distance=options.dist)

    distance_matrix.to_csv(options.stdout, sep="\t", index_label=None)

    # write footer and output benchmark information.
    E.Stop()
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    ``--task heritability`` calls ``P52.estimate_heritability`` on the
    monozygote and dizygote files, draws a faceted twin1/twin2 scatter
    plot through R/ggplot2 into a ``*-cors.png`` file next to the
    dizygote file, and writes one ``key: value`` line per estimate to
    stdout.

    ``--task merge`` treats the last command line argument as a
    comma-separated list of files, passes it to
    ``P52.merge_heritability`` and writes the result tab-separated to
    stdout.

    No other task value is handled here.
    """

    if argv is None:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("-t", "--test", dest="test", type="string",
                      help="supply help")

    parser.add_option("--task", dest="task", type="choice",
                      choices=["heritability", "merge", "kinship"],
                      help="task")

    parser.add_option("--heritability", dest="equation", type="string",
                      help="equation used to estimate heritability")

    parser.add_option("--monozygote-file", dest="mz_file", type="string",
                      help="file containing monozygotic twin data")

    parser.add_option("--dizygote-file", dest="dz_file", type="string",
                      help="file containing dizygotic twin data")

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    if options.task == "heritability":
        estimates = P52.estimate_heritability(mz_file=options.mz_file,
                                              dz_file=options.dz_file)

        # the plot goes into the directory of the dizygote file; its name
        # is built from fields 2-4 of the "-"-separated file name
        dz_dir, _, dz_name = options.dz_file.rpartition("/")
        plot_prefix = os.path.join(dz_dir,
                                   "-".join(dz_name.split("-")[1:4]))
        E.info("plotting correlations to %s" % plot_prefix)

        # R statements, executed in order
        r_statements = (
            '''suppressPackageStartupMessages(library(ggplot2))''',
            '''mz.df <- read.table("%s", sep="\t", h=T, row.names=1)''' %
            options.mz_file,
            '''dz.df <- read.table("%s", sep="\t", h=T, row.names=1)''' %
            options.dz_file,
            '''all.dz <- data.frame(rbind(mz.df, dz.df))''',
            '''p_cor <- ggplot(all.dz, aes(x=twin1, y=twin2, '''
            '''colour=zygosity)) + '''
            '''geom_point(size=1) + stat_smooth(method=lm) + '''
            '''facet_wrap( ~ marker, scales="free")''',
            '''png("%s-cors.png", height=720, width=720)''' % plot_prefix,
            '''print(p_cor)''',
            '''dev.off()''',
        )
        for statement in r_statements:
            R(statement)

        # text report of the estimates
        report = options.stdout
        report.write("H^2\n")
        for marker in estimates.keys():
            report.write("%s: %0.3f\n" % (marker, estimates[marker]))

    elif options.task == "merge":
        file_list = argv[-1].split(",")
        merged = P52.merge_heritability(file_list)
        merged.to_csv(options.stdout, sep="\t")

    # write footer and output benchmark information.
    E.Stop()
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    The last command line argument names the input.  If it contains
    exactly one comma it is read as two ":"-separated tables that are
    merged; otherwise it is read as a single tab-separated table.  The
    table is optionally melted (``--melt-data``) and then plotted with
    ``P52.plotHistogram`` or ``P52.plotBarchart`` according to
    ``--plot-type``.  ``scatter`` is an accepted choice but has no
    handler here, so nothing is plotted for it.

    Raises
    ------
    ValueError
        if the ``--x-axis`` variable is not found in the data frame.
    IOError
        if no ``--outfile`` was given.
    """

    if argv is None:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("-t", "--test", dest="test", type="string",
                      help="supply help")

    parser.add_option("--plot-type", dest="plot_type", type="choice",
                      choices=["histogram", "scatter", "barchart"],
                      help="type of plot to generate")

    parser.add_option("--x-axis", dest="x_axis", type="string",
                      help="variable to plot on the X-axis."
                      "This is the default axis for plotting.")

    parser.add_option("--y-axis", dest="y_axis", type="string",
                      help="variable to plot on the Y-axis")

    parser.add_option("--split-by", dest="split_by", type="string",
                      help="varible over which to split up plots")

    parser.add_option("--X-title", dest="x_title", type="string",
                      help="label to attach to X-axis")

    parser.add_option("--Y-title", dest="y_title", type="string",
                      help="label to attach to Y-axis")

    parser.add_option("--colour-var", dest="col_var", type="string",
                      help="variable to colour points by")

    parser.add_option("--free-scale", dest="free_scale", type="choice",
                      choices=["free_x", "free_y", "free"],
                      help="whether to "
                      "use free scaling on plot axes")

    parser.add_option("--outfile", dest="outfile", type="string",
                      help="file to save plot to")

    parser.add_option("--melt-data", dest="melt", action="store_true",
                      help="melt the dataframe first, requires ID vars")

    parser.add_option("--melt-id-vars", dest="id_vars", type="string",
                      help="comma separated list of id variables for"
                      " the melted dataframe")

    parser.add_option("--merge-frames", dest="merge", action="store_true",
                      help="merge two input dataframes together")

    parser.add_option("--merge-id-vars", dest="merge_vars", type="string",
                      help="comma separate list of id variables to merge "
                      "two dataframes on")

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    # NOTE(review): these defaults are registered after the command line
    # has been parsed, so they do not reach ``options``.  The call is left
    # in place rather than moved because "both" is not one of the
    # --free-scale choices - decide on the intended default first.
    parser.set_defaults(free_scale="both",
                        split_by=None,
                        col_var=None,
                        melt=False)

    infile = argv[-1]
    infiles = infile.split(",")

    if len(infiles) == 2:
        df1 = pd.read_table(infiles[0], sep=":", index_col=0, header=0)
        df2 = pd.read_table(infiles[1], sep=":", index_col=0, header=0)
        ids = options.merge_vars.split(",")

        # expose the index as a column so both tables can be merged on it
        df1[options.y_axis] = df1.index
        df2[options.y_axis] = df2.index
        df1.columns = [ids[0], options.y_axis]
        df2.columns = [ids[0], options.y_axis]

        df = pd.merge(df1, df2, on=options.y_axis)
        # these need to not be hard-coded!
        df.columns = ["mean_h2", ids[0], "fano_h2"]
    else:
        # assumes the first column is the index
        df = pd.read_table(infile, sep="\t", index_col=0, header=0)

    if options.melt:
        if options.y_axis:
            df = pd.melt(df, id_vars=options.x_axis,
                         value_name=options.y_axis,
                         var_name=options.col_var)
        else:
            # --melt-id-vars is only needed (and only parsed) here
            mids = options.id_vars.split(",")
            df = pd.melt(df, id_vars=mids,
                         value_name=options.x_axis,
                         var_name=options.col_var)

    # check variables are present.  A missing column raises KeyError, not
    # ValueError, so both are caught; catching only ValueError meant these
    # checks never fired.
    try:
        df[options.x_axis]
    except (KeyError, ValueError):
        raise ValueError("no plotting variable found")

    if options.col_var:
        try:
            df[options.col_var]
        except (KeyError, ValueError):
            E.warn("Colour variable not found in data frame."
                   "Check the data file is the correct one")

    if options.split_by:
        try:
            df[options.split_by]
        except (KeyError, ValueError):
            E.warn("Split-by variable not found in the data "
                   "frame. Check the data file is the correct"
                   " one.")

    # explicit check instead of assert, which is stripped under -O
    if not options.outfile:
        raise IOError("no output file detected")

    if options.plot_type == "histogram":
        P52.plotHistogram(data=df,
                          variable=options.x_axis,
                          save_path=options.outfile,
                          x_title=options.x_title,
                          y_title=options.y_title,
                          colour_var=options.col_var,
                          scales=options.free_scale,
                          split_var=options.split_by)
    elif options.plot_type == "barchart":
        P52.plotBarchart(data=df,
                         x_variable=options.x_axis,
                         y_variable=options.y_axis,
                         save_path=options.outfile,
                         x_title=options.x_title,
                         y_title=options.y_title,
                         colour_var=options.col_var,
                         split_var=options.split_by)

    # write footer and output benchmark information.
    E.Stop()
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Runs the task selected with ``--task``:

    * ``merge_flow`` - merge gate arrays from the database table with the
      demographics file and write one tab-separated file per returned
      table into ``--output-directory``.
    * ``split_zygosity`` - split the input file (last command line
      argument) with ``P52.split_zygosity`` and write the "MZ" and "DZ"
      tables to ``<pattern>-MZ.tsv`` and ``<pattern>-DZ.tsv``.
    * ``regress_confounding`` - write the result of
      ``P52.regress_out_confounding`` to stdout.
    * ``kinship`` - write the result of ``P52.make_kinship_matrix`` to
      stdout.
    """

    if argv is None:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("-t", "--test", dest="test", type="string",
                      help="supply help")

    parser.add_option("--twin-id-column", dest="id_column", type="string",
                      help="column number or header for twin IDs in "
                      "input tables")

    parser.add_option("--demographics-file", dest="demo_file", type="string",
                      help="tab-separated text file containing twins "
                      "demographic data")

    parser.add_option(
        "--demo-id-column", dest="demo_id_column", type="string",
        help="column header or number that indicates the twin IDs "
        "that match to those from the flow cytometry data")

    parser.add_option("--task", dest="task", type="choice",
                      choices=["merge_flow", "split_zygosity",
                               "regress_confounding", "kinship"],
                      help="choose a task")

    parser.add_option("--output-file-pattern", dest="out_pattern",
                      type="string",
                      help="output filename pattern to use where output is "
                      "multiple files")

    parser.add_option("--output-directory", dest="out_dir", type="string",
                      help="directory to output files into")

    parser.add_option("--zygosity-column", dest="zygosity_col",
                      type="string",
                      help="column header containing zygosity information")

    parser.add_option("--id-columns", dest="id_headers", type="string",
                      help="comma-separated list of column headers used to "
                      "uniquely identify each sample")

    parser.add_option("--family-id-column", dest="family_id", type="string",
                      help="column header containing family IDs")

    parser.add_option("--confounding-column", dest="confounding",
                      type="string",
                      help="either a comma-separates list or single value, "
                      "column number or column header")

    parser.add_option("--marker-group", dest="marker_col", type="string",
                      help="column header containing marker IDs to group "
                      "regression fits by")

    parser.add_option("--filter-zero-arrays", dest="filter_zero",
                      action="store_true",
                      help="Filter out arrays where there are no observations")

    parser.add_option("--database", dest="database", type="string",
                      help="absolute path to SQLite database")

    parser.add_option("--tablename", dest="table", type="string",
                      help="tablename to extract from SQL database")

    parser.add_option("--filter-gates", dest="filt_gate", type="string",
                      help="regex to filter out unwanted triboolean gates")

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    # NOTE(review): registered after parsing, so these defaults do not
    # reach ``options`` (filter_zero stays unset unless the flag is
    # given).  Left in place because moving it above E.Start() would
    # change the default filtering behaviour - confirm the intent.
    parser.set_defaults(filter_zero=True,
                        filt_gate=None)

    infile = argv[-1]

    if options.task == "merge_flow":
        merged_arrays = P52.mergeGateArrays(db=options.database,
                                            table_name=options.table,
                                            filter_zero=options.filter_zero,
                                            filter_gate=options.filt_gate)

        all_dfs = P52.mergeArrayWithDemographics(
            flow_arrays=merged_arrays,
            id_column=options.id_column,
            demo_file=options.demo_file,
            demo_id_column=options.demo_id_column)

        for out_df in all_dfs:
            # construct the table names using cell_type, panel and gate
            # cell type and statistic should be in the out_pattern
            outname = "-".join([options.out_pattern,
                                out_df["gate"][0],
                                out_df["panel"][0]])
            # file names may contain '/', replace these with "_"
            outname = outname.replace("/", "_")
            out_file = "/".join([options.out_dir, outname])

            E.info("writing %s data to file" % outname)
            # index_label (not the read_csv keyword index_col) names the
            # index column, matching the other tasks below
            out_df.to_csv(out_file, sep="\t", index_label="indx")

            # memory usage was ballooning on the R side, so the R
            # environment is cleared explicitly.
            # NOTE(review): per-table placement reconstructed from
            # whitespace-collapsed source - confirm it belongs in the loop.
            P52.clearREnvironment()

    elif options.task == "split_zygosity":
        out_frames = P52.split_zygosity(
            infile=infile,
            zygosity_column=options.zygosity_col,
            id_headers=(options.id_headers).split(","),
            pair_header=options.family_id)

        # expect keys: MZ and DZ
        try:
            MZ_frame = out_frames["MZ"]
            DZ_frame = out_frames["DZ"]

            # output filenames using pattern and zygosity as a prefix
            MZ_outfile = "-".join([options.out_pattern, "MZ.tsv"])
            MZ_frame.to_csv(MZ_outfile, sep="\t", index_label="indx")

            DZ_outfile = "-".join([options.out_pattern, "DZ.tsv"])
            DZ_frame.to_csv(DZ_outfile, sep="\t", index_label="indx")
        except TypeError as error:
            # still best-effort, but no longer silent: a missing result or
            # output pattern would otherwise leave no trace at all
            E.warn("could not write MZ/DZ tables: %s" % error)

    elif options.task == "regress_confounding":
        out_df = P52.regress_out_confounding(
            infile=infile,
            confounding_column=options.confounding,
            group_var=options.marker_col)
        out_df.to_csv(options.stdout, sep="\t", index_label="indx")

    elif options.task == "kinship":
        out_df = P52.make_kinship_matrix(twins_file=infile,
                                         id_column=options.id_headers,
                                         family_column=options.family_id,
                                         zygosity_column=options.zygosity_col)
        out_df.to_csv(options.stdout, index_label="indx", sep="\t")

    # write footer and output benchmark information.
    E.Stop()
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    ``--task heritability`` calls ``P52.estimate_heritability`` on the
    monozygote and dizygote files, draws a faceted twin1/twin2 scatter
    plot through R/ggplot2 into a ``*-cors.png`` file next to the
    dizygote file, and writes one ``key: value`` line per estimate to
    stdout.

    ``--task merge`` treats the last command line argument as a
    comma-separated list of files, passes it to
    ``P52.merge_heritability`` and writes the result tab-separated to
    stdout.

    No other task value is handled here.
    """

    if argv is None:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("-t", "--test", dest="test", type="string",
                      help="supply help")

    parser.add_option("--task", dest="task", type="choice",
                      choices=["heritability", "merge", "kinship"],
                      help="task")

    parser.add_option("--heritability", dest="equation", type="string",
                      help="equation used to estimate heritability")

    parser.add_option("--monozygote-file", dest="mz_file", type="string",
                      help="file containing monozygotic twin data")

    parser.add_option("--dizygote-file", dest="dz_file", type="string",
                      help="file containing dizygotic twin data")

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    if options.task == "heritability":
        estimates = P52.estimate_heritability(mz_file=options.mz_file,
                                              dz_file=options.dz_file)

        # the plot goes into the directory of the dizygote file; its name
        # is built from fields 2-4 of the "-"-separated file name
        dz_dir, _, dz_name = options.dz_file.rpartition("/")
        plot_prefix = os.path.join(dz_dir,
                                   "-".join(dz_name.split("-")[1:4]))
        E.info("plotting correlations to %s" % plot_prefix)

        # R statements, executed in order
        r_statements = (
            '''suppressPackageStartupMessages(library(ggplot2))''',
            '''mz.df <- read.table("%s", sep="\t", h=T, row.names=1)''' %
            options.mz_file,
            '''dz.df <- read.table("%s", sep="\t", h=T, row.names=1)''' %
            options.dz_file,
            '''all.dz <- data.frame(rbind(mz.df, dz.df))''',
            '''p_cor <- ggplot(all.dz, aes(x=twin1, y=twin2, '''
            '''colour=zygosity)) + '''
            '''geom_point(size=1) + stat_smooth(method=lm) + '''
            '''facet_wrap( ~ marker, scales="free")''',
            '''png("%s-cors.png", height=720, width=720)''' % plot_prefix,
            '''print(p_cor)''',
            '''dev.off()''',
        )
        for statement in r_statements:
            R(statement)

        # text report of the estimates
        report = options.stdout
        report.write("H^2\n")
        for marker in estimates.keys():
            report.write("%s: %0.3f\n" % (marker, estimates[marker]))

    elif options.task == "merge":
        file_list = argv[-1].split(",")
        merged = P52.merge_heritability(file_list)
        merged.to_csv(options.stdout, sep="\t")

    # write footer and output benchmark information.
    E.Stop()
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Looks up the gating function registered for the
    (``--cell-type``, ``--panel-id``) pair and calls it with the FCS
    directory, output directory, compensation matrix, panel, file set
    identifier, cell type and database given on the command line.  For
    any pair without a registered function an output file named
    ``<out_dir>/<fileset_id>-<panel>-<cell_type>.tsv`` is touched
    instead.
    """

    if argv is None:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("-t", "--test", dest="test", type="string",
                      help="supply help")

    parser.add_option("--fcs-directory", dest="fcs_dir", type="string",
                      help="path to directory containing .fcs files"
                      " for processsing")

    parser.add_option("--output-format", dest="out_format", type="choice",
                      choices=("summary", "intensity"), help="output either"
                      " summary of intensities or raw intensity data")

    parser.add_option("--summary-stats", dest="stats", type="choice",
                      choices=("fano", "mean", "std", "var", "median",
                               "geometric", "regress"),
                      help="summary statistics to output if "
                      "out_format == summary")

    parser.add_option("--gating-strategy", dest="gates", type="string",
                      help=".tsv of gating strategy. See docs for details")

    parser.add_option(
        "--compensation-matrix", dest="comp_matrix", type="string",
        help="text file containing the compensation/spillover matrix")

    parser.add_option("--cell-type", dest="cell_type", type="string",
                      help="description of cell type gene expression "
                      "is measured on. Will be added to output file name")

    parser.add_option("--panel-id", dest="panel", type="string",
                      help="ID for marker panel")

    parser.add_option("--output-directory", dest="out_dir", type="string",
                      help="output directory path for files")

    parser.add_option(
        "--fileset-identifier", dest="fileset_id", type="string",
        help="unique identifier for sets of files/samples processed "
        "together. Useful for assigning to batches for processing")

    parser.add_option("--database", dest="database", type="string",
                      help="SQLite database to write results to ")

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    E.info("Processing data for %s in panel %s" % (options.cell_type,
                                                   options.panel))

    # (cell type, panel) -> name of the P52 gating function.  Names are
    # resolved with getattr only for the selected pair.  The panel P4 NK
    # cell types (early_NK, terminal_NK, mature_NK) are not registered,
    # so they take the fallback branch below.
    gating_functions = {
        # panel 3
        ("CD4_Tmem", "P3"): "get_cd4_Tmem_panel3",
        ("CD8_Tmem", "P3"): "get_cd8_Tmem_panel3",
        ("CD4_Tnaive", "P3"): "get_cd4_naive_panel3",
        ("CD8_Tnaive", "P3"): "get_cd8_naive_panel3",
        # panel 1
        ("CD4_Tcell", "P1"): "get_cd4_tcells_panel1",
        ("CD8_Tcell", "P1"): "get_cd8_tcells_panel1",
        ("DN_Tcell", "P1"): "get_dn_tcells_panel1",
        ("DP_Tcell", "P1"): "get_dp_tcells_panel1",
        # panels 2a and 2b share their gating functions
        ("CD4_Tcell", "P2a"): "get_cd4_tcells_panel2",
        ("CD4_Tcell", "P2b"): "get_cd4_tcells_panel2",
        ("CD8_Tcell", "P2a"): "get_cd8_tcells_panel2",
        ("CD8_Tcell", "P2b"): "get_cd8_tcells_panel2",
        ("DN_Tcell", "P2a"): "get_dn_tcells_panel2",
        ("DN_Tcell", "P2b"): "get_dn_tcells_panel2",
        ("DP_Tcell", "P2a"): "get_dp_tcells_panel2",
        ("DP_Tcell", "P2b"): "get_dp_tcells_panel2",
        # panel 5
        ("NKT_early", "P5"): "get_early_nktcells_panel5",
        ("NKT_naive", "P5"): "get_naive_nktcells_panel5",
        ("NKT_terminal", "P5"): "get_terminal_nktcells_panel5",
        ("NKT_effector", "P5"): "get_effector_nktcells_panel5",
        ("Vd1_Tcells", "P5"): "get_Vd1_tcells_panel5",
        ("Vd2p_Vg9dim", "P5"): "get_Vd2_vg9dim_panel5",
        ("Vd2n_Vg9p", "P5"): "get_Vd2n_vg9p_panel5",
        ("hemat_SCs", "P5"): "get_hemat_SC_panel5",
        # panel 6
        ("immature_Bcell", "P6"): "get_immature_Bcells_panel6",
        ("mature_Bcell", "P6"): "get_mature_Bcells_panel6",
        ("memory_Bcell", "P6"): "get_memory_Bcells_panel6",
        ("naive_Bcell", "P6"): "get_naive_Bcells_panel6",
        ("IgA_Bmem", "P6"): "get_IgA_Bcells_panel6",
        ("IgG_Bmem", "P6"): "get_IgG_Bcells_panel6",
        ("IgM_Bmem", "P6"): "get_IgM_Bcells_panel6",
        ("IgE_Bmem", "P6"): "get_IgE_Bcells_panel6",
        # panel 7
        ("monocytes", "P7"): "get_monocytes_panel7",
        ("M_dendritic", "P7"): "get_myeloid_DC_panel7",
        ("DC_cd123cd11c", "P7"): "get_cd123cd11c_DC_panel7",
        ("P_dendritic", "P7"): "get_plasmacytoid_DC_panel7",
        ("CD8_APCs", "P7"): "get_CD8_APC_panel7",
        ("CD4_APCs", "P7"): "get_CD4_APC_panel7",
        ("CD1cneg_mDC", "P7"): "get_CD1cneg_mDC_panel7",
        ("Inflam_DC", "P7"): "get_inflammatory_DC_panel7",
        ("CD16neg_DC", "P7"): "get_CD16neg_DC_panel7",
    }

    function_name = gating_functions.get((options.cell_type, options.panel))

    if function_name is not None:
        gate_cells = getattr(P52, function_name)
        gate_cells(fcs_dir=options.fcs_dir,
                   out_dir=options.out_dir,
                   comp_matrix=options.comp_matrix,
                   panel=options.panel,
                   setid=options.fileset_id,
                   cell_subset=options.cell_type,
                   db=options.database)
    else:
        # no gating function for this combination: touch the output file
        outfile = "%s/%s-%s-%s.tsv" % (options.out_dir,
                                       options.fileset_id,
                                       options.panel,
                                       options.cell_type)
        P.touch(outfile)

    # write footer and output benchmark information.
    E.Stop()
def main(argv=None):
    """Script entry point: gate one cell subset from one marker panel.

    Command line options are parsed from ``sys.argv`` unless *argv* is
    given.  The ``--cell-type``/``--panel-id`` pair selects which ``P52``
    gating function is called; every gating function is called with the
    same seven keyword arguments.  When no gating function is registered
    for the pair, ``P.touch`` is called on
    ``<out_dir>/<fileset_id>-<panel>-<cell_type>.tsv`` instead.

    ``--output-format``, ``--summary-stats`` and ``--gating-strategy`` are
    accepted on the command line but are not read by this function.
    """
    if argv is None:
        argv = sys.argv

    # Command line options in registration order: (flags, keyword arguments).
    option_specs = [
        (("-t", "--test"),
         dict(dest="test", type="string", help="supply help")),
        (("--fcs-directory",),
         dict(dest="fcs_dir", type="string",
              help="path to directory containing .fcs files"
              " for processsing")),
        (("--output-format",),
         dict(dest="out_format", type="choice",
              choices=("summary", "intensity"),
              help="output either"
              " summary of intensities or raw intensity data")),
        (("--summary-stats",),
         dict(dest="stats", type="choice",
              choices=("fano", "mean", "std", "var", "median",
                       "geometric", "regress"),
              help="summary statistics to output if "
              "out_format == summary")),
        (("--gating-strategy",),
         dict(dest="gates", type="string",
              help=".tsv of gating strategy. See docs for details")),
        (("--compensation-matrix",),
         dict(dest="comp_matrix", type="string",
              help="text file containing the compensation/spillover matrix")),
        (("--cell-type",),
         dict(dest="cell_type", type="string",
              help="description of cell type gene expression "
              "is measured on. Will be added to output file name")),
        (("--panel-id",),
         dict(dest="panel", type="string", help="ID for marker panel")),
        (("--output-directory",),
         dict(dest="out_dir", type="string",
              help="output directory path for files")),
        (("--fileset-identifier",),
         dict(dest="fileset_id", type="string",
              help="unique identifier for sets of files/samples processed "
              "together. Useful for assigning to batches for processing")),
        (("--database",),
         dict(dest="database", type="string",
              help="SQLite database to write results to ")),
    ]

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])
    for flags, settings in option_specs:
        parser.add_option(*flags, **settings)

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    E.info("Processing data for %s in panel %s" % (options.cell_type,
                                                   options.panel))

    # (cell type, panel) -> name of the P52 gating function to run.
    # Names are resolved with getattr only for the selected entry, so an
    # unrelated missing function cannot break a run.
    # The panel P4 NK cell subsets (early_NK, terminal_NK, mature_NK) are
    # deliberately not registered.
    gating_functions = {
        # panel 3
        ("CD4_Tmem", "P3"): "get_cd4_Tmem_panel3",
        ("CD8_Tmem", "P3"): "get_cd8_Tmem_panel3",
        ("CD4_Tnaive", "P3"): "get_cd4_naive_panel3",
        ("CD8_Tnaive", "P3"): "get_cd8_naive_panel3",
        # panel 1
        ("CD4_Tcell", "P1"): "get_cd4_tcells_panel1",
        ("CD8_Tcell", "P1"): "get_cd8_tcells_panel1",
        ("DN_Tcell", "P1"): "get_dn_tcells_panel1",
        ("DP_Tcell", "P1"): "get_dp_tcells_panel1",
        # panels 2a and 2b share their gating functions
        ("CD4_Tcell", "P2a"): "get_cd4_tcells_panel2",
        ("CD4_Tcell", "P2b"): "get_cd4_tcells_panel2",
        ("CD8_Tcell", "P2a"): "get_cd8_tcells_panel2",
        ("CD8_Tcell", "P2b"): "get_cd8_tcells_panel2",
        ("DN_Tcell", "P2a"): "get_dn_tcells_panel2",
        ("DN_Tcell", "P2b"): "get_dn_tcells_panel2",
        ("DP_Tcell", "P2a"): "get_dp_tcells_panel2",
        ("DP_Tcell", "P2b"): "get_dp_tcells_panel2",
        # panel 5
        ("NKT_early", "P5"): "get_early_nktcells_panel5",
        ("NKT_naive", "P5"): "get_naive_nktcells_panel5",
        ("NKT_terminal", "P5"): "get_terminal_nktcells_panel5",
        ("NKT_effector", "P5"): "get_effector_nktcells_panel5",
        ("Vd1_Tcells", "P5"): "get_Vd1_tcells_panel5",
        ("Vd2p_Vg9dim", "P5"): "get_Vd2_vg9dim_panel5",
        ("Vd2n_Vg9p", "P5"): "get_Vd2n_vg9p_panel5",
        ("hemat_SCs", "P5"): "get_hemat_SC_panel5",
        # panel 6
        ("immature_Bcell", "P6"): "get_immature_Bcells_panel6",
        ("mature_Bcell", "P6"): "get_mature_Bcells_panel6",
        ("memory_Bcell", "P6"): "get_memory_Bcells_panel6",
        ("naive_Bcell", "P6"): "get_naive_Bcells_panel6",
        ("IgA_Bmem", "P6"): "get_IgA_Bcells_panel6",
        ("IgG_Bmem", "P6"): "get_IgG_Bcells_panel6",
        ("IgM_Bmem", "P6"): "get_IgM_Bcells_panel6",
        ("IgE_Bmem", "P6"): "get_IgE_Bcells_panel6",
        # panel 7
        ("monocytes", "P7"): "get_monocytes_panel7",
        ("M_dendritic", "P7"): "get_myeloid_DC_panel7",
        ("DC_cd123cd11c", "P7"): "get_cd123cd11c_DC_panel7",
        ("P_dendritic", "P7"): "get_plasmacytoid_DC_panel7",
        ("CD8_APCs", "P7"): "get_CD8_APC_panel7",
        ("CD4_APCs", "P7"): "get_CD4_APC_panel7",
        ("CD1cneg_mDC", "P7"): "get_CD1cneg_mDC_panel7",
        ("Inflam_DC", "P7"): "get_inflammatory_DC_panel7",
        ("CD16neg_DC", "P7"): "get_CD16neg_DC_panel7",
    }

    function_name = gating_functions.get((options.cell_type, options.panel))

    if function_name is None:
        # no gating function for this combination: touch the output file
        outfile = "%s/%s-%s-%s.tsv" % (options.out_dir,
                                       options.fileset_id,
                                       options.panel,
                                       options.cell_type)
        P.touch(outfile)
    else:
        run_gating = getattr(P52, function_name)
        run_gating(fcs_dir=options.fcs_dir,
                   out_dir=options.out_dir,
                   comp_matrix=options.comp_matrix,
                   panel=options.panel,
                   setid=options.fileset_id,
                   cell_subset=options.cell_type,
                   db=options.database)

    # write footer and output benchmark information.
    E.Stop()
def main(argv=None):
    """Script entry point: run one processing task on twin flow data.

    Command line options are parsed from ``sys.argv`` unless *argv* is
    given.  The last element of *argv* is taken as the input file.  The
    ``--task`` option selects what is done:

    ``merge_flow``
        Merge gate arrays read from ``--database``/``--tablename`` with the
        demographics file and write one tab-separated table per merged
        frame into ``--output-directory``.  The input file is not used.
    ``split_zygosity``
        Split the input file with ``P52.split_zygosity`` and write the
        ``MZ`` and ``DZ`` frames to ``<out_pattern>-MZ.tsv`` and
        ``<out_pattern>-DZ.tsv``.
    ``regress_confounding``
        Write the result of ``P52.regress_out_confounding`` to stdout.
    ``kinship``
        Write the result of ``P52.make_kinship_matrix`` to stdout.

    Any other value of ``--task`` (including none) does nothing.
    """
    if argv is None:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("-t", "--test", dest="test", type="string",
                      help="supply help")

    parser.add_option("--twin-id-column", dest="id_column", type="string",
                      help="column number or header for twin IDs in "
                      "input tables")

    parser.add_option("--demographics-file", dest="demo_file", type="string",
                      help="tab-separated text file containing twins "
                      "demographic data")

    parser.add_option("--demo-id-column", dest="demo_id_column",
                      type="string",
                      help="column header or number that indicates the twin IDs "
                      "that match to those from the flow cytometry data")

    parser.add_option("--task", dest="task", type="choice",
                      choices=["merge_flow", "split_zygosity",
                               "regress_confounding", "kinship"],
                      help="choose a task")

    parser.add_option("--output-file-pattern", dest="out_pattern",
                      type="string",
                      help="output filename pattern to use where output is "
                      "multiple files")

    parser.add_option("--output-directory", dest="out_dir", type="string",
                      help="directory to output files into")

    parser.add_option("--zygosity-column", dest="zygosity_col",
                      type="string",
                      help="column header containing zygosity information")

    parser.add_option("--id-columns", dest="id_headers", type="string",
                      help="comma-separated list of column headers used to "
                      "uniquely identify each sample")

    parser.add_option("--family-id-column", dest="family_id", type="string",
                      help="column header containing family IDs")

    parser.add_option("--confounding-column", dest="confounding",
                      type="string",
                      help="either a comma-separates list or single value, "
                      "column number or column header")

    parser.add_option("--marker-group", dest="marker_col", type="string",
                      help="column header containing marker IDs to group "
                      "regression fits by")

    parser.add_option("--filter-zero-arrays", dest="filter_zero",
                      action="store_true",
                      help="Filter out arrays where there are no observations")

    parser.add_option("--database", dest="database", type="string",
                      help="absolute path to SQLite database")

    parser.add_option("--tablename", dest="table", type="string",
                      help="tablename to extract from SQL database")

    parser.add_option("--filter-gates", dest="filt_gate", type="string",
                      help="regex to filter out unwanted triboolean gates")

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    # NOTE(review): these defaults are registered after the command line
    # has already been parsed, so they do not appear to reach `options`
    # (filter_zero stays unset unless --filter-zero-arrays is given).
    # Left in place because moving the call before E.Start would change
    # the filter_zero value handed to P52.mergeGateArrays - confirm the
    # intended default before relocating it.
    parser.set_defaults(filter_zero=True,
                        filt_gate=None)

    infile = argv[-1]

    if options.task == "merge_flow":
        merged_arrays = P52.mergeGateArrays(db=options.database,
                                            table_name=options.table,
                                            filter_zero=options.filter_zero,
                                            filter_gate=options.filt_gate)

        all_dfs = P52.mergeArrayWithDemographics(
            flow_arrays=merged_arrays,
            id_column=options.id_column,
            demo_file=options.demo_file,
            demo_id_column=options.demo_id_column)

        for out_df in all_dfs:
            # construct the table names using cell_type, panel and gate
            # cell type and statistic should be in the out_pattern
            outname = "-".join([options.out_pattern,
                                out_df["gate"][0],
                                out_df["panel"][0]])
            # the name may contain '/', which would be read as a directory
            # separator once joined to out_dir; replace these with "_"
            outname = outname.replace("/", "_")
            out_file = "/".join([options.out_dir, outname])

            E.info("writing %s data to file" % outname)
            # index_label (not index_col, which to_csv does not accept)
            # names the index column, as in the other tasks below
            out_df.to_csv(out_file, sep="\t", index_label="indx")

            # memory usage was ballooning because memory held by R was
            # not being released after garbage collection, so clear it
            # explicitly after each table
            P52.clearREnvironment()

    elif options.task == "split_zygosity":
        out_frames = P52.split_zygosity(
            infile=infile,
            zygosity_column=options.zygosity_col,
            id_headers=(options.id_headers).split(","),
            pair_header=options.family_id)

        # expect keys: MZ and DZ
        try:
            MZ_frame = out_frames["MZ"]
            DZ_frame = out_frames["DZ"]

            # output filenames using pattern and zygosity as a prefix
            MZ_outfile = "-".join([options.out_pattern, "MZ.tsv"])
            MZ_frame.to_csv(MZ_outfile, sep="\t", index_label="indx")

            DZ_outfile = "-".join([options.out_pattern, "DZ.tsv"])
            DZ_frame.to_csv(DZ_outfile, sep="\t", index_label="indx")
        except TypeError as error:
            # still best-effort (no crash), but no longer silent
            E.warn("MZ/DZ tables were not written: %s" % error)

    elif options.task == "regress_confounding":
        out_df = P52.regress_out_confounding(
            infile=infile,
            confounding_column=options.confounding,
            group_var=options.marker_col)

        out_df.to_csv(options.stdout, sep="\t", index_label="indx")

    elif options.task == "kinship":
        # NOTE: id_column receives the raw --id-columns string here; it is
        # not split on commas as it is for split_zygosity
        out_df = P52.make_kinship_matrix(
            twins_file=infile,
            id_column=options.id_headers,
            family_column=options.family_id,
            zygosity_column=options.zygosity_col)

        out_df.to_csv(options.stdout, index_label="indx", sep="\t")

    # write footer and output benchmark information.
    E.Stop()
def main(argv=None):
    """Script entry point: plot a histogram or barchart from a data table.

    Command line options are parsed from ``sys.argv`` unless *argv* is
    given.  The last element of *argv* is the input: either one
    tab-separated table, or two ':'-separated tables joined by a comma,
    which are merged on the ``--y-axis`` column.  With ``--melt-data`` the
    frame is melted before plotting.

    ``--plot-type histogram`` calls ``P52.plotHistogram`` and
    ``--plot-type barchart`` calls ``P52.plotBarchart``; any other value
    (including the accepted choice ``scatter``) produces no plot.

    Raises
    ------
    ValueError
        If the ``--x-axis`` variable is not a column of the data frame.
    IOError
        If no ``--outfile`` was given.
    """
    if argv is None:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("-t", "--test", dest="test", type="string",
                      help="supply help")

    parser.add_option("--plot-type", dest="plot_type", type="choice",
                      choices=["histogram", "scatter", "barchart"],
                      help="type of plot to generate")

    parser.add_option("--x-axis", dest="x_axis", type="string",
                      help="variable to plot on the X-axis."
                      "This is the default axis for plotting.")

    parser.add_option("--y-axis", dest="y_axis", type="string",
                      help="variable to plot on the Y-axis")

    parser.add_option("--split-by", dest="split_by", type="string",
                      help="varible over which to split up plots")

    parser.add_option("--X-title", dest="x_title", type="string",
                      help="label to attach to X-axis")

    parser.add_option("--Y-title", dest="y_title", type="string",
                      help="label to attach to Y-axis")

    parser.add_option("--colour-var", dest="col_var", type="string",
                      help="variable to colour points by")

    parser.add_option("--free-scale", dest="free_scale", type="choice",
                      choices=["free_x", "free_y", "free"],
                      help="whether to "
                      "use free scaling on plot axes")

    parser.add_option("--outfile", dest="outfile", type="string",
                      help="file to save plot to")

    parser.add_option("--melt-data", dest="melt", action="store_true",
                      help="melt the dataframe first, requires ID vars")

    parser.add_option("--melt-id-vars", dest="id_vars", type="string",
                      help="comma separated list of id variables for"
                      " the melted dataframe")

    parser.add_option("--merge-frames", dest="merge", action="store_true",
                      help="merge two input dataframes together")

    parser.add_option("--merge-id-vars", dest="merge_vars", type="string",
                      help="comma separate list of id variables to merge "
                      "two dataframes on")

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    # NOTE(review): these defaults are registered after the command line
    # has already been parsed, so they do not appear to reach `options`.
    # "both" is also not one of the --free-scale choices.  Left in place
    # because moving the call before E.Start would change the `scales`
    # value handed to P52.plotHistogram - confirm before relocating it.
    parser.set_defaults(free_scale="both",
                        split_by=None,
                        col_var=None,
                        melt=False)

    infile = argv[-1]

    infiles = infile.split(",")
    if len(infiles) == 2:
        # two comma-separated inputs: read both and merge them
        df1 = pd.read_table(infiles[0], sep=":", index_col=0, header=0)
        df2 = pd.read_table(infiles[1], sep=":", index_col=0, header=0)
        ids = options.merge_vars.split(",")

        # expose the index as a regular column so it can be merged on
        df1[options.y_axis] = df1.index
        df2[options.y_axis] = df2.index
        df1.columns = [ids[0], options.y_axis]
        df2.columns = [ids[0], options.y_axis]

        df = pd.merge(df1, df2, on=options.y_axis)
        # these need to not be hard-coded!
        df.columns = ["mean_h2", ids[0], "fano_h2"]
    else:
        # assumes the first column is the index
        df = pd.read_table(infile, sep="\t", index_col=0, header=0)

    if options.melt:
        if options.y_axis:
            df = pd.melt(df, id_vars=options.x_axis,
                         value_name=options.y_axis,
                         var_name=options.col_var)
        else:
            # --melt-id-vars is only needed (and only read) on this path
            melt_ids = options.id_vars.split(",")
            df = pd.melt(df, id_vars=melt_ids,
                         value_name=options.x_axis,
                         var_name=options.col_var)

    # check variables are present.  A missing column label raises
    # KeyError; ValueError is still caught for backward compatibility.
    try:
        df[options.x_axis]
    except (KeyError, ValueError):
        raise ValueError("no plotting variable found")

    if options.col_var:
        try:
            df[options.col_var]
        except (KeyError, ValueError):
            E.warn("Colour variable not found in data frame."
                   "Check the data file is the correct one")

    if options.split_by:
        try:
            df[options.split_by]
        except (KeyError, ValueError):
            E.warn("Split-by variable not found in the data "
                   "frame. Check the data file is the correct"
                   " one.")

    # explicit check rather than assert, which is stripped under -O
    if not options.outfile:
        raise IOError("no output file detected")

    if options.plot_type == "histogram":
        P52.plotHistogram(data=df,
                          variable=options.x_axis,
                          save_path=options.outfile,
                          x_title=options.x_title,
                          y_title=options.y_title,
                          colour_var=options.col_var,
                          scales=options.free_scale,
                          split_var=options.split_by)
    elif options.plot_type == "barchart":
        P52.plotBarchart(data=df,
                         x_variable=options.x_axis,
                         y_variable=options.y_axis,
                         save_path=options.outfile,
                         x_title=options.x_title,
                         y_title=options.y_title,
                         colour_var=options.col_var,
                         split_var=options.split_by)

    # write footer and output benchmark information.
    E.Stop()