Ejemplo n.º 1
0
def main(argv=None):
    """Script entry point: run one task on a workspace file.

    Parses command line options in sys.argv, unless *argv* is given.
    The last element of *argv* is used as the input file.

    Depending on ``--method``:

    * ``compensation`` -- split the input path into directory and file
      name, call ``P52.get_compensation_matrix`` with both and write the
      result to ``options.stdout`` as tab-separated text.
    * ``parse_gating`` -- for every name produced by
      ``P52.parse_gating_file``, touch a file of that name inside
      ``--gating-directory``.

    Without ``--method`` nothing is done.
    """

    if argv is None:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("-t",
                      "--test",
                      dest="test",
                      type="string",
                      help="supply help")

    parser.add_option("--method",
                      dest="method",
                      type="choice",
                      choices=["compensation", "parse_gating"],
                      help="select method to perform on workspace "
                      "file.")

    parser.add_option("--gating-directory",
                      dest="gate_dir",
                      type="string",
                      help="directory to store gating dummy files")

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    infile = argv[-1]

    if options.method == "compensation":
        # Split on the LAST "/" only.  Removing the file name from the
        # list of path components by value would drop the first
        # component with that name, which corrupts paths such as
        # "x/a/b/a" where a directory shares the file's name.
        path, _, infile = infile.rpartition("/")
        out_df = P52.get_compensation_matrix(path=path, infile=infile)
        out_df.to_csv(options.stdout, sep="\t")

    elif options.method == "parse_gating":
        for dfile in P52.parse_gating_file(infile):
            outfile = options.gate_dir + "/" + dfile
            P.touch(outfile)

    # write footer and output benchmark information; done last so the
    # footer follows the output instead of preceding it.
    E.Stop()
Ejemplo n.º 2
0
def main(argv=None):
    """Script entry point: compute distances between a set of matrices.

    Parses command line options in sys.argv, unless *argv* is given.
    The last element of *argv* is a text file naming one input file per
    line.  The names are passed to ``P52.makeMatrixList`` together with
    ``--id-column``; the resulting matrices go to
    ``P52.getMatrixDistances`` with the ``--matrix-distance`` metric and
    the distance matrix is written to ``options.stdout`` as
    tab-separated text.
    """

    if argv is None:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("-t",
                      "--test",
                      dest="test",
                      type="string",
                      help="supply help")

    parser.add_option("--id-column",
                      dest="id_col",
                      type="string",
                      help="column header for sample IDs")

    parser.add_option("--matrix-distance",
                      dest="dist",
                      type="choice",
                      choices=["Euclid"],
                      help="distance metric to use "
                      "for distance between matrices")

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    infile = argv[-1]

    # the input file is a list of files, one name per line
    with open(infile, "r") as ifile:
        # Build a real list: a lazy map() object can be consumed only
        # once and supports neither len() nor indexing.
        list_of_files = [line.rstrip("\n") for line in ifile]

    matrix_list = P52.makeMatrixList(list_of_files=list_of_files,
                                     id_col=options.id_col)
    distance_matrix = P52.getMatrixDistances(list_of_matrices=matrix_list,
                                             distance=options.dist)

    distance_matrix.to_csv(options.stdout, sep="\t", index_label=None)

    # write footer and output benchmark information.
    E.Stop()
Ejemplo n.º 3
0
def main(argv=None):
    """Script entry point: run one task on a workspace file.

    Parses command line options in sys.argv, unless *argv* is given.
    The last element of *argv* is used as the input file.

    Depending on ``--method``:

    * ``compensation`` -- split the input path into directory and file
      name, call ``P52.get_compensation_matrix`` with both and write the
      result to ``options.stdout`` as tab-separated text.
    * ``parse_gating`` -- for every name produced by
      ``P52.parse_gating_file``, touch a file of that name inside
      ``--gating-directory``.

    Without ``--method`` nothing is done.
    """

    if argv is None:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("-t", "--test", dest="test", type="string",
                      help="supply help")

    parser.add_option("--method", dest="method", type="choice",
                      choices=["compensation", "parse_gating"],
                      help="select method to perform on workspace "
                      "file.")

    parser.add_option("--gating-directory", dest="gate_dir", type="string",
                      help="directory to store gating dummy files")

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    infile = argv[-1]

    if options.method == "compensation":
        # Split on the LAST "/" only.  Removing the file name from the
        # list of path components by value would drop the first
        # component with that name, which corrupts paths such as
        # "x/a/b/a" where a directory shares the file's name.
        path, _, infile = infile.rpartition("/")
        out_df = P52.get_compensation_matrix(path=path,
                                             infile=infile)
        out_df.to_csv(options.stdout, sep="\t")

    elif options.method == "parse_gating":
        for dfile in P52.parse_gating_file(infile):
            outfile = options.gate_dir + "/" + dfile
            P.touch(outfile)

    # write footer and output benchmark information; done last so the
    # footer follows the output instead of preceding it.
    E.Stop()
Ejemplo n.º 4
0
def main(argv=None):
    """Script entry point: compute distances between a set of matrices.

    Parses command line options in sys.argv, unless *argv* is given.
    The last element of *argv* is a text file naming one input file per
    line.  The names are passed to ``P52.makeMatrixList`` together with
    ``--id-column``; the resulting matrices go to
    ``P52.getMatrixDistances`` with the ``--matrix-distance`` metric and
    the distance matrix is written to ``options.stdout`` as
    tab-separated text.
    """

    if argv is None:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("-t", "--test", dest="test", type="string",
                      help="supply help")

    parser.add_option("--id-column", dest="id_col", type="string",
                      help="column header for sample IDs")

    parser.add_option("--matrix-distance", dest="dist", type="choice",
                      choices=["Euclid"], help="distance metric to use "
                      "for distance between matrices")

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    infile = argv[-1]

    # the input file is a list of files, one name per line
    with open(infile, "r") as ifile:
        # Build a real list: a lazy map() object can be consumed only
        # once and supports neither len() nor indexing.
        list_of_files = [line.rstrip("\n") for line in ifile]

    matrix_list = P52.makeMatrixList(list_of_files=list_of_files,
                                     id_col=options.id_col)
    distance_matrix = P52.getMatrixDistances(list_of_matrices=matrix_list,
                                             distance=options.dist)

    distance_matrix.to_csv(options.stdout, sep="\t", index_label=None)

    # write footer and output benchmark information.
    E.Stop()
Ejemplo n.º 5
0
def main(argv=None):
    """Entry point for the heritability script.

    Command line options are read from sys.argv unless *argv* is
    supplied.

    ``--task heritability`` calls ``P52.estimate_heritability`` on the
    monozygotic and dizygotic twin files, draws per-marker twin
    scatter plots through R and writes the estimates to
    ``options.stdout``.  ``--task merge`` treats the last command line
    argument as a comma-separated list of files, merges them with
    ``P52.merge_heritability`` and writes the table to
    ``options.stdout`` as tab-separated text.
    """
    argv = sys.argv if argv is None else argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    # (flags, keyword settings) for every script-specific option
    option_specs = (
        (("-t", "--test"),
         dict(dest="test", type="string", help="supply help")),
        (("--task",),
         dict(dest="task", type="choice",
              choices=["heritability", "merge", "kinship"],
              help="task")),
        (("--heritability",),
         dict(dest="equation", type="string",
              help="equation used to estimate heritability")),
        (("--monozygote-file",),
         dict(dest="mz_file", type="string",
              help="file containing monozygotic twin data")),
        (("--dizygote-file",),
         dict(dest="dz_file", type="string",
              help="file containing dizygotic twin data")),
    )
    for flags, settings in option_specs:
        parser.add_option(*flags, **settings)

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    if options.task == "heritability":
        estimates = P52.estimate_heritability(mz_file=options.mz_file,
                                              dz_file=options.dz_file)

        # The plot prefix combines the directory of the DZ file with
        # the 2nd to 4th "-"-separated fields of its file name.
        dz_parts = options.dz_file.split("/")
        plot_dir = "/".join(dz_parts[:-1])
        plot_stem = "-".join(dz_parts[-1].split("-")[1:4])
        plot_prefix = os.path.join(plot_dir, plot_stem)
        E.info("plotting correlations to %s" % plot_prefix)

        # R statements, executed in order: load ggplot2, read both twin
        # tables, stack them, build the faceted scatter plot and save
        # it as a PNG.
        r_commands = [
            '''suppressPackageStartupMessages(library(ggplot2))''',
            '''mz.df <- read.table("%s", sep="\t", h=T, row.names=1)'''
            % options.mz_file,
            '''dz.df <- read.table("%s", sep="\t", h=T, row.names=1)'''
            % options.dz_file,
            '''all.dz <- data.frame(rbind(mz.df, dz.df))''',
            ('''p_cor <- ggplot(all.dz, aes(x=twin1, y=twin2, '''
             '''colour=zygosity)) + '''
             '''geom_point(size=1) + stat_smooth(method=lm) + '''
             '''facet_wrap( ~ marker, scales="free")'''),
            '''png("%s-cors.png", height=720, width=720)''' % plot_prefix,
            '''print(p_cor)''',
            '''dev.off()''',
        ]
        for command in r_commands:
            R(command)

        # one "key: value" line per estimate under an "H^2" header
        emit = options.stdout.write
        emit("H^2\n")
        for key in estimates.keys():
            emit("%s: %0.3f\n" % (key, estimates[key]))
    elif options.task == "merge":
        merge_inputs = argv[-1].split(",")
        merged = P52.merge_heritability(merge_inputs)
        merged.to_csv(options.stdout, sep="\t")

    # write footer and output benchmark information.
    E.Stop()
Ejemplo n.º 6
0
def main(argv=None):
    """Script entry point: plot variables from one or two data tables.

    Parses command line options in sys.argv, unless *argv* is given.
    The last element of *argv* names the input.  If it consists of
    exactly two comma-separated names, both ":"-separated tables are
    read and merged on the ``--y-axis`` column; otherwise a single
    tab-separated table is read.  With ``--melt-data`` the frame is
    melted first.  The frame is then passed to ``P52.plotHistogram`` or
    ``P52.plotBarchart`` according to ``--plot-type``.

    Raises
    ------
    ValueError
        if the ``--x-axis`` variable is not a column of the data frame.
    IOError
        if no ``--outfile`` was given.
    """

    if argv is None:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("-t", "--test", dest="test", type="string",
                      help="supply help")

    parser.add_option("--plot-type", dest="plot_type", type="choice",
                      choices=["histogram", "scatter", "barchart"],
                      help="type of plot to generate")

    parser.add_option("--x-axis", dest="x_axis", type="string",
                      help="variable to plot on the X-axis."
                      "This is the default axis for plotting.")

    parser.add_option("--y-axis", dest="y_axis", type="string",
                      help="variable to plot on the Y-axis")

    parser.add_option("--split-by", dest="split_by", type="string",
                      help="varible over which to split up plots")

    parser.add_option("--X-title", dest="x_title", type="string",
                      help="label to attach to X-axis")

    parser.add_option("--Y-title", dest="y_title", type="string",
                      help="label to attach to Y-axis")

    parser.add_option("--colour-var", dest="col_var", type="string",
                      help="variable to colour points by")

    parser.add_option("--free-scale", dest="free_scale", type="choice",
                      choices=["free_x", "free_y", "free"], help="whether to "
                      "use free scaling on plot axes")

    parser.add_option("--outfile", dest="outfile", type="string",
                      help="file to save plot to")

    parser.add_option("--melt-data", dest="melt", action="store_true",
                      help="melt the dataframe first, requires ID vars")

    parser.add_option("--melt-id-vars", dest="id_vars", type="string",
                      help="comma separated list of id variables for"
                      " the melted dataframe")

    parser.add_option("--merge-frames", dest="merge", action="store_true",
                      help="merge two input dataframes together")

    parser.add_option("--merge-id-vars", dest="merge_vars", type="string",
                      help="comma separate list of id variables to merge "
                      "two dataframes on")

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)
    # NOTE(review): the options were already parsed by E.Start() above,
    # so these defaults do not appear to reach ``options`` -- confirm,
    # and move the call before E.Start() if they are meant to apply.
    # Be aware that "both" is not among the --free-scale choices.
    parser.set_defaults(free_scale="both",
                        split_by=None,
                        col_var=None,
                        melt=False)

    infile = argv[-1]

    if len(infile.split(",")) == 2:
        infiles = infile.split(",")
        df1 = pd.read_table(infiles[0], sep=":", index_col=0,
                            header=0)
        df2 = pd.read_table(infiles[1], sep=":", index_col=0,
                            header=0)
        ids = options.merge_vars.split(",")
        # expose the index as a regular column so it can be the merge key
        df1[options.y_axis] = df1.index
        df2[options.y_axis] = df2.index

        df1.columns = [ids[0], options.y_axis]
        df2.columns = [ids[0], options.y_axis]

        df = pd.merge(df1, df2, on=options.y_axis)

        # TODO: these need to not be hard-coded!
        df.columns = ["mean_h2", ids[0], "fano_h2"]
    else:
        df = pd.read_table(infile, sep="\t", index_col=0,
                           header=0)

    # assumes the first column is the index
    if options.melt:
        mids = options.id_vars.split(",")
        if options.y_axis:
            df = pd.melt(df, id_vars=options.x_axis,
                         value_name=options.y_axis,
                         var_name=options.col_var)
        else:
            df = pd.melt(df, id_vars=mids, value_name=options.x_axis,
                         var_name=options.col_var)

    # Check variables are present.  Membership tests are used because
    # looking up a missing column raises KeyError, which the previous
    # ``except ValueError`` handlers never caught.
    if options.x_axis not in df.columns:
        raise ValueError("no plotting variable found")

    if options.col_var and options.col_var not in df.columns:
        E.warn("Colour variable not found in data frame."
               "Check the data file is the correct one")

    if options.split_by and options.split_by not in df.columns:
        E.warn("Split-by variable not found in the data "
               "frame.  Check the data file is the correct"
               " one.")

    # explicit check instead of ``assert``, which is removed under -O
    if not options.outfile:
        raise IOError("no output file detected")

    if options.plot_type == "histogram":
        P52.plotHistogram(data=df,
                          variable=options.x_axis,
                          save_path=options.outfile,
                          x_title=options.x_title,
                          y_title=options.y_title,
                          colour_var=options.col_var,
                          scales=options.free_scale,
                          split_var=options.split_by)
    elif options.plot_type == "barchart":
        P52.plotBarchart(data=df,
                         x_variable=options.x_axis,
                         y_variable=options.y_axis,
                         save_path=options.outfile,
                         x_title=options.x_title,
                         y_title=options.y_title,
                         colour_var=options.col_var,
                         split_var=options.split_by)
    else:
        # e.g. "scatter" is an accepted choice but has no plotting
        # branch; say so instead of finishing silently without a plot.
        E.warn("no plot generated: plot type %s is not implemented" %
               options.plot_type)

    # write footer and output benchmark information.
    E.Stop()
Ejemplo n.º 7
0
def main(argv=None):
    """Script entry point: prepare twin flow-cytometry tables.

    Parses command line options in sys.argv, unless *argv* is given.
    The last element of *argv* is used as the input file by the tasks
    that read one.  ``--task`` selects what is done:

    * ``merge_flow`` -- ``P52.mergeGateArrays`` on the database table,
      then ``P52.mergeArrayWithDemographics``; each resulting frame is
      written to its own file in ``--output-directory``.
    * ``split_zygosity`` -- ``P52.split_zygosity``; the "MZ" and "DZ"
      frames are written to ``<pattern>-MZ.tsv`` / ``<pattern>-DZ.tsv``.
    * ``regress_confounding`` -- ``P52.regress_out_confounding``; the
      result is written to ``options.stdout``.
    * ``kinship`` -- ``P52.make_kinship_matrix``; the result is written
      to ``options.stdout``.

    All tables are written tab-separated with the index labelled
    "indx".
    """

    if argv is None:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("-t",
                      "--test",
                      dest="test",
                      type="string",
                      help="supply help")

    parser.add_option("--twin-id-column",
                      dest="id_column",
                      type="string",
                      help="column number or header for twin IDs in "
                      "input tables")

    parser.add_option("--demographics-file",
                      dest="demo_file",
                      type="string",
                      help="tab-separated text file containing twins "
                      "demographic data")

    parser.add_option(
        "--demo-id-column",
        dest="demo_id_column",
        type="string",
        help="column header or number that indicates the twin IDs "
        "that match to those from the flow cytometry data")

    parser.add_option("--task",
                      dest="task",
                      type="choice",
                      choices=[
                          "merge_flow", "split_zygosity",
                          "regress_confounding", "kinship"
                      ],
                      help="choose a task")

    parser.add_option("--output-file-pattern",
                      dest="out_pattern",
                      type="string",
                      help="output filename pattern to use where output is "
                      "multiple files")

    parser.add_option("--output-directory",
                      dest="out_dir",
                      type="string",
                      help="directory to output files into")

    parser.add_option("--zygosity-column",
                      dest="zygosity_col",
                      type="string",
                      help="column header containing zygosity information")

    parser.add_option("--id-columns",
                      dest="id_headers",
                      type="string",
                      help="comma-separated list of column headers used to "
                      "uniquely identify each sample")

    parser.add_option("--family-id-column",
                      dest="family_id",
                      type="string",
                      help="column header containing family IDs")

    parser.add_option("--confounding-column",
                      dest="confounding",
                      type="string",
                      help="either a comma-separates list or single value, "
                      "column number or column header")

    parser.add_option("--marker-group",
                      dest="marker_col",
                      type="string",
                      help="column header containing marker IDs to group "
                      "regression fits by")

    parser.add_option("--filter-zero-arrays",
                      dest="filter_zero",
                      action="store_true",
                      help="Filter out arrays where there are no observations")

    parser.add_option("--database",
                      dest="database",
                      type="string",
                      help="absolute path to SQLite database")

    parser.add_option("--tablename",
                      dest="table",
                      type="string",
                      help="tablename to extract from SQL database")

    parser.add_option("--filter-gates",
                      dest="filt_gate",
                      type="string",
                      help="regex to filter out unwanted triboolean gates")

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)
    # NOTE(review): the options were already parsed by E.Start() above,
    # so these defaults do not appear to reach ``options`` -- confirm,
    # and move the call before E.Start() if filter_zero=True is meant
    # to be the effective default.
    parser.set_defaults(filter_zero=True, filt_gate=None)

    infile = argv[-1]

    if options.task == "merge_flow":

        merged_arrays = P52.mergeGateArrays(db=options.database,
                                            table_name=options.table,
                                            filter_zero=options.filter_zero,
                                            filter_gate=options.filt_gate)

        all_dfs = P52.mergeArrayWithDemographics(
            flow_arrays=merged_arrays,
            id_column=options.id_column,
            demo_file=options.demo_file,
            demo_id_column=options.demo_id_column)

        for out_df in all_dfs:
            # construct the table names using cell_type, panel and gate
            # cell type and statistic should be in the out_pattern
            outname = "-".join(
                [options.out_pattern, out_df["gate"][0], out_df["panel"][0]])
            # file names may contain '/', replace these with "_"
            outname = outname.replace("/", "_")
            out_file = "/".join([options.out_dir, outname])
            E.info("writing %s data to file" % outname)
            # ``index_label`` names the index column in the output;
            # ``index_col`` is a read_csv keyword, not a to_csv one.
            out_df.to_csv(out_file, sep="\t", index_label="indx")
            # memory usage was ballooning across iterations, so the R
            # environment is cleared after each table is written.
            P52.clearREnvironment()

    elif options.task == "split_zygosity":
        out_frames = P52.split_zygosity(
            infile=infile,
            zygosity_column=options.zygosity_col,
            id_headers=(options.id_headers).split(","),
            pair_header=options.family_id)
        # expect keys: MZ and DZ
        try:
            MZ_frame = out_frames["MZ"]
            DZ_frame = out_frames["DZ"]

            # output filenames using pattern and zygosity as a prefix
            MZ_outfile = "-".join([options.out_pattern, "MZ.tsv"])
            MZ_frame.to_csv(MZ_outfile, sep="\t", index_label="indx")

            DZ_outfile = "-".join([options.out_pattern, "DZ.tsv"])
            DZ_frame.to_csv(DZ_outfile, sep="\t", index_label="indx")
        except TypeError as error:
            # still best-effort, but no longer silent: report why no
            # (or only partial) zygosity output was written
            E.warn("zygosity output not written: %s" % error)

    elif options.task == "regress_confounding":
        out_df = P52.regress_out_confounding(
            infile=infile,
            confounding_column=options.confounding,
            group_var=options.marker_col)

        out_df.to_csv(options.stdout, sep="\t", index_label="indx")
    elif options.task == "kinship":
        out_df = P52.make_kinship_matrix(twins_file=infile,
                                         id_column=options.id_headers,
                                         family_column=options.family_id,
                                         zygosity_column=options.zygosity_col)

        out_df.to_csv(options.stdout, index_label="indx", sep="\t")

    # write footer and output benchmark information.
    E.Stop()
Ejemplo n.º 8
0
def main(argv=None):
    """Run the heritability script.

    Options come from sys.argv unless an explicit *argv* is passed.

    With ``--task heritability`` the monozygotic and dizygotic twin
    files go to ``P52.estimate_heritability``, a faceted twin1/twin2
    scatter plot is drawn through R, and the estimates are written to
    ``options.stdout``.  With ``--task merge`` the last command line
    argument is split on commas, the files are merged by
    ``P52.merge_heritability`` and the table is written to
    ``options.stdout`` as tab-separated text.
    """
    if argv is None:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])
    add_option = parser.add_option

    add_option("-t", "--test", dest="test", type="string",
               help="supply help")
    add_option("--task", dest="task", type="choice",
               choices=["heritability", "merge", "kinship"],
               help="task")
    add_option("--heritability", dest="equation", type="string",
               help="equation used to estimate heritability")
    add_option("--monozygote-file", dest="mz_file", type="string",
               help="file containing monozygotic twin data")
    add_option("--dizygote-file", dest="dz_file", type="string",
               help="file containing dizygotic twin data")

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    task = options.task
    if task == "heritability":
        mz_path = options.mz_file
        dz_path = options.dz_file
        heritability = P52.estimate_heritability(mz_file=mz_path,
                                                 dz_file=dz_path)

        # Derive the plot prefix: directory of the DZ file joined with
        # fields 1-3 (zero-based) of its "-"-separated file name.
        path_parts = dz_path.split("/")
        name_fields = path_parts[-1].split("-")
        plot_base = os.path.join("/".join(path_parts[:-1]),
                                 "-".join(name_fields[1:4]))
        E.info("plotting correlations to %s" % plot_base)

        # R statements for the per-marker scatter plots
        read_mz = ('''mz.df <- read.table("%s", sep="\t", h=T, row.names=1)'''
                   % mz_path)
        read_dz = ('''dz.df <- read.table("%s", sep="\t", h=T, row.names=1)'''
                   % dz_path)
        build_plot = ('''p_cor <- ggplot(all.dz, aes(x=twin1, y=twin2, '''
                      '''colour=zygosity)) + '''
                      '''geom_point(size=1) + stat_smooth(method=lm) + '''
                      '''facet_wrap( ~ marker, scales="free")''')
        open_png = '''png("%s-cors.png", height=720, width=720)''' % plot_base

        R('''suppressPackageStartupMessages(library(ggplot2))''')
        R(read_mz)
        R(read_dz)
        R('''all.dz <- data.frame(rbind(mz.df, dz.df))''')
        R(build_plot)
        R(open_png)
        R('''print(p_cor)''')
        R('''dev.off()''')

        # header line followed by one "key: value" line per estimate
        out = options.stdout
        out.write("H^2\n")
        for key in heritability.keys():
            out.write("%s: %0.3f\n" % (key, heritability[key]))
    elif task == "merge":
        file_list = argv[-1].split(",")
        P52.merge_heritability(file_list).to_csv(options.stdout, sep="\t")

    # write footer and output benchmark information.
    E.Stop()
Ejemplo n.º 9
0
def main(argv=None):
    """Run the gating function for one cell subset of one marker panel.

    Command line options are taken from *argv*; when *argv* is None,
    ``sys.argv`` is used instead.

    The ``--cell-type`` / ``--panel-id`` pair selects which ``P52``
    function is called.  Every such function is invoked with the same
    keyword arguments (``fcs_dir``, ``out_dir``, ``comp_matrix``,
    ``panel``, ``setid``, ``cell_subset`` and ``db``).  For a pair with no
    registered function, ``P.touch`` is called on
    ``<out_dir>/<fileset_id>-<panel>-<cell_type>.tsv`` instead.
    """

    if argv is None:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    # (flags, settings) pairs, registered in the order listed
    option_specs = (
        (("-t", "--test"),
         dict(dest="test",
              type="string",
              help="supply help")),
        (("--fcs-directory",),
         dict(dest="fcs_dir",
              type="string",
              help="path to directory containing .fcs files"
              " for processsing")),
        (("--output-format",),
         dict(dest="out_format",
              type="choice",
              choices=("summary", "intensity"),
              help="output either"
              " summary of intensities or raw intensity data")),
        (("--summary-stats",),
         dict(dest="stats",
              type="choice",
              choices=("fano", "mean", "std", "var", "median",
                       "geometric", "regress"),
              help="summary statistics to output if "
              "out_format == summary")),
        (("--gating-strategy",),
         dict(dest="gates",
              type="string",
              help=".tsv of gating strategy.  See docs for details")),
        (("--compensation-matrix",),
         dict(dest="comp_matrix",
              type="string",
              help="text file containing the compensation/spillover "
              "matrix")),
        (("--cell-type",),
         dict(dest="cell_type",
              type="string",
              help="description of cell type gene expression "
              "is measured on.  Will be added to output file name")),
        (("--panel-id",),
         dict(dest="panel",
              type="string",
              help="ID for marker panel")),
        (("--output-directory",),
         dict(dest="out_dir",
              type="string",
              help="output directory path for files")),
        (("--fileset-identifier",),
         dict(dest="fileset_id",
              type="string",
              help="unique identifier for sets of files/samples processed "
              "together.  Useful for assigning to batches for processing")),
        (("--database",),
         dict(dest="database",
              type="string",
              help="SQLite database to write results to ")),
    )
    for flags, settings in option_specs:
        parser.add_option(*flags, **settings)

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)
    E.info("Processing data for %s in panel %s" %
           (options.cell_type, options.panel))

    # (cell type, panel) -> name of the P52 function that gates it.
    # Names are resolved with getattr only for the selected entry, so a
    # function missing from P52 matters only when it is actually requested.
    # The panel P4 NK subsets (early_NK, terminal_NK, mature_NK) are
    # deliberately absent: their handlers are disabled, so they take the
    # placeholder path below.
    gating_functions = {
        # panel 1
        ("CD4_Tcell", "P1"): "get_cd4_tcells_panel1",
        ("CD8_Tcell", "P1"): "get_cd8_tcells_panel1",
        ("DN_Tcell", "P1"): "get_dn_tcells_panel1",
        ("DP_Tcell", "P1"): "get_dp_tcells_panel1",
        # panels 2a and 2b share their gating functions
        ("CD4_Tcell", "P2a"): "get_cd4_tcells_panel2",
        ("CD4_Tcell", "P2b"): "get_cd4_tcells_panel2",
        ("CD8_Tcell", "P2a"): "get_cd8_tcells_panel2",
        ("CD8_Tcell", "P2b"): "get_cd8_tcells_panel2",
        ("DN_Tcell", "P2a"): "get_dn_tcells_panel2",
        ("DN_Tcell", "P2b"): "get_dn_tcells_panel2",
        ("DP_Tcell", "P2a"): "get_dp_tcells_panel2",
        ("DP_Tcell", "P2b"): "get_dp_tcells_panel2",
        # panel 3
        ("CD4_Tmem", "P3"): "get_cd4_Tmem_panel3",
        ("CD8_Tmem", "P3"): "get_cd8_Tmem_panel3",
        ("CD4_Tnaive", "P3"): "get_cd4_naive_panel3",
        ("CD8_Tnaive", "P3"): "get_cd8_naive_panel3",
        # panel 5
        ("NKT_early", "P5"): "get_early_nktcells_panel5",
        ("NKT_naive", "P5"): "get_naive_nktcells_panel5",
        ("NKT_terminal", "P5"): "get_terminal_nktcells_panel5",
        ("NKT_effector", "P5"): "get_effector_nktcells_panel5",
        ("Vd1_Tcells", "P5"): "get_Vd1_tcells_panel5",
        ("Vd2p_Vg9dim", "P5"): "get_Vd2_vg9dim_panel5",
        ("Vd2n_Vg9p", "P5"): "get_Vd2n_vg9p_panel5",
        ("hemat_SCs", "P5"): "get_hemat_SC_panel5",
        # panel 6
        ("immature_Bcell", "P6"): "get_immature_Bcells_panel6",
        ("mature_Bcell", "P6"): "get_mature_Bcells_panel6",
        ("memory_Bcell", "P6"): "get_memory_Bcells_panel6",
        ("naive_Bcell", "P6"): "get_naive_Bcells_panel6",
        ("IgA_Bmem", "P6"): "get_IgA_Bcells_panel6",
        ("IgG_Bmem", "P6"): "get_IgG_Bcells_panel6",
        ("IgM_Bmem", "P6"): "get_IgM_Bcells_panel6",
        ("IgE_Bmem", "P6"): "get_IgE_Bcells_panel6",
        # panel 7
        ("monocytes", "P7"): "get_monocytes_panel7",
        ("M_dendritic", "P7"): "get_myeloid_DC_panel7",
        ("DC_cd123cd11c", "P7"): "get_cd123cd11c_DC_panel7",
        ("P_dendritic", "P7"): "get_plasmacytoid_DC_panel7",
        ("CD8_APCs", "P7"): "get_CD8_APC_panel7",
        ("CD4_APCs", "P7"): "get_CD4_APC_panel7",
        ("CD1cneg_mDC", "P7"): "get_CD1cneg_mDC_panel7",
        ("Inflam_DC", "P7"): "get_inflammatory_DC_panel7",
        ("CD16neg_DC", "P7"): "get_CD16neg_DC_panel7",
    }

    gate_name = gating_functions.get((options.cell_type, options.panel))

    if gate_name is not None:
        gate = getattr(P52, gate_name)
        gate(fcs_dir=options.fcs_dir,
             out_dir=options.out_dir,
             comp_matrix=options.comp_matrix,
             panel=options.panel,
             setid=options.fileset_id,
             cell_subset=options.cell_type,
             db=options.database)
    else:
        # no gating function for this combination: touch the expected
        # output file rather than running any gating
        placeholder = "%s/%s-%s-%s.tsv" % (options.out_dir,
                                           options.fileset_id,
                                           options.panel,
                                           options.cell_type)
        P.touch(placeholder)

    # write footer and output benchmark information.
    E.Stop()
Ejemplo n.º 10
0
def main(argv=None):
    """script main.
    parses command line options in sys.argv, unless *argv* is given.
    """

    if argv is None:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$", usage=globals()["__doc__"])

    parser.add_option("-t", "--test", dest="test", type="string", help="supply help")

    parser.add_option(
        "--fcs-directory",
        dest="fcs_dir",
        type="string",
        help="path to directory containing .fcs files" " for processsing",
    )

    parser.add_option(
        "--output-format",
        dest="out_format",
        type="choice",
        choices=("summary", "intensity"),
        help="output either" " summary of intensities or raw intensity data",
    )

    parser.add_option(
        "--summary-stats",
        dest="stats",
        type="choice",
        choices=("fano", "mean", "std", "var", "median", "geometric", "regress"),
        help="summary statistics to output if " "out_format == summary",
    )

    parser.add_option(
        "--gating-strategy", dest="gates", type="string", help=".tsv of gating strategy.  See docs for details"
    )

    parser.add_option(
        "--compensation-matrix",
        dest="comp_matrix",
        type="string",
        help="text file containing the compensation/spillover matrix",
    )

    parser.add_option(
        "--cell-type",
        dest="cell_type",
        type="string",
        help="description of cell type gene expression " "is measured on.  Will be added to output file name",
    )

    parser.add_option("--panel-id", dest="panel", type="string", help="ID for marker panel")

    parser.add_option("--output-directory", dest="out_dir", type="string", help="output directory path for files")

    parser.add_option(
        "--fileset-identifier",
        dest="fileset_id",
        type="string",
        help="unique identifier for sets of files/samples processed "
        "together.  Useful for assigning to batches for processing",
    )

    parser.add_option("--database", dest="database", type="string", help="SQLite database to write results to ")

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)
    E.info("Processing data for %s in panel %s" % (options.cell_type, options.panel))

    if options.cell_type == "CD4_Tmem" and options.panel == "P3":
        P52.get_cd4_Tmem_panel3(
            fcs_dir=options.fcs_dir,
            out_dir=options.out_dir,
            comp_matrix=options.comp_matrix,
            panel=options.panel,
            setid=options.fileset_id,
            cell_subset=options.cell_type,
            db=options.database,
        )
    elif options.cell_type == "CD8_Tmem" and options.panel == "P3":
        P52.get_cd8_Tmem_panel3(
            fcs_dir=options.fcs_dir,
            out_dir=options.out_dir,
            comp_matrix=options.comp_matrix,
            panel=options.panel,
            setid=options.fileset_id,
            cell_subset=options.cell_type,
            db=options.database,
        )
    elif options.cell_type == "CD4_Tnaive" and options.panel == "P3":
        P52.get_cd4_naive_panel3(
            fcs_dir=options.fcs_dir,
            out_dir=options.out_dir,
            comp_matrix=options.comp_matrix,
            panel=options.panel,
            setid=options.fileset_id,
            cell_subset=options.cell_type,
            db=options.database,
        )
    elif options.cell_type == "CD8_Tnaive" and options.panel == "P3":
        P52.get_cd8_naive_panel3(
            fcs_dir=options.fcs_dir,
            out_dir=options.out_dir,
            comp_matrix=options.comp_matrix,
            panel=options.panel,
            setid=options.fileset_id,
            cell_subset=options.cell_type,
            db=options.database,
        )
    elif options.cell_type == "CD4_Tcell" and options.panel == "P1":
        P52.get_cd4_tcells_panel1(
            fcs_dir=options.fcs_dir,
            out_dir=options.out_dir,
            comp_matrix=options.comp_matrix,
            panel=options.panel,
            setid=options.fileset_id,
            cell_subset=options.cell_type,
            db=options.database,
        )
    elif options.cell_type == "CD8_Tcell" and options.panel == "P1":
        P52.get_cd8_tcells_panel1(
            fcs_dir=options.fcs_dir,
            out_dir=options.out_dir,
            comp_matrix=options.comp_matrix,
            panel=options.panel,
            setid=options.fileset_id,
            cell_subset=options.cell_type,
            db=options.database,
        )
    elif options.cell_type == "DN_Tcell" and options.panel == "P1":
        P52.get_dn_tcells_panel1(
            fcs_dir=options.fcs_dir,
            out_dir=options.out_dir,
            comp_matrix=options.comp_matrix,
            panel=options.panel,
            setid=options.fileset_id,
            cell_subset=options.cell_type,
            db=options.database,
        )
    elif options.cell_type == "DP_Tcell" and options.panel == "P1":
        P52.get_dp_tcells_panel1(
            fcs_dir=options.fcs_dir,
            out_dir=options.out_dir,
            comp_matrix=options.comp_matrix,
            panel=options.panel,
            setid=options.fileset_id,
            cell_subset=options.cell_type,
            db=options.database,
        )
    elif options.cell_type == "CD4_Tcell" and options.panel == "P2a":
        P52.get_cd4_tcells_panel2(
            fcs_dir=options.fcs_dir,
            out_dir=options.out_dir,
            comp_matrix=options.comp_matrix,
            panel=options.panel,
            setid=options.fileset_id,
            cell_subset=options.cell_type,
            db=options.database,
        )
    elif options.cell_type == "CD4_Tcell" and options.panel == "P2b":
        P52.get_cd4_tcells_panel2(
            fcs_dir=options.fcs_dir,
            out_dir=options.out_dir,
            comp_matrix=options.comp_matrix,
            panel=options.panel,
            setid=options.fileset_id,
            cell_subset=options.cell_type,
            db=options.database,
        )
    elif options.cell_type == "CD8_Tcell" and options.panel == "P2a":
        P52.get_cd8_tcells_panel2(
            fcs_dir=options.fcs_dir,
            out_dir=options.out_dir,
            comp_matrix=options.comp_matrix,
            panel=options.panel,
            setid=options.fileset_id,
            cell_subset=options.cell_type,
            db=options.database,
        )
    elif options.cell_type == "CD8_Tcell" and options.panel == "P2b":
        P52.get_cd8_tcells_panel2(
            fcs_dir=options.fcs_dir,
            out_dir=options.out_dir,
            comp_matrix=options.comp_matrix,
            panel=options.panel,
            setid=options.fileset_id,
            cell_subset=options.cell_type,
            db=options.database,
        )
    elif options.cell_type == "DN_Tcell" and options.panel == "P2a":
        P52.get_dn_tcells_panel2(
            fcs_dir=options.fcs_dir,
            out_dir=options.out_dir,
            comp_matrix=options.comp_matrix,
            panel=options.panel,
            setid=options.fileset_id,
            cell_subset=options.cell_type,
            db=options.database,
        )
    elif options.cell_type == "DN_Tcell" and options.panel == "P2b":
        P52.get_dn_tcells_panel2(
            fcs_dir=options.fcs_dir,
            out_dir=options.out_dir,
            comp_matrix=options.comp_matrix,
            panel=options.panel,
            setid=options.fileset_id,
            cell_subset=options.cell_type,
            db=options.database,
        )
    elif options.cell_type == "DP_Tcell" and options.panel == "P2a":
        P52.get_dp_tcells_panel2(
            fcs_dir=options.fcs_dir,
            out_dir=options.out_dir,
            comp_matrix=options.comp_matrix,
            panel=options.panel,
            setid=options.fileset_id,
            cell_subset=options.cell_type,
            db=options.database,
        )
    elif options.cell_type == "DP_Tcell" and options.panel == "P2b":
        P52.get_dp_tcells_panel2(
            fcs_dir=options.fcs_dir,
            out_dir=options.out_dir,
            comp_matrix=options.comp_matrix,
            panel=options.panel,
            setid=options.fileset_id,
            cell_subset=options.cell_type,
            db=options.database,
        )
    # elif options.cell_type == "early_NK" and options.panel == "P4":
    #     P52.get_early_nkcells_panel4(fcs_dir=options.fcs_dir,
    #                                  out_dir=options.out_dir,
    #                                  comp_matrix=options.comp_matrix,
    #                                  panel=options.panel,
    #                                  setid=options.fileset_id,
    #                                  cell_subset=options.cell_type,
    #                                  db=options.database)
    # elif options.cell_type == "terminal_NK" and options.panel == "P4":
    #     P52.get_terminal_nkcells_panel4(fcs_dir=options.fcs_dir,
    #                                     out_dir=options.out_dir,
    #                                     comp_matrix=options.comp_matrix,
    #                                     panel=options.panel,
    #                                     setid=options.fileset_id,
    #                                     cell_subset=options.cell_type,
    #                                     db=options.database)
    # elif options.cell_type == "mature_NK" and options.panel == "P4":
    #     P52.get_mature_nkcells_panel4(fcs_dir=options.fcs_dir,
    #                                   out_dir=options.out_dir,
    #                                   comp_matrix=options.comp_matrix,
    #                                   panel=options.panel,
    #                                   setid=options.fileset_id,
    #                                   cell_subset=options.cell_type,
    #                                   db=options.database)
    elif options.cell_type == "NKT_early" and options.panel == "P5":
        P52.get_early_nktcells_panel5(
            fcs_dir=options.fcs_dir,
            out_dir=options.out_dir,
            comp_matrix=options.comp_matrix,
            panel=options.panel,
            setid=options.fileset_id,
            cell_subset=options.cell_type,
            db=options.database,
        )
    elif options.cell_type == "NKT_naive" and options.panel == "P5":
        P52.get_naive_nktcells_panel5(
            fcs_dir=options.fcs_dir,
            out_dir=options.out_dir,
            comp_matrix=options.comp_matrix,
            panel=options.panel,
            setid=options.fileset_id,
            cell_subset=options.cell_type,
            db=options.database,
        )
    elif options.cell_type == "NKT_terminal" and options.panel == "P5":
        P52.get_terminal_nktcells_panel5(
            fcs_dir=options.fcs_dir,
            out_dir=options.out_dir,
            comp_matrix=options.comp_matrix,
            panel=options.panel,
            setid=options.fileset_id,
            cell_subset=options.cell_type,
            db=options.database,
        )
    elif options.cell_type == "NKT_effector" and options.panel == "P5":
        P52.get_effector_nktcells_panel5(
            fcs_dir=options.fcs_dir,
            out_dir=options.out_dir,
            comp_matrix=options.comp_matrix,
            panel=options.panel,
            setid=options.fileset_id,
            cell_subset=options.cell_type,
            db=options.database,
        )
    elif options.cell_type == "Vd1_Tcells" and options.panel == "P5":
        P52.get_Vd1_tcells_panel5(
            fcs_dir=options.fcs_dir,
            out_dir=options.out_dir,
            comp_matrix=options.comp_matrix,
            panel=options.panel,
            setid=options.fileset_id,
            cell_subset=options.cell_type,
            db=options.database,
        )
    elif options.cell_type == "Vd2p_Vg9dim" and options.panel == "P5":
        P52.get_Vd2_vg9dim_panel5(
            fcs_dir=options.fcs_dir,
            out_dir=options.out_dir,
            comp_matrix=options.comp_matrix,
            panel=options.panel,
            setid=options.fileset_id,
            cell_subset=options.cell_type,
            db=options.database,
        )
    elif options.cell_type == "Vd2n_Vg9p" and options.panel == "P5":
        P52.get_Vd2n_vg9p_panel5(
            fcs_dir=options.fcs_dir,
            out_dir=options.out_dir,
            comp_matrix=options.comp_matrix,
            panel=options.panel,
            setid=options.fileset_id,
            cell_subset=options.cell_type,
            db=options.database,
        )
    elif options.cell_type == "hemat_SCs" and options.panel == "P5":
        P52.get_hemat_SC_panel5(
            fcs_dir=options.fcs_dir,
            out_dir=options.out_dir,
            comp_matrix=options.comp_matrix,
            panel=options.panel,
            setid=options.fileset_id,
            cell_subset=options.cell_type,
            db=options.database,
        )
    elif options.cell_type == "immature_Bcell" and options.panel == "P6":
        P52.get_immature_Bcells_panel6(
            fcs_dir=options.fcs_dir,
            out_dir=options.out_dir,
            comp_matrix=options.comp_matrix,
            panel=options.panel,
            setid=options.fileset_id,
            cell_subset=options.cell_type,
            db=options.database,
        )
    elif options.cell_type == "mature_Bcell" and options.panel == "P6":
        P52.get_mature_Bcells_panel6(
            fcs_dir=options.fcs_dir,
            out_dir=options.out_dir,
            comp_matrix=options.comp_matrix,
            panel=options.panel,
            setid=options.fileset_id,
            cell_subset=options.cell_type,
            db=options.database,
        )
    elif options.cell_type == "memory_Bcell" and options.panel == "P6":
        P52.get_memory_Bcells_panel6(
            fcs_dir=options.fcs_dir,
            out_dir=options.out_dir,
            comp_matrix=options.comp_matrix,
            panel=options.panel,
            setid=options.fileset_id,
            cell_subset=options.cell_type,
            db=options.database,
        )
    elif options.cell_type == "naive_Bcell" and options.panel == "P6":
        P52.get_naive_Bcells_panel6(
            fcs_dir=options.fcs_dir,
            out_dir=options.out_dir,
            comp_matrix=options.comp_matrix,
            panel=options.panel,
            setid=options.fileset_id,
            cell_subset=options.cell_type,
            db=options.database,
        )
    elif options.cell_type == "IgA_Bmem" and options.panel == "P6":
        P52.get_IgA_Bcells_panel6(
            fcs_dir=options.fcs_dir,
            out_dir=options.out_dir,
            comp_matrix=options.comp_matrix,
            panel=options.panel,
            setid=options.fileset_id,
            cell_subset=options.cell_type,
            db=options.database,
        )
    elif options.cell_type == "IgG_Bmem" and options.panel == "P6":
        P52.get_IgG_Bcells_panel6(
            fcs_dir=options.fcs_dir,
            out_dir=options.out_dir,
            comp_matrix=options.comp_matrix,
            panel=options.panel,
            setid=options.fileset_id,
            cell_subset=options.cell_type,
            db=options.database,
        )
    elif options.cell_type == "IgM_Bmem" and options.panel == "P6":
        P52.get_IgM_Bcells_panel6(
            fcs_dir=options.fcs_dir,
            out_dir=options.out_dir,
            comp_matrix=options.comp_matrix,
            panel=options.panel,
            setid=options.fileset_id,
            cell_subset=options.cell_type,
            db=options.database,
        )
    elif options.cell_type == "IgE_Bmem" and options.panel == "P6":
        P52.get_IgE_Bcells_panel6(
            fcs_dir=options.fcs_dir,
            out_dir=options.out_dir,
            comp_matrix=options.comp_matrix,
            panel=options.panel,
            setid=options.fileset_id,
            cell_subset=options.cell_type,
            db=options.database,
        )
    elif options.cell_type == "monocytes" and options.panel == "P7":
        P52.get_monocytes_panel7(
            fcs_dir=options.fcs_dir,
            out_dir=options.out_dir,
            comp_matrix=options.comp_matrix,
            panel=options.panel,
            setid=options.fileset_id,
            cell_subset=options.cell_type,
            db=options.database,
        )
    elif options.cell_type == "M_dendritic" and options.panel == "P7":
        P52.get_myeloid_DC_panel7(
            fcs_dir=options.fcs_dir,
            out_dir=options.out_dir,
            comp_matrix=options.comp_matrix,
            panel=options.panel,
            setid=options.fileset_id,
            cell_subset=options.cell_type,
            db=options.database,
        )
    elif options.cell_type == "DC_cd123cd11c" and options.panel == "P7":
        P52.get_cd123cd11c_DC_panel7(
            fcs_dir=options.fcs_dir,
            out_dir=options.out_dir,
            comp_matrix=options.comp_matrix,
            panel=options.panel,
            setid=options.fileset_id,
            cell_subset=options.cell_type,
            db=options.database,
        )
    elif options.cell_type == "P_dendritic" and options.panel == "P7":
        P52.get_plasmacytoid_DC_panel7(
            fcs_dir=options.fcs_dir,
            out_dir=options.out_dir,
            comp_matrix=options.comp_matrix,
            panel=options.panel,
            setid=options.fileset_id,
            cell_subset=options.cell_type,
            db=options.database,
        )
    elif options.cell_type == "CD8_APCs" and options.panel == "P7":
        P52.get_CD8_APC_panel7(
            fcs_dir=options.fcs_dir,
            out_dir=options.out_dir,
            comp_matrix=options.comp_matrix,
            panel=options.panel,
            setid=options.fileset_id,
            cell_subset=options.cell_type,
            db=options.database,
        )
    elif options.cell_type == "CD4_APCs" and options.panel == "P7":
        P52.get_CD4_APC_panel7(
            fcs_dir=options.fcs_dir,
            out_dir=options.out_dir,
            comp_matrix=options.comp_matrix,
            panel=options.panel,
            setid=options.fileset_id,
            cell_subset=options.cell_type,
            db=options.database,
        )
    elif options.cell_type == "CD1cneg_mDC" and options.panel == "P7":
        P52.get_CD1cneg_mDC_panel7(
            fcs_dir=options.fcs_dir,
            out_dir=options.out_dir,
            comp_matrix=options.comp_matrix,
            panel=options.panel,
            setid=options.fileset_id,
            cell_subset=options.cell_type,
            db=options.database,
        )
    elif options.cell_type == "Inflam_DC" and options.panel == "P7":
        P52.get_inflammatory_DC_panel7(
            fcs_dir=options.fcs_dir,
            out_dir=options.out_dir,
            comp_matrix=options.comp_matrix,
            panel=options.panel,
            setid=options.fileset_id,
            cell_subset=options.cell_type,
            db=options.database,
        )
    elif options.cell_type == "CD16neg_DC" and options.panel == "P7":
        P52.get_CD16neg_DC_panel7(
            fcs_dir=options.fcs_dir,
            out_dir=options.out_dir,
            comp_matrix=options.comp_matrix,
            panel=options.panel,
            setid=options.fileset_id,
            cell_subset=options.cell_type,
            db=options.database,
        )
    else:
        outfile = "%s/%s-%s-%s.tsv" % (options.out_dir, options.fileset_id, options.panel, options.cell_type)
        P.touch(outfile)

    # write footer and output benchmark information.
    E.Stop()
Ejemplo n.º 11
0
def main(argv=None):
    """Script main.

    Parses command line options in sys.argv, unless *argv* is given, and
    runs the task selected with ``--task``:

    * ``merge_flow``: merge the gate arrays held in the SQLite table with
      the demographics file and write one tab-separated file per merged
      data frame into ``--output-directory``.
    * ``split_zygosity``: split the input table and write the "MZ" and
      "DZ" frames to ``<pattern>-MZ.tsv`` and ``<pattern>-DZ.tsv``.
    * ``regress_confounding``: write the table returned by
      ``P52.regress_out_confounding`` to stdout.
    * ``kinship``: write the table returned by
      ``P52.make_kinship_matrix`` to stdout.

    The last element of *argv* is used as the input file.
    """

    if argv is None:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("-t", "--test", dest="test", type="string",
                      help="supply help")

    parser.add_option("--twin-id-column", dest="id_column", type="string",
                      help="column number or header for twin IDs in "
                      "input tables")

    parser.add_option("--demographics-file", dest="demo_file", type="string",
                      help="tab-separated text file containing twins "
                      "demographic data")

    parser.add_option("--demo-id-column", dest="demo_id_column", type="string",
                      help="column header or number that indicates the twin IDs "
                      "that match to those from the flow cytometry data")

    parser.add_option("--task", dest="task", type="choice",
                      choices=["merge_flow", "split_zygosity",
                               "regress_confounding", "kinship"],
                      help="choose a task")

    parser.add_option("--output-file-pattern", dest="out_pattern", type="string",
                      help="output filename pattern to use where output is "
                      "multiple files")

    parser.add_option("--output-directory", dest="out_dir", type="string",
                      help="directory to output files into")

    parser.add_option("--zygosity-column", dest="zygosity_col", type="string",
                      help="column header containing zygosity information")

    parser.add_option("--id-columns", dest="id_headers", type="string",
                      help="comma-separated list of column headers used to "
                      "uniquely identify each sample")

    parser.add_option("--family-id-column", dest="family_id", type="string",
                      help="column header containing family IDs")

    parser.add_option("--confounding-column", dest="confounding", type="string",
                      help="either a comma-separates list or single value, "
                      "column number or column header")

    parser.add_option("--marker-group", dest="marker_col", type="string",
                      help="column header containing marker IDs to group "
                      "regression fits by")

    parser.add_option("--filter-zero-arrays", dest="filter_zero", action="store_true",
                      help="Filter out arrays where there are no observations")

    parser.add_option("--database", dest="database", type="string",
                      help="absolute path to SQLite database")

    parser.add_option("--tablename", dest="table", type="string",
                      help="tablename to extract from SQL database")

    parser.add_option("--filter-gates", dest="filt_gate", type="string",
                      help="regex to filter out unwanted triboolean gates")

    # Defaults have to be registered before the command line is parsed;
    # set afterwards they never reach the returned options object.
    # NOTE(review): with a default of True the store_true flag
    # --filter-zero-arrays can no longer switch filtering off - confirm
    # this is the intended behaviour.
    parser.set_defaults(filter_zero=True,
                        filt_gate=None)

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    # the input file is expected as the final command line argument
    infile = argv[-1]

    if options.task == "merge_flow":

        merged_arrays = P52.mergeGateArrays(db=options.database,
                                            table_name=options.table,
                                            filter_zero=options.filter_zero,
                                            filter_gate=options.filt_gate)

        all_dfs = P52.mergeArrayWithDemographics(flow_arrays=merged_arrays,
                                                 id_column=options.id_column,
                                                 demo_file=options.demo_file,
                                                 demo_id_column=options.demo_id_column)

        for out_df in all_dfs:
            # construct the table names using cell_type, panel and gate
            # cell type and statistic should be in the out_pattern
            outname = "-".join([options.out_pattern, out_df["gate"][0],
                                out_df["panel"][0]])
            # file names may contain '/', replace these with "_"
            outname = outname.replace("/", "_")
            out_file = "/".join([options.out_dir, outname])
            E.info("writing %s data to file" % outname)
            # DataFrame.to_csv names the index column via index_label;
            # it has no index_col keyword (that one belongs to the readers)
            out_df.to_csv(out_file, sep="\t", index_label="indx")
            # memory useage was ballooning from R's amazing ability
            # to free up memory after garbage collection.
            P52.clearREnvironment()

    elif options.task == "split_zygosity":
        out_frames = P52.split_zygosity(infile=infile,
                                        zygosity_column=options.zygosity_col,
                                        id_headers=(options.id_headers).split(","),
                                        pair_header=options.family_id)
        # expect keys: MZ and DZ
        try:
            MZ_frame = out_frames["MZ"]
            DZ_frame = out_frames["DZ"]

            # output filenames using pattern and zygosity as a prefix
            MZ_outfile = "-".join([options.out_pattern, "MZ.tsv"])
            MZ_frame.to_csv(MZ_outfile, sep="\t", index_label="indx")

            DZ_outfile = "-".join([options.out_pattern, "DZ.tsv"])
            DZ_frame.to_csv(DZ_outfile, sep="\t", index_label="indx")
        except TypeError as error:
            # still best-effort (no crash), but report why nothing was
            # written instead of failing silently
            E.warn("zygosity tables were not written: %s" % error)

    elif options.task == "regress_confounding":
        out_df = P52.regress_out_confounding(infile=infile,
                                             confounding_column=options.confounding,
                                             group_var=options.marker_col)

        out_df.to_csv(options.stdout, sep="\t", index_label="indx")
    elif options.task == "kinship":
        out_df = P52.make_kinship_matrix(twins_file=infile,
                                         id_column=options.id_headers,
                                         family_column=options.family_id,
                                         zygosity_column=options.zygosity_col)

        out_df.to_csv(options.stdout, index_label="indx", sep="\t")

    # write footer and output benchmark information.
    E.Stop()
Ejemplo n.º 12
0
def main(argv=None):
    """Script main.

    Parses command line options in sys.argv, unless *argv* is given,
    loads the input table(s), optionally melts the data frame and draws
    the plot selected with ``--plot-type`` into ``--outfile``.

    The last element of *argv* is the input: either a single
    tab-separated file, or two comma-separated file names that are read
    with ":" as the field separator and merged into one data frame.

    Raises
    ------
    ValueError
        if the X-axis variable is not a column of the data frame.
    IOError
        if no output file was given.
    """

    if argv is None:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("-t",
                      "--test",
                      dest="test",
                      type="string",
                      help="supply help")

    parser.add_option("--plot-type",
                      dest="plot_type",
                      type="choice",
                      choices=["histogram", "scatter", "barchart"],
                      help="type of plot to generate")

    parser.add_option("--x-axis",
                      dest="x_axis",
                      type="string",
                      help="variable to plot on the X-axis."
                      "This is the default axis for plotting.")

    parser.add_option("--y-axis",
                      dest="y_axis",
                      type="string",
                      help="variable to plot on the Y-axis")

    parser.add_option("--split-by",
                      dest="split_by",
                      type="string",
                      help="varible over which to split up plots")

    parser.add_option("--X-title",
                      dest="x_title",
                      type="string",
                      help="label to attach to X-axis")

    parser.add_option("--Y-title",
                      dest="y_title",
                      type="string",
                      help="label to attach to Y-axis")

    parser.add_option("--colour-var",
                      dest="col_var",
                      type="string",
                      help="variable to colour points by")

    parser.add_option("--free-scale",
                      dest="free_scale",
                      type="choice",
                      choices=["free_x", "free_y", "free"],
                      help="whether to "
                      "use free scaling on plot axes")

    parser.add_option("--outfile",
                      dest="outfile",
                      type="string",
                      help="file to save plot to")

    parser.add_option("--melt-data",
                      dest="melt",
                      action="store_true",
                      help="melt the dataframe first, requires ID vars")

    parser.add_option("--melt-id-vars",
                      dest="id_vars",
                      type="string",
                      help="comma separated list of id variables for"
                      " the melted dataframe")

    parser.add_option("--merge-frames",
                      dest="merge",
                      action="store_true",
                      help="merge two input dataframes together")

    parser.add_option("--merge-id-vars",
                      dest="merge_vars",
                      type="string",
                      help="comma separate list of id variables to merge "
                      "two dataframes on")

    # Defaults have to be registered before the command line is parsed;
    # set afterwards they never reach the returned options object.
    # NOTE(review): "both" is not one of the --free-scale choices, so it
    # is only ever seen as the default - confirm the plotting helpers
    # accept it.
    parser.set_defaults(free_scale="both",
                        split_by=None,
                        col_var=None,
                        melt=False)

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    # the input file (or comma-separated pair) is the final argument
    infile = argv[-1]

    infiles = infile.split(",")
    if len(infiles) == 2:
        df1 = pd.read_table(infiles[0], sep=":", index_col=0, header=0)
        df2 = pd.read_table(infiles[1], sep=":", index_col=0, header=0)
        ids = options.merge_vars.split(",")
        # expose the index as a regular column so it can be the merge key
        df1[options.y_axis] = df1.index
        df2[options.y_axis] = df2.index

        # two names per frame: each input must hold exactly one data
        # column besides the index, otherwise pandas rejects the rename
        df1.columns = [ids[0], options.y_axis]
        df2.columns = [ids[0], options.y_axis]

        df = pd.merge(df1, df2, on=options.y_axis)

        # these need to not be hard-coded!
        df.columns = ["mean_h2", ids[0], "fano_h2"]
    else:
        df = pd.read_table(infile, sep="\t", index_col=0, header=0)

    # assumes the first column is the index
    if options.melt:
        if options.y_axis:
            df = pd.melt(df,
                         id_vars=options.x_axis,
                         value_name=options.y_axis,
                         var_name=options.col_var)
        else:
            # the id variables are only needed (and only parsed) when
            # melting onto the X-axis
            mids = options.id_vars.split(",")
            df = pd.melt(df,
                         id_vars=mids,
                         value_name=options.x_axis,
                         var_name=options.col_var)

    # check variables are present; a missing column raises KeyError
    # (ValueError is kept in the handlers for backward compatibility)
    try:
        df[options.x_axis]
    except (KeyError, ValueError):
        raise ValueError("no plotting variable found")

    if options.col_var:
        try:
            df[options.col_var]
        except (KeyError, ValueError):
            E.warn("Colour variable not found in data frame."
                   "Check the data file is the correct one")

    if options.split_by:
        try:
            df[options.split_by]
        except (KeyError, ValueError):
            E.warn("Split-by variable not found in the data "
                   "frame.  Check the data file is the correct"
                   " one.")

    # explicit check rather than assert, which is stripped under -O
    if not options.outfile:
        raise IOError("no output file detected")

    if options.plot_type == "histogram":
        P52.plotHistogram(data=df,
                          variable=options.x_axis,
                          save_path=options.outfile,
                          x_title=options.x_title,
                          y_title=options.y_title,
                          colour_var=options.col_var,
                          scales=options.free_scale,
                          split_var=options.split_by)
    elif options.plot_type == "barchart":
        P52.plotBarchart(data=df,
                         x_variable=options.x_axis,
                         y_variable=options.y_axis,
                         save_path=options.outfile,
                         x_title=options.x_title,
                         y_title=options.y_title,
                         colour_var=options.col_var,
                         split_var=options.split_by)
    else:
        # "scatter" is accepted by the parser but has no implementation
        # here; say so rather than exiting without any output
        E.warn("plot type %s is not implemented, no plot was generated" %
               options.plot_type)

    # write footer and output benchmark information.
    E.Stop()