def plot_s(genedata: pd.DataFrame, chrom_offsets: pd.DataFrame, outpath: str,
           ymin: Optional[float], ymax: Optional[float]) -> None:
    """Plot the "s" column of ``genedata`` via ``mh.plot_manhat``.

    Args:
        genedata: Data frame holding an "s" column (the plotted value) and a
            "Scaffold" column (passed as ``chrom_col``).
        chrom_offsets: Chromosome offsets, forwarded unchanged to
            ``mh.plot_manhat``.
        outpath: Output path, forwarded unchanged to ``mh.plot_manhat``.
        ymin: Lower y-axis limit. ``None`` means "derive from the data":
            the smallest "s" value, but never below -1.
        ymax: Upper y-axis limit. ``None`` means "derive from the data":
            the largest "s" value, but never above 1.
    """
    # Compare against None rather than relying on truthiness: 0.0 is a
    # perfectly valid explicit limit and must not be treated as "missing".
    # Each limit is resolved on its own so that supplying only one of them
    # does not discard it.
    if ymin is None:
        ymin = max(min(genedata["s"]), -1)
    if ymax is None:
        ymax = min(max(genedata["s"]), 1)
    mh.plot_manhat(genedata,
                   outpath,
                   chrom_offsets,
                   "s",
                   title="Selection coefficient",
                   yname="s",
                   dims=(20, 6),
                   scale=1.5,
                   log=True,
                   named_xticks=True,
                   chrom_col="Scaffold",
                   geom="line",
                   ylim=(ymin, ymax))
def plot_p(genedata: pd.DataFrame, chrom_offsets: pd.DataFrame, outpath: str,
           ymin: Optional[float], ymax: Optional[float]) -> None:
    """Plot the "p" column of ``genedata`` via ``mh.plot_manhat``.

    Args:
        genedata: Data frame holding a "p" column (the plotted value) and a
            "Scaffold" column (passed as ``chrom_col``).
        chrom_offsets: Chromosome offsets, forwarded unchanged to
            ``mh.plot_manhat``.
        outpath: Output path, forwarded unchanged to ``mh.plot_manhat``.
        ymin: Lower y-axis limit. ``None`` means "derive from the data":
            the smallest "p" value, but never below 0.
        ymax: Upper y-axis limit. ``None`` means "derive from the data":
            the largest "p" value.
    """
    # Compare against None rather than relying on truthiness: an explicit
    # ymin of 0 is a valid request and must not be treated as "missing".
    # Each limit is resolved on its own so that supplying only one of them
    # does not discard it.
    if ymin is None:
        ymin = max(min(genedata["p"]), 0)
    if ymax is None:
        ymax = max(genedata["p"])
    mh.plot_manhat(
        genedata,
        outpath,
        chrom_offsets,
        "p",
        title="Probability of selection coefficient occurring randomly",
        yname="-log10(p)",
        dims=(20, 6),
        scale=1.5,
        log=True,
        named_xticks=True,
        chrom_col="Scaffold",
        geom="line",
        ylim=(ymin, ymax))
Ejemplo n.º 3
0
def plot_p(genedata: pd.DataFrame, chrom_offsets: pd.DataFrame, outpath: str,
           ymin: Optional[float], ymax: Optional[float]) -> None:
    """Plot the "p" column of ``genedata`` as points via ``mh.plot_manhat``.

    Points are coloured by the "reject" column (passed as ``color_col``).

    Args:
        genedata: Data frame holding a "p" column (the plotted value), a
            "Scaffold" column (passed as ``chrom_col``) and a "reject"
            column (passed as ``color_col``).
        chrom_offsets: Chromosome offsets, forwarded unchanged to
            ``mh.plot_manhat``.
        outpath: Output path, forwarded unchanged to ``mh.plot_manhat``.
        ymin: Lower y-axis limit. ``None`` means "derive from the data":
            the smallest "p" value, but never below 0.
        ymax: Upper y-axis limit. ``None`` means "derive from the data":
            the largest "p" value.
    """
    # Compare against None rather than relying on truthiness: an explicit
    # ymin of 0 is a valid request and must not be treated as "missing".
    # Each limit is resolved on its own so that supplying only one of them
    # does not discard it.
    if ymin is None:
        ymin = max(min(genedata["p"]), 0)
    if ymax is None:
        ymax = max(genedata["p"])
    mh.plot_manhat(
        genedata,
        outpath,
        chrom_offsets,
        "p",
        title="Tukey test probability of\ndifference in selection coefficient",
        yname="-log10(p)",
        dims=(20, 6),
        scale=1.5,
        log=True,
        named_xticks=True,
        chrom_col="Scaffold",
        geom="point",
        ylim=(ymin, ymax),
        color_col="reject")
Ejemplo n.º 4
0
def main():
    """Plot two feature-density tracks, once faceted and once coloured.

    Reads chromosome lengths from "reflens.bed" and density data from
    "answer_dens.bed" (labelled "Genes") and "repanswer_dens.bed" (labelled
    "Repeats"), echoes each intermediate result to stdout, and writes
    "select_families_dens2.pdf" (faceted by "Feature") and
    "select_families_dens3.pdf" (coloured by "Feature").
    """
    with open("reflens.bed", "r") as bed_handle:
        chrom_lengths = mh.get_chrom_lens_from_bed(bed_handle)
    print(chrom_lengths)

    # Load and echo each density track in turn.
    gene_density = mh.get_data_from_bed("answer_dens.bed",
                                        data_col_name="Density")
    print(gene_density)
    repeat_density = mh.get_data_from_bed("repanswer_dens.bed",
                                          data_col_name="Density")
    print(repeat_density)

    # Both tracks share the same column layout; only the feature label differs.
    column_spec = {
        "chrom_col": "Scaffold",
        "bp_col": "Start",
        "val_col": "Value",
    }
    gene_manhat, _ = mh.manhatify(gene_density,
                                  chrom_lengths,
                                  feature="Genes",
                                  **column_spec)
    # The offsets returned by the second call are the ones used for plotting.
    repeat_manhat, offsets = mh.manhatify(repeat_density,
                                          chrom_lengths,
                                          feature="Repeats",
                                          **column_spec)

    merged = mh.combine_data((gene_manhat, repeat_manhat))
    print(merged.head())
    print(offsets)

    # The two output plots differ only in file name and in how "Feature"
    # is used to separate the tracks.
    shared_style = {
        "title": "Feature densities",
        "yname": "Features per 1Mb window",
        "dims": (20, 6),
        "scale": 1.5,
    }
    variants = (
        ("select_families_dens2.pdf", {"facet_col": "Feature"}),
        ("select_families_dens3.pdf", {"color_col": "Feature"}),
    )
    for pdf_path, grouping in variants:
        mh.plot_manhat(merged,
                       pdf_path,
                       offsets,
                       "Density",
                       **shared_style,
                       **grouping)
Ejemplo n.º 5
0
def _build_arg_parser():
    """Build the command-line parser for the pairing-rate plotting script."""
    # The text must be passed as ``description``; the first positional
    # parameter of ArgumentParser is ``prog`` (the program name).
    parser = argparse.ArgumentParser(
        description="Visualize Hi-C pairing rates as a 2-d line plot.")
    parser.add_argument(
        "input",
        nargs='*',
        help="Input file(s) generated by pairviz (default = stdin).")
    parser.add_argument("-L",
                        "--chrlens",
                        help="chromosome lengths .bed file (required).",
                        required=True)
    parser.add_argument("-o",
                        "--output",
                        help="output path (default = out.pdf).")
    parser.add_argument("-t",
                        "--title",
                        help="Title of plot (default = \"Pairing Rate\").")
    parser.add_argument(
        "-p",
        "--proportion",
        help=
        "If included, plot as a proportion of total reads in the region, rather than absolute (default = False).",
        action="store_true")
    parser.add_argument(
        "-f",
        "--no_fpkm",
        help=
        "If included, plot read counts rather than FPKM (default = False, can be combined with --proportion).",
        action="store_true")
    parser.add_argument("-s",
                        "--self",
                        help="Also plot self-interactions (default = False).",
                        action="store_true")
    # NOTE(review): --chromspace is accepted for CLI compatibility, but main()
    # does not forward it to any call; wire it up once the target parameter
    # is confirmed.
    parser.add_argument(
        "-c",
        "--chromspace",
        type=int,
        default=5000000,
        help=
        "bp of space to put between chromosomes in plot (default = 5000000).")
    parser.add_argument("-l",
                        "--log",
                        help="Log-scale the y-axis (default = False).",
                        action="store_true")
    parser.add_argument("-i",
                        "--stdin",
                        help="take input from stdin along with other inputs.",
                        action="store_true")
    parser.add_argument(
        "-n",
        "--name_col",
        help="Name of the column to use for differentiating different runs")
    parser.add_argument("-x", "--x_axis_name", help="X axis name.")
    parser.add_argument("-y", "--y_axis_name", help="Y axis name.")
    parser.add_argument("-N",
                        "--named_xticks",
                        help="Use chromosome names for X axis ticks.",
                        action="store_true")
    return parser


def _select_y_columns(use_fpkm, proportion):
    """Return the (pairing, self-interaction) column names to plot."""
    if use_fpkm:
        if proportion:
            return 'pair_prop_fpkm', 'alt_prop_fpkm'
        return 'pair_fpkm', 'alt_fpkm'
    if proportion:
        return 'pair_prop', 'alt_prop'
    return 'hits', 'alt_hits'


def main():
    """Parse the command line, load pairviz output and write the plot.

    Input is read from the files given as positional arguments; stdin is
    read as well when ``--stdin`` is given, and is the only source when no
    input files are given. The selected value column(s) are melted into long
    form, converted to float, and passed through ``mh.manhatify`` and
    ``mh.plot_manhat``.
    """
    import contextlib

    args = _build_arg_parser().parse_args()

    # Resolve defaults for every option that was not supplied.
    output = args.output or "out.pdf"
    title = args.title or "Pairing Rate"
    xname = args.x_axis_name or "Genome position (bp)"
    yname = args.y_axis_name or "Hi-C contacts"
    name_col = args.name_col or None
    plot_self = args.self
    my_y, alt_y = _select_y_columns(use_fpkm=not args.no_fpkm,
                                    proportion=args.proportion)

    # Build the combined data frame. The ExitStack guarantees that every
    # input file opened here is closed again, even if parsing fails.
    with contextlib.ExitStack() as stack:
        inconns = []
        # Fall back to stdin when no input files were given. (Testing the
        # builtin ``input`` instead of ``args.input`` would never be true.)
        if args.stdin or not args.input:
            inconns.append(sys.stdin)
        for path in args.input:
            inconns.append(stack.enter_context(open(path, "r")))
        alldatas = parse_all_data(inconns)
        # Concatenate while the files are still open, in case the parsed
        # pieces are produced lazily from the open handles.
        big_alldata = pd.concat(alldatas, ignore_index=True)

    with open(args.chrlens, "r") as inconn:
        chrlens = mh.get_chrom_lens_from_bed(inconn)

    id_vars = ['chrom', 'start', 'end']
    if name_col:
        id_vars.append(name_col)
    m_alldata = pd.melt(big_alldata, id_vars=id_vars)

    # Keep only the requested value column(s); self-interactions are
    # included only on request.
    wanted = [my_y, alt_y] if plot_self else [my_y]
    # Copy the filtered rows so the dtype conversion below writes to an
    # independent frame rather than to a slice of ``m_alldata``.
    mm_alldata = m_alldata[m_alldata['variable'].isin(wanted)].copy()
    mm_alldata['value'] = mm_alldata['value'].astype(float)

    manhat_data, chroffsets = mh.manhatify(mm_alldata,
                                           chrlens,
                                           chrom_col="chrom",
                                           bp_col="start",
                                           val_col="value",
                                           feature=name_col)

    mh.plot_manhat(manhat_data,
                   output,
                   chroffsets,
                   "value",
                   title=title,
                   xname=xname,
                   yname=yname,
                   color_col=name_col,
                   log=args.log,
                   scale=1,
                   named_xticks=args.named_xticks,
                   chrom_col="chrom")