Exemple #1
0
def resource_cutadapt_metrics(uri, **kwargs):
    """Load one section of a ``=== ... ===``-delimited report as a DataFrame.

    The file is split on banner lines of the form ``=== <title> ===``; only
    the section that follows the first banner is parsed. Every non-empty
    line of that section is handed to ``_split_x`` (defined elsewhere in
    this module; presumably it yields a ``(statistic, value)`` pair --
    confirm against its definition).

    Args:
        uri: Path of the report file to read.
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        A DataFrame indexed by ``statistic`` with a single numeric
        ``value`` column.

    Raises:
        IndexError: If the file contains no banner line.
    """
    with open(uri) as handle:
        text = handle.read()
    # Element 0 is the text preceding the first banner, element 1 the
    # section immediately after it.
    blocks = re.split("\n===.*===\n", text)
    records = []
    for line in blocks[1].split("\n"):
        if line:
            records.append(_split_x(line))
    df = DataFrame.from_records(records,
                                index=["statistic"],
                                columns=["statistic", "value"])
    df["value"] = pd.to_numeric(df["value"])
    return df
Exemple #2
0
def _reader(uri):
    with open(uri) as fh:
        data = [x.strip("\n").split("\t") for x in fh if not x.strip() == ""]
        indices = list((i for i, val in enumerate(data)
                        if val[0].startswith("## METRICS CLASS")))
        metrics = DataFrame.from_records(data[(indices[0] + 2):],
                                         columns=data[(indices[0] + 1)],
                                         index="CATEGORY")
    return (metrics, None)
Exemple #3
0
def _hist_reader(uri):
    with open(uri) as fh:
        data = [x.strip("\n").split("\t") for x in fh if not x.strip() == ""]
        indices = list((i for i, val in enumerate(data)
                        if val[0].startswith("## METRICS CLASS")
                        or val[0].startswith("## HISTOGRAM")))
        if len(indices) == 1:
            indices.append(len(data))
        metrics = DataFrame.from_records(data[(indices[0] + 2):(indices[1])],
                                         columns=data[(indices[0] + 1)])
        # We could be missing the histogram
        try:
            hist = DataFrame.from_records(data[(indices[1] + 2):],
                                          columns=data[(indices[1] + 1)])
        except:
            logger.warn("No histogram data for {}".format(uri))
            hist = None
    return (metrics, hist)
Exemple #4
0
def resource_genome_results(uri, key="Globals", **kwargs):
    """Parse a sectioned results file and return one section as a DataFrame.

    Sections are introduced by header lines made of one or more ``>``
    characters, whitespace and a title of letters and spaces. The text
    before the first header is labelled ``Header``; every other section is
    named after its title with spaces replaced by underscores.

    * ``Coverage_per_contig`` rows are split on whitespace into the columns
      given by ``COVERAGE_PER_CONTIG_COLUMNS`` (module-level constant),
      indexed by ``chr`` and converted to numeric values.
    * ``Coverage`` and ``Header`` are skipped and therefore cannot be
      requested through ``key``.
    * Every other section is parsed line by line with ``_split_x`` (defined
      elsewhere in this module) into a ``statistic``-indexed frame with a
      ``value`` column; all but ``Input`` are converted to numeric values.

    Args:
        uri: Path of the results file.
        key: Name of the section to return (underscored form).
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        The DataFrame built for section ``key``.

    Raises:
        KeyError: If ``key`` names a skipped or absent section.
    """
    with open(uri) as fh:
        data = fh.read()
    # Raw strings: "\s" in a plain literal is an invalid escape sequence
    # and triggers a SyntaxWarning on recent Python versions.
    sections = re.split(r">+\s+[a-zA-Z ]+", data)
    section_names = ["Header"] + [
        name.replace(" ", "_")
        for name in re.findall(r">+\s+([a-zA-Z ]+)", data)
    ]
    d = dict()
    for h, sec in zip(section_names, sections):
        if h in ("Coverage", "Header"):
            continue
        lines = [x for x in sec.split("\n") if x]
        if h == "Coverage_per_contig":
            d[h] = DataFrame.from_records(
                [re.split(r"\s+", x.strip()) for x in lines],
                columns=COVERAGE_PER_CONTIG_COLUMNS,
                index="chr")
            d[h] = d[h].apply(pd.to_numeric)
        else:
            d[h] = DataFrame.from_records(
                [_split_x(x) for x in lines],
                columns=["statistic", "value"],
                index="statistic")
            # The "Input" section is left unconverted (not numeric data).
            if h not in ["Input"]:
                d[h] = d[h].apply(pd.to_numeric)
    return d[key]