Example #1
0
async def validate_fromhub(
    namespace: str,
    pep_id: str,
):
    """Validate a stored PEP against every schema in ``schemas_to_test``.

    The project is built from ``_PEP_STORES[namespace][pep_id]`` and passed,
    together with each schema, to ``vwrap``; the outcome is stored under the
    ``"result"`` key of that schema's entry.

    Returns:
        A ``JSONResponse`` whose content holds the PEP id, the peppy version
        and one validation entry per schema.
    """
    project = peppy.Project(_PEP_STORES[namespace][pep_id])

    # One entry per schema, in the iteration order of schemas_to_test.
    validations = [
        {
            "id": schema_id,
            "name": schema_data["name"],
            "docs": schema_data["docs"],
            "schema": schema_data["schema"],
            "result": vwrap(project, schema_data["schema"]),
        }
        for schema_id, schema_data in schemas_to_test.items()
    ]

    payload = {
        "name": pep_id,
        "filenames": "not provided",
        "peppy_version": peppy_version,
        "validations": validations,
    }
    return JSONResponse(content=payload)
Example #2
0
def main():
    """Create the trackhub and IGV link files for a peppy project.

    Loads the project named by ``args.project_config_file``, checks that its
    configuration has a ``trackhubs`` section with a ``trackhub_dir``,
    fills in ``args.attributes`` and ``args.color_attribute`` when they were
    not given, and then calls ``make_ucsc_trackhub`` followed by
    ``make_igv_tracklink``.

    Raises:
        ValueError: if the project configuration has no ``trackhubs``
            section, or that section has no ``trackhub_dir`` attribute.
    """
    args = parse_arguments()

    # Start project object
    prj = peppy.Project(args.project_config_file)

    if "trackhubs" not in prj._config:
        raise ValueError(
            "Project configuration does not have a trackhub section.")
    if "trackhub_dir" not in prj._config.trackhubs:
        raise ValueError(
            "Project trackhub configuration does not have a trackhub_dir attribute."
        )

    if args.attributes is None:
        # If no attributes are set try to use group attributes specified in the project config
        if hasattr(prj, "group_attributes"):
            print(
                "Using group attributes from project configuration file: '{}'."
                .format(",".join(prj.group_attributes)))
            args.attributes = prj.group_attributes
        # If none are set in project config, use just "sample_name"
        else:
            args.attributes = ["sample_name"]
    else:
        args.attributes = args.attributes.split(",")

    if args.color_attribute is None:
        args.color_attribute = args.attributes[0]

    # Setup paths and hub files
    bigwig_dir = os.path.join(prj._config.trackhubs.trackhub_dir)
    os.makedirs(bigwig_dir, exist_ok=True)
    track_hub = os.path.join(bigwig_dir, "hub.txt")
    genomes_hub = os.path.join(bigwig_dir, "genomes.txt")
    # Create (or truncate) genomes.txt; the context manager guarantees the
    # handle is closed instead of relying on garbage collection.
    with open(genomes_hub, "w"):
        pass

    # Setup attributes
    # Project name: explicit name, else the root directory's base name, else
    # a relative path built from the sample table's file name.
    if "name" in prj:
        proj_name = prj["name"]
    elif "root_dir" in prj._config:
        proj_name = os.path.basename(prj._config['root_dir'])
    else:
        proj_name = os.path.join(
            "..", "..", os.path.basename(prj._config.sample_table))

    if "description" in prj._config:
        proj_desc = prj._config["description"]
    else:
        proj_desc = proj_name
    user_email = prj._config["email"] if "email" in prj._config else ""

    # In the future there will be more actions than this
    make_ucsc_trackhub(args, prj, track_hub, bigwig_dir, genomes_hub,
                       proj_name, proj_desc, user_email)

    track_file = os.path.join(bigwig_dir, "igv", "index.html")
    # NOTE(review): this reads `prj.config` while the rest of the function
    # uses `prj._config` -- confirm both expose the same mapping.
    track_url = os.path.join(prj.config["trackhubs"]["url"], "igv")
    make_igv_tracklink(prj, track_file, track_url)
Example #3
0
async def validate_pep(request: Request,
                       files: List[UploadFile] = File(...),
                       schemas_to_test=schemas_to_test):
    """Store uploaded files, load the PEP among them and validate it.

    Every upload is written into the ``uploads`` folder. The last upload
    whose extension is ``.yaml``, ``.yml`` or ``.csv`` is used as the project
    config for ``peppy.Project``; the project is then passed to ``vwrap``
    once per entry of ``schemas_to_test``.

    Args:
        request: the incoming request (not used by the body).
        files: the uploaded files.
        schemas_to_test: mapping of schema id to a dict with ``name``,
            ``docs`` and ``schema`` keys.

    Returns:
        A ``JSONResponse`` with the validation results, or a 400
        ``JSONResponse`` carrying an ``"error"`` key when none of the uploads
        is a project config file.
    """
    upload_folder = "uploads"
    pconf = None
    for file in files:
        print(f"File: '{file}'")
        # The filename is chosen by the client: keep only its last path
        # component so the upload cannot be written outside upload_folder.
        safe_name = os.path.basename(file.filename)
        full_path = os.path.join(upload_folder, safe_name)
        # `with` closes the destination even if the copy fails.
        with open(full_path, "wb+") as uploaded:
            shutil.copyfileobj(file.file, uploaded)
        print(full_path)
        ext = os.path.splitext(safe_name)[1]
        print(ext)
        if ext in (".yaml", ".yml", ".csv"):
            pconf = full_path
            print("Got yaml:", pconf)

    if pconf is None:
        # Previously this fell through to an UnboundLocalError (HTTP 500).
        return JSONResponse(
            status_code=400,
            content={"error": "No project config file (.yaml, .yml or .csv) provided."},
        )

    print(pconf)
    p = peppy.Project(pconf)
    print(p)

    vals = {
        "name": pconf,
        "filenames": [file.filename for file in files],
        "peppy_version": peppy_version,
        "validations": [
            {
                "id": schema_id,
                "name": schema_data["name"],
                "docs": schema_data["docs"],
                "schema": schema_data["schema"],
                "result": vwrap(p, schema_data["schema"]),
            }
            for schema_id, schema_data in schemas_to_test.items()
        ],
    }
    return JSONResponse(content=vals)
class ChainedAssignent:
    """Context manager that temporarily overrides pandas' chained-assignment mode.

    On entry the current value of ``pd.options.mode.chained_assignment`` is
    saved and replaced by the value given to the constructor; on exit the
    saved value is put back.
    """

    def __init__(self, chained=None):
        """Remember the mode to apply; must be ``None``, "warn" or "raise"."""
        acceptable = [None, "warn", "raise"]
        assert chained in acceptable, f"chained must be in {acceptable}"
        self.swcw = chained

    def __enter__(self):
        """Save the active mode, install the requested one, return ``self``."""
        mode_options = pd.options.mode
        self.saved_swcw = mode_options.chained_assignment
        mode_options.chained_assignment = self.swcw
        return self

    def __exit__(self, *exc_info):
        """Put back the mode that was active before ``__enter__``."""
        pd.options.mode.chained_assignment = self.saved_swcw


# Load the PEP handed over by Snakemake and expose its sample table with the
# "sample_name" column renamed to "alias".
pep = peppy.Project(snakemake.input.pep)
sample_metadata = pep.sample_table.rename(columns={"sample_name": "alias"})

# Parse the YAML config file given as a Snakemake input.
with open(snakemake.input.config) as h:
    conf = yaml.full_load(h)

# First positional Snakemake param.
# NOTE(review): presumably a template for output file names -- confirm
# against the rule that runs this script.
file_name_template = snakemake.params[0]

# ENA_study
# Column names of the study sheet and one descriptive comment per column
# (the two lists appear to be index-aligned).
ena_study_cols = ["alias", "title", "study_type", "study_abstract"]
ena_study_comments = [
    "Unique identificator for a study. This is used to link experiments to the study.",
    "Title of the study as would be used in a publication.",
    "The STUDY_TYPE presents a controlled vocabulary for expressing the overall purpose of the study.",
    "Briefly describes the goals, purpose, and scope of the Study.  This need not be listed if it can be inherited from a referenced publication.",
]
Example #5
0
import sys

import peppy
import pandas as pd

if __name__ == '__main__':
    # Load the PEP whose config path is the first command-line argument.
    project = peppy.Project(sys.argv[1])
    samples = project.sample_table

    # Keep only the columns whose dtype, after pandas' dtype inference via
    # convert_dtypes(), is not 'object'.
    inferred_dtypes = samples.convert_dtypes().dtypes
    samples = samples.loc[:, inferred_dtypes != 'object']

    # Use the sample name, when it survived the filter, as the index.
    if 'sample_name' in samples.columns:
        samples = samples.rename(columns={'sample_name': 'Sample_ID'})
        samples = samples.set_index('Sample_ID')

    # Emit the table as TSV on standard output.
    samples.to_csv(sys.stdout, sep='\t')
Example #6
0
def validate_pep(namespace: str, pep_id: str):
    """Yield the peppy project stored at ``_PEP_STORES[namespace][pep_id]``.

    Raises:
        HTTPException: with status 400 when a ``NotImplementedError`` is
            raised while building the project or while the generator is
            suspended at its ``yield``.
    """
    try:
        project = peppy.Project(_PEP_STORES[namespace][pep_id])
        yield project
    except NotImplementedError as load_error:
        message = f"Error loading PEP. {load_error}"
        raise HTTPException(status_code=400, detail=message)
Example #7
0
# Scratch script to help writing test cases; meant to be run interactively.
# You must be in the pep tests dir:
# cd ${CODE}/peppy/tests
# ipython

import conftest

print("Establishing Project for testing and exploration")
proj = conftest.interactive()


import peppy
import os
try:
    # Python 3: reload() is no longer a builtin and lives in importlib.
    from importlib import reload
except ImportError:
    # Python 2: reload() is a builtin, nothing to import.
    pass
# Pick up local edits to peppy without restarting the interpreter.
reload(peppy)
# 50 == logging.CRITICAL: silence everything below critical.
peppy._LOGGER.setLevel(50)
p = peppy.Project(os.path.expandvars("$CODE/example_peps/example2/project_config.yaml"))

# The bare expressions below are for inspection at the interactive prompt.
p.get_sample("frog_1").subsamples

p.sample_table
p.subsample_table


p.get_sample("frog_2").subsamples
p.get_sample("frog_2").subsamples[0].subsample_name

p.get_subsample(sample_name="frog_1", subsample_name="2")


subsamples = []
type(subsamples)
Example #8
0
def main(cli=None):
    """Run the ``ngs_analysis`` recipe for a peppy project.

    Loads the project from ``args.config_file``, changes into its output
    directory, optionally drops samples that failed QC, and then runs
    ``main_analysis_pipeline`` once per data type (ATAC-seq, ChIP-seq or
    RNA-seq). Data types without a matching analysis class are skipped with
    a message.

    Args:
        cli: value forwarded to ``parse_arguments().parse_args``.
            NOTE(review): presumably an argparse argument list where
            ``None`` means "use sys.argv" -- confirm against
            ``parse_arguments``.

    Raises:
        ValueError: if a data type under consideration has no samples.
    """
    args = parse_arguments().parse_args(cli)

    # Start project
    # Adjacent string literals are concatenated verbatim, so each first
    # fragment ends with an explicit space.
    print("Starting peppy project with project "
          "configuration file: '{}'".format(args.config_file))
    prj = peppy.Project(args.config_file)
    print("Changing directory to project root "
          "directory: '{}'.".format(prj.metadata.output_dir))
    os.chdir(prj.metadata.output_dir)
    if args.pass_qc:
        print("Filtering samples out which didn't pass QC "
              "as specified in sample annotation in column 'pass_qc'")
        prj._samples = [
            s for s in prj._samples
            if s.pass_qc not in ["0", 0, "False", False]
        ]

    # ANALYSIS
    if args.data_type is None:
        print("Type of analysis not specified. Will run independent analysis "
              "for all types of data in the sample annotation sheet.")
        data_types = sorted({s.protocol for s in prj._samples})
    else:
        print("Type of analysis specified. Will run only "
              "analysis for samples of type '{}'.".format(args.data_type))
        data_types = [args.data_type]
    print("Sample data types: '{}'.".format(",".join(data_types)))
    if args.name is None:
        print("Analysis name not specified, will use name in "
              "project configuration file: '{}'.".format(prj.project_name))
        args.name = prj.project_name

    for data_type in data_types:
        print("Starting analysis for samples of type: '{}'.".format(data_type))
        samples = [s for s in prj._samples if s.protocol == data_type]
        if not samples:
            raise ValueError(
                "There were no valid samples for this analysis type!")
        print("Samples under consideration: '{}'. ".format(
            ",".join(s.name for s in samples)) +
              "Total of {} samples.".format(len(samples)))

        kwargs = {
            "prj": prj,
            "samples": samples,
            "results_dir": args.results_dir
        }
        if data_type == "ATAC-seq":
            print("Initializing ATAC-seq analysis")
            analysis = ATACSeqAnalysis(name=args.name + "_atacseq", **kwargs)
        elif data_type == "ChIP-seq":
            print("Initializing ChIP-seq analysis")
            analysis = ChIPSeqAnalysis(name=args.name + "_chipseq", **kwargs)
        elif data_type == "RNA-seq":
            print("Initializing RNA-seq analysis")
            analysis = RNASeqAnalysis(name=args.name + "_rnaseq", **kwargs)
        else:
            # Without this branch `analysis` would be unbound on the first
            # iteration, or silently reuse the previous iteration's object.
            print("Data type '{}' is not supported by this recipe. "
                  "Skipping.".format(data_type))
            continue

        print("Running main analysis.")
        main_analysis_pipeline(analysis,
                               alpha=args.alpha,
                               abs_fold_change=args.abs_fold_change)
        print("`ngs_analysis` recipe completed successfully!")