async def validate_fromhub(
    namespace: str,
    pep_id: str,
):
    proj = peppy.Project(_PEP_STORES[namespace][pep_id])
    vals = {
        "name": pep_id,
        "filenames": "not provided",
        "peppy_version": peppy_version,
        "validations": [],
    }
    for schema_id, schema_data in schemas_to_test.items():
        vals["validations"].append({
            "id": schema_id,
            "name": schema_data["name"],
            "docs": schema_data["docs"],
            "schema": schema_data["schema"],
            "result": vwrap(proj, schema_data["schema"]),
        })
    return JSONResponse(content=vals)
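# The endpoint above relies on a module-level vwrap helper. A minimal sketch of
# such a helper, assuming eido is used for schema validation (an assumption,
# not the project's actual code): return the error text, or None when valid.
import eido

def vwrap(p, schema):
    # Run eido validation and fold the outcome into a JSON-friendly value.
    try:
        eido.validate_project(project=p, schema=schema)
    except Exception as e:
        return str(e)
    return None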
def main():
    args = parse_arguments()

    # Start project object
    prj = peppy.Project(args.project_config_file)
    if "trackhubs" not in prj._config:
        raise ValueError("Project configuration does not have a trackhub section.")
    if "trackhub_dir" not in prj._config.trackhubs:
        raise ValueError(
            "Project trackhub configuration does not have a trackhub_dir attribute."
        )

    if args.attributes is None:
        # If no attributes are set try to use group attributes specified in the project config
        if hasattr(prj, "group_attributes"):
            print(
                "Using group attributes from project configuration file: '{}'.".format(
                    ",".join(prj.group_attributes)
                )
            )
            args.attributes = prj.group_attributes
        # If none are set in project config, use just "sample_name"
        else:
            args.attributes = ["sample_name"]
    else:
        args.attributes = args.attributes.split(",")
    if args.color_attribute is None:
        args.color_attribute = args.attributes[0]

    # Setup paths and hub files
    bigwig_dir = os.path.join(prj._config.trackhubs.trackhub_dir)
    os.makedirs(bigwig_dir, exist_ok=True)
    track_hub = os.path.join(bigwig_dir, "hub.txt")
    genomes_hub = os.path.join(bigwig_dir, "genomes.txt")
    open(genomes_hub, "w").write("")

    # Setup attributes
    proj_name = (
        prj["name"]
        if "name" in prj
        else os.path.basename(prj._config["root_dir"])
        if "root_dir" in prj._config
        else os.path.join("..", "..", os.path.basename(prj._config.sample_table))
    )
    proj_desc = prj._config["description"] if "description" in prj._config else proj_name
    user_email = prj._config["email"] if "email" in prj._config else ""

    # In the future there will be more actions than this
    make_ucsc_trackhub(
        args, prj, track_hub, bigwig_dir, genomes_hub, proj_name, proj_desc, user_email
    )

    track_file = os.path.join(bigwig_dir, "igv", "index.html")
    track_url = os.path.join(prj.config["trackhubs"]["url"], "igv")
    make_igv_tracklink(prj, track_file, track_url)
async def validate_pep(
    request: Request,
    files: List[UploadFile] = File(...),
    schemas_to_test=schemas_to_test,
):
    ufiles = []
    # NOTE: the upload folder is assumed to already exist.
    upload_folder = "uploads"
    for file in files:
        print(f"File: '{file}'")
        file_object = file.file
        full_path = os.path.join(upload_folder, file.filename)
        # if not os.path.isfile(full_path):
        #     print(f"failed isfile test: {full_path}")
        #     return JSONResponse(content={"error": "No files provided."})
        uploaded = open(full_path, "wb+")
        shutil.copyfileobj(file_object, uploaded)
        uploaded.close()
        print(uploaded.name)
        f, ext = os.path.splitext(file.filename)
        print(ext)
        # Treat the last uploaded YAML/CSV file as the project configuration.
        if ext in (".yaml", ".yml", ".csv"):
            pconf = uploaded.name
            print("Got yaml:", pconf)
    print(pconf)
    p = peppy.Project(pconf)
    print(p)
    vals = {
        "name": pconf,
        "filenames": [file.filename for file in files],
        "peppy_version": peppy_version,
        "validations": [],
    }
    for schema_id, schema_data in schemas_to_test.items():
        vals["validations"].append({
            "id": schema_id,
            "name": schema_data["name"],
            "docs": schema_data["docs"],
            "schema": schema_data["schema"],
            "result": vwrap(p, schema_data["schema"]),
        })
    return JSONResponse(content=vals)
class ChainedAssignent:
    # Context manager that temporarily sets pandas' chained-assignment warning mode
    # (None silences SettingWithCopyWarning) and restores the previous setting on exit.
    def __init__(self, chained=None):
        acceptable = [None, "warn", "raise"]
        assert chained in acceptable, "chained must be in " + str(acceptable)
        self.swcw = chained

    def __enter__(self):
        self.saved_swcw = pd.options.mode.chained_assignment
        pd.options.mode.chained_assignment = self.swcw
        return self

    def __exit__(self, *args):
        pd.options.mode.chained_assignment = self.saved_swcw


pep = peppy.Project(snakemake.input.pep)
sample_metadata = pep.sample_table.rename(columns={"sample_name": "alias"})

with open(snakemake.input.config) as h:
    conf = yaml.full_load(h)

file_name_template = snakemake.params[0]

# ENA_study
ena_study_cols = ["alias", "title", "study_type", "study_abstract"]
ena_study_comments = [
    "Unique identificator for a study. This is used to link experiments to the study.",
    "Title of the study as would be used in a publication.",
    "The STUDY_TYPE presents a controlled vocabulary for expressing the overall purpose of the study.",
    "Briefly describes the goals, purpose, and scope of the Study. This need not be listed if it can be inherited from a referenced publication.",
]
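# Hypothetical usage sketch of the ChainedAssignent context manager defined above:
# temporarily silence pandas' SettingWithCopyWarning while adding a derived column.
# The column name "ena_alias" is illustrative only, not part of the original script.
with ChainedAssignent():
    sample_metadata["ena_alias"] = sample_metadata["alias"]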
import sys

import peppy
import pandas as pd

if __name__ == '__main__':
    pep = peppy.Project(sys.argv[1])
    df = pep.sample_table
    # Keep only columns that are not object-dtyped after convert_dtypes()
    df = df.loc[:, df.convert_dtypes().dtypes != 'object']
    if 'sample_name' in df.columns:
        df = df.rename(columns={'sample_name': 'Sample_ID'}).set_index('Sample_ID')
    df.to_csv(sys.stdout, sep='\t')
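# Usage sketch (the script name below is hypothetical): the script takes a PEP
# project configuration as its first argument and writes the filtered sample
# table to stdout as a tab-separated file, e.g.
#
#     python pep_sample_table_to_tsv.py project_config.yaml > sample_table.tsv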
def validate_pep(namespace: str, pep_id: str):
    try:
        yield peppy.Project(_PEP_STORES[namespace][pep_id])
    except NotImplementedError as nie:
        raise HTTPException(status_code=400, detail=f"Error loading PEP. {nie}")
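# Minimal sketch, assuming FastAPI routing (not the project's actual route code),
# of how a generator dependency like validate_pep above is consumed: the path
# parameters are forwarded to the dependency and the yielded peppy.Project is
# injected into the endpoint.
from fastapi import Depends, FastAPI

app = FastAPI()

@app.get("/pep/{namespace}/{pep_id}")
async def pep_summary(proj: peppy.Project = Depends(validate_pep)):
    # Return a couple of basic facts about the loaded project.
    return {"name": proj.name, "n_samples": len(proj.samples)}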
# to help writing test cases.
# You must be in the pep tests dir:
# cd ${CODE}/peppy/tests
# ipython
import conftest

print("Establishing Project for testing and exploration")
proj = conftest.interactive()

import os
from importlib import reload  # reload is not a builtin in Python 3

import peppy

reload(peppy)
peppy._LOGGER.setLevel(50)  # 50 == logging.CRITICAL; suppress lower-severity messages
p = peppy.Project(os.path.expandvars("$CODE/example_peps/example2/project_config.yaml"))
p.get_sample("frog_1").subsamples
p.sample_table
p.subsample_table
p.get_sample("frog_2").subsamples
p.get_sample("frog_2").subsamples[0].subsample_name
p.get_subsample(sample_name="frog_1", subsample_name="2")

subsamples = []
type(subsamples)
def main(cli=None):
    args = parse_arguments().parse_args(cli)

    # Start project
    print(
        "Starting peppy project with project "
        "configuration file: '{}'".format(args.config_file)
    )
    prj = peppy.Project(args.config_file)
    print(
        "Changing directory to project root "
        "directory: '{}'.".format(prj.metadata.output_dir)
    )
    os.chdir(prj.metadata.output_dir)
    if args.pass_qc:
        print(
            "Filtering out samples which didn't pass QC "
            "as specified in sample annotation in column 'pass_qc'"
        )
        prj._samples = [
            s for s in prj._samples if s.pass_qc not in ["0", 0, "False", False]
        ]

    # ANALYSIS
    if args.data_type is None:
        print(
            "Type of analysis not specified. Will run independent analysis "
            "for all types of data in the sample annotation sheet."
        )
        data_types = sorted(list(set([s.protocol for s in prj._samples])))
        print("Sample data types: '{}'.".format(",".join(data_types)))
    else:
        print(
            "Type of analysis specified. Will run only "
            "analysis for samples of type '{}'.".format(args.data_type)
        )
        data_types = [args.data_type]
        print("Sample data types: '{}'.".format(",".join(data_types)))
    if args.name is None:
        print(
            "Analysis name not specified, will use name in "
            "project configuration file: '{}'.".format(prj.project_name)
        )
        args.name = prj.project_name

    for data_type in data_types:
        print("Starting analysis for samples of type: '{}'.".format(data_type))
        samples = [s for s in prj._samples if (s.protocol == data_type)]
        if len(samples) > 0:
            print(
                "Samples under consideration: '{}'. ".format(",".join([s.name for s in samples]))
                + "Total of {} samples.".format(len(samples))
            )
        else:
            raise ValueError("There were no valid samples for this analysis type!")

        kwargs = {"prj": prj, "samples": samples, "results_dir": args.results_dir}
        if data_type in ["ATAC-seq"]:
            print("Initializing ATAC-seq analysis")
            analysis = ATACSeqAnalysis(name=args.name + "_atacseq", **kwargs)
        elif data_type in ["ChIP-seq"]:
            print("Initializing ChIP-seq analysis")
            analysis = ChIPSeqAnalysis(name=args.name + "_chipseq", **kwargs)
        elif data_type in ["RNA-seq"]:
            print("Initializing RNA-seq analysis")
            analysis = RNASeqAnalysis(name=args.name + "_rnaseq", **kwargs)

        print("Running main analysis.")
        main_analysis_pipeline(
            analysis, alpha=args.alpha, abs_fold_change=args.abs_fold_change
        )

    print("`ngs_analysis` recipe completed successfully!")
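# The recipe above expects a parse_arguments() helper that returns an
# ArgumentParser. A hedged sketch, derived only from the attributes main()
# reads; the flag names and defaults below are assumptions, not the actual CLI.
from argparse import ArgumentParser

def parse_arguments():
    parser = ArgumentParser(prog="ngs_analysis", description="NGS analysis recipe.")
    parser.add_argument("config_file", help="PEP project configuration file.")
    parser.add_argument("-n", "--name", default=None, help="Analysis name.")
    parser.add_argument("-t", "--data-type", dest="data_type", default=None,
                        help="Restrict analysis to one data type (e.g. ATAC-seq).")
    parser.add_argument("-q", "--pass-qc", dest="pass_qc", action="store_true",
                        help="Only use samples marked as passing QC.")
    parser.add_argument("-r", "--results-dir", dest="results_dir", default="results")
    parser.add_argument("-a", "--alpha", type=float, default=0.05)
    parser.add_argument("-f", "--abs-fold-change", dest="abs_fold_change",
                        type=float, default=0)
    return parser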