def cli(cohort, batch, overview):
    """Removes all associated rows of specified batch/cohort from database."""
    # NOTE: the docstring above is left untouched because it presumably doubles
    # as the click --help text for this command (decorators are not visible
    # here -- confirm).
    #
    # Parameters:
    #   cohort   -- iterable of cohort ids to delete.
    #   batch    -- iterable of (cohort_id, batch_name) pairs to delete.
    #   overview -- when truthy, print the database overview afterwards.
    #
    # Raises Exception when both --cohort and --batch are given, or when a
    # requested cohort/batch is not present in the database.

    # Fail fast: the two options are mutually exclusive, so reject the command
    # before opening a session or deleting anything instead of relying on a
    # rollback after the cohort rows have already been removed.
    if cohort and batch:
        raise Exception(
            "\nBoth --cohort and --batch used in the same command, please try again as two seperate commands."
            "\nNothing has been deleted from the database.")

    with session_scope() as session:
        if cohort:
            for cohort_id in cohort:
                cohort_query = session.query(Cohort.id).filter(
                    Cohort.id == cohort_id)
                if cohort_query.scalar() is None:
                    # Raising here is expected to roll back everything done in
                    # this session (session_scope is defined elsewhere).
                    raise Exception(
                        f"No cohort {cohort_id} is present in the database. Nothing has been deleted."
                        "\nRun --overview option to see what is currently present."
                    )
                # Delete the cohort row (associated rows are expected to
                # follow -- TODO confirm cascade rules on the models).
                cohort_query.delete()
            click.echo(
                f"Cohort(s) {list(cohort)} and all assoicated entries have been deleted."
            )
        elif batch:
            for cohort_id, batch_name in batch:
                batch_id_query = session.query(Batch.id).filter(
                    Batch.batch_name == batch_name,
                    Batch.cohort_id == cohort_id)
                if batch_id_query.scalar() is None:
                    # Raising here is expected to roll back everything done in
                    # this session (session_scope is defined elsewhere).
                    raise Exception(
                        f"No batch {batch_name} is present in the database. Nothing has been deleted."
                        "\nRun --overview option to see what is currently present."
                    )

                # Load each row once instead of re-querying it for every
                # attribute read/write.
                batch_row = session.query(Batch).filter(
                    Batch.batch_name == batch_name,
                    Batch.cohort_id == cohort_id).one()
                cohort_row = session.query(Cohort).filter(
                    Cohort.id == cohort_id).one()

                # Keep the cohort's aggregate counters in sync with the
                # batch that is about to disappear.
                cohort_row.sample_count = (cohort_row.sample_count -
                                           batch_row.sample_count)
                cohort_row.batch_count = cohort_row.batch_count - 1

                # Delete the batch row itself.
                batch_id_query.delete()
            click.echo(
                f"Batch(s) {list(batch)} and all assoicated entries have been deleted."
            )

        if overview:
            print_overview(session)
def check_db_paths(skip_update):
    """Check that every path stored in the batch table still exists on disk.

    The ``config`` and ``crud`` modules are reloaded first. Each distinct
    stored path is then tested with ``exists``. For a missing path a message is
    printed and, unless ``skip_update`` is truthy, the user is asked whether to
    supply a replacement; a replacement that exists is written to every batch
    row that holds the old path.

    Args:
        skip_update: When truthy, only report missing paths and never prompt.
    """
    importlib.reload(config)
    importlib.reload(crud)

    with crud.session_scope() as session:
        # One-column rows holding the path of every batch.
        path_rows = session.query(Batch.path)
        # Paths that were found on disk or successfully updated; these are
        # not examined again.
        verified = []
        print("Checking database paths...")

        for row in path_rows:
            stored_path = row[0]
            if stored_path in verified:
                continue

            if exists(stored_path):
                verified.append(stored_path)
                continue

            dir_name = basename(stored_path)
            print(
                f"File '{dir_name}' no longer exists at path '{stored_path}'")

            if skip_update:
                continue
            if not click.confirm('Would you like to update this path now?'):
                continue

            answer = click.prompt(
                f"Please enter the correct path for the directory '{dir_name}'"
            )
            replacement = abspath(answer)

            # Only accept a replacement that actually exists on disk.
            if not exists(replacement):
                print(
                    f"Aborting update... file path '{replacement}' does not exist"
                )
                continue

            print(
                f"Updating old path '{stored_path}' to new path '{replacement}'"
            )
            session.query(Batch).filter(Batch.path == stored_path).update(
                {Batch.path: replacement}, synchronize_session=False)
            verified.append(stored_path)

        print("Database check complete!")
def cli(select, tool_metric, batch, cohort, batch_description,
        cohort_description, sample_description, flowcell_lane, library_id,
        platform, centre, reference, type, multiqc, csv, pretty, overview,
        output, filename):
    """Query the falcon qc database by specifying what you would like to select on by using the --select option, and what to filter on (--tool_metric, --batch, or --cohort)."""
    # NOTE: the docstring above is left untouched because it presumably doubles
    # as the click --help text for this command (decorators are not visible
    # here -- confirm).
    #
    # Flow: validate the output options, work out which tables must be joined,
    # build the query with query_select/query_metric, apply the filters, then
    # emit the result as a multiqc report, a csv file, a pretty table or csv
    # text on stdout.
    #
    # Raises Exception for an invalid --output/--filename combination, for an
    # unknown tool or metric, and when the query returns no rows.
    if (multiqc or csv) and not output:
        click.echo(
            "When using multiqc or csv option, please specify a directory to save in using the -o option"
        )
        sys.exit(1)

    # Check output and filename for validity.
    if output:
        output = os.path.abspath(output)
        if not os.path.exists(output):
            raise Exception(f"Output path {output} does not exist.")
        if not os.path.isdir(output):
            raise Exception(
                f"Output path {output} is NOT a directory. Please use a directory path with --output."
            )
        if not filename:
            raise Exception(
                "--output requires --filename (no extension) to name the csv or multiqc report"
            )

    ### ================================= SELECT ==========================================####
    # Both select and filter options influence whether certain tables need to
    # be joined; 'joins' collects what is needed and 'joined' is handed to the
    # query helpers to track what has already been joined.
    select = list(select)
    join = {'joins': set(), 'joined': set()}

    # A multiqc report is built from sample rows, so 'sample' must be selected.
    if multiqc and "sample" not in select:
        select.insert(0, 'sample')

    if (sample_description or flowcell_lane or library_id or platform
            or centre or reference or type or 'sample' in select):
        join['joins'].add('sample')
    if cohort or cohort_description or 'cohort' in select:
        join['joins'].add('cohort')
    if batch or batch_description or 'batch' in select:
        join['joins'].add('batch')
    if tool_metric or 'tool-metric' in select:
        join['joins'].add('tool-metric')
    # Everything that is selected must be joined as well.
    join['joins'].update(select)

    with session_scope() as session:
        falcon_query = query_select(session, select, join, tool_metric,
                                    multiqc)

        ### ================================= FILTER ==========================================####
        ## 1. Sample
        if tool_metric:
            falcon_query = query_metric(falcon_query, join, tool_metric)
        if sample_description:
            # Any one of the given description fragments may match.
            conditions = [
                Sample.description.contains(d, autoescape=True)
                for d in sample_description
            ]
            falcon_query = falcon_query.filter(or_(*conditions))
        if flowcell_lane:
            falcon_query = falcon_query.filter(
                Sample.flowcell_lane.in_(flowcell_lane))
        if library_id:
            falcon_query = falcon_query.filter(
                Sample.library_id.in_(library_id))
        if platform:
            falcon_query = falcon_query.filter(Sample.platform.in_(platform))
        if centre:
            falcon_query = falcon_query.filter(Sample.centre.in_(centre))
        if reference:
            falcon_query = falcon_query.filter(
                Sample.reference_genome.in_(reference))
        if type:
            falcon_query = falcon_query.filter(Sample.type.in_(type))

        ## 2. Cohort
        if cohort:
            falcon_query = falcon_query.filter(Cohort.id.in_(cohort))
        if cohort_description:
            conditions = [
                Cohort.description.contains(d, autoescape=True)
                for d in cohort_description
            ]
            falcon_query = falcon_query.filter(or_(*conditions))

        ## 3. Batch
        if batch:
            falcon_query = falcon_query.filter(Batch.batch_name.in_(batch))
        if batch_description:
            conditions = [
                Batch.description.contains(d, autoescape=True)
                for d in batch_description
            ]
            falcon_query = falcon_query.filter(or_(*conditions))

        ### ============================== RESULT / OUTPUT =======================================####
        if not falcon_query.all():
            # An empty result is often caused by a misspelt tool or metric, so
            # check those first to give a more specific error.
            for tm in tool_metric:
                if session.query(RawData.id).filter(
                        RawData.qc_tool == tm[0]).first() is None:
                    raise Exception(
                        f"The tool {tm[0]} is not present in the database, please check its validity."
                    )
                metrics = session.query(
                    RawData.metrics).filter(RawData.qc_tool == tm[0]).first()
                if tm[1] not in metrics[0]:
                    raise Exception(
                        f"The metric {tm[1]} is not present in the metrics of tool {tm[0]}, please check its validity."
                    )
            raise Exception("No results from query")

        # Create header ("<table>.<column>") from the current query.
        query_header = [
            col["entity"].__tablename__ + "." + col["name"]
            for col in falcon_query.column_descriptions
        ]

        if multiqc:
            click.echo("Creating multiqc report...")
            create_new_multiqc([(row.sample_name, row.path)
                                for row in falcon_query], output, filename)

        if csv:
            click.echo("Creating csv report...")
            create_csv(query_header, falcon_query, output, filename)

        # Print to stdout only when no report and no overview was requested.
        if not (csv or multiqc or overview):
            click.echo(f'Query returned {falcon_query.count()} samples.')
            if pretty:
                click.echo(
                    tabulate(falcon_query, query_header, tablefmt="pretty"))
            else:
                print_csv(query_header, falcon_query)

        if overview:
            print_overview(session)
def cli(output, filename, sql, multiqc, csv, overview, pretty):
    """SQL query tool: ensure all queries SELECT for sample_name from sample table AND path from batch table"""
    # NOTE: the docstring above is left untouched because it presumably doubles
    # as the click --help text for this command (decorators are not visible
    # here -- confirm).
    #
    # Flow: validate the output options, read the raw SQL statement from the
    # .txt file given with --sql, execute it, then emit the rows as a multiqc
    # report, a csv file, a pretty table or csv text on stdout.
    #
    # Raises Exception for an invalid --output/--filename combination.
    if (multiqc or csv) and not output:
        click.echo(
            "When using multiqc or csv option, please specify a directory to save in using the -o option."
        )
        sys.exit(1)

    if output:
        output = os.path.abspath(output)
        if not os.path.exists(output):
            raise Exception(f"Output path {output} does not exist.")
        if not os.path.isdir(output):
            raise Exception(
                f"Output path {output} is NOT a directory. Please use a directory path with --output."
            )
        if not filename:
            raise Exception(
                "--output requires --filename (no extension) to name the csv or multiqc report"
            )

    click.echo("Processing sql query!")

    with session_scope() as session:
        # Everything that needs a query result lives under `if sql:` so that
        # running with only --overview does not try to execute a missing
        # statement.
        if sql:
            if not sql.endswith('.txt'):
                click.echo(
                    "When using --sql option, please supply path to .txt containing the raw SQL statement like in the examples."
                )
                sys.exit(1)

            # Read the statement verbatim; joining readlines() with "\n"
            # would double every line break in the file.
            with open(os.path.abspath(sql)) as sql_file:
                statement = sql_file.read()

            result = session.execute(
                statement)  # Executes SQL query against database.
            query_header = result.keys()

            # The result is a single-pass cursor, so fetch the rows once and
            # share the list between every consumer below. len(rows) is also
            # a dependable count, unlike rowcount for SELECT statements.
            # NOTE(review): create_csv/print_csv now receive a list of rows
            # rather than the result object -- they appear to only iterate
            # it (the query command passes them a Query); confirm.
            rows = result.fetchall()
            click.echo(f"Query returned {len(rows)} samples.")

            if multiqc:
                # The report needs exactly the sample_name and path columns.
                required_cols = [
                    col for col in query_header
                    if 'sample_name' in col or 'path' in col
                ]
                if len(required_cols) != 2:
                    click.echo(
                        "When using --multiqc option, please select for sample.sample_name AND batch.path (see example_3)."
                    )
                    sys.exit(1)
                click.echo("Creating multiqc report...")
                create_new_multiqc([(row.sample_name, row.path)
                                    for row in rows], output, filename)

            if csv:
                click.echo("Creating csv report...")
                create_csv(query_header, rows, output, filename)

            # Print to stdout only when no report and no overview was
            # requested; --pretty selects the table instead of (not in
            # addition to) the csv text.
            if not (multiqc or csv or overview):
                if pretty:
                    click.echo(
                        tabulate(rows, query_header, tablefmt="pretty"))
                else:
                    print_csv(query_header, rows)

        if overview:
            print_overview(session)
def cli(directory, sample_metadata, input_csv, batch_description,
        cohort_description, batch_metadata, cohort_metadata):
    """Saves the given cohort directory to the falcon_multiqc database"""
    # NOTE: the docstring above is left untouched because it presumably doubles
    # as the click --help text for this command (decorators are not visible
    # here -- confirm).
    #
    # Parameters:
    #   directory / sample_metadata -- a single multiqc output directory and
    #       its metadata csv; both are required together.
    #   input_csv -- path to a csv with "directory,sample_metadata" headers
    #       listing several such pairs (mutually exclusive with directory).
    #   batch_description / cohort_description -- passed through to
    #       save_sample.
    #   batch_metadata / cohort_metadata -- iterables of comma separated
    #       lines (header first) used to update stored descriptions.
    #
    # Raises Exception for an invalid option combination, a malformed
    # metadata line, or a batch/cohort that is not in the database.
    if (not directory and not input_csv) and not (batch_metadata
                                                  or cohort_metadata):
        raise Exception(
            "Save requires either an input --directory OR --input_csv.")

    if directory and input_csv:
        raise Exception(
            "Save requires only one of input --directory OR --input_csv, not both."
        )

    with session_scope() as session:
        samples_saved = False

        # Did we get a csv?
        if input_csv:
            # Check we actually have a csv
            if not input_csv.endswith('.csv'):
                click.echo(
                    "A csv file is required when using the --input_csv flag.")
                sys.exit(1)

            with open(input_csv, 'r', newline='') as csv_file:
                csv_reader = csv.reader(csv_file)
                # An empty file yields no header; treat it as a bad header
                # instead of crashing with StopIteration/IndexError.
                header = next(csv_reader, [])
                if header[:2] != ["directory", "sample_metadata"]:
                    click.echo(
                        "CSV requires directory and sample_metadata headers.")
                    sys.exit(1)

                # Rows are saved through the session opened above; a nested
                # session_scope() here would only shadow it.
                for row in csv_reader:
                    if not row:
                        # Blank line in the csv, nothing to save.
                        continue
                    # Check that the files in the csv actually exist, and
                    # report the offending value from this row.
                    if not exists(row[0]):
                        click.echo(
                            f"Error: Directory {row[0]} does not exist."
                            "\nAll database entries have been rolled back, please retry after fixing"
                        )
                        sys.exit(1)
                    if not exists(row[1]):
                        click.echo(
                            f"Error: Sample metadata {row[1]} does not exist."
                            "\nAll database entries have been rolled back, please retry after fixing"
                        )
                        sys.exit(1)
                    # save the info in that row
                    save_sample(abspath(row[0]), row[1], session,
                                cohort_description, batch_description)
            samples_saved = True

        elif directory or sample_metadata:
            if not (directory and sample_metadata):
                click.echo(
                    "Please specify the path to multiqc_output AND path to respective metadata.csv when saving to database"
                )
                sys.exit(1)
            # Default: when a single directory or file is provided
            save_sample(abspath(directory), sample_metadata, session,
                        cohort_description, batch_description)
            samples_saved = True

        if samples_saved:
            click.echo("All multiqc and metadata results have been saved.")
        # Commit all rows saved so far before touching the descriptions.
        session.commit()

        # Save batch metadata.
        if batch_metadata:
            # Skip header (tolerating an empty input).
            next(batch_metadata, None)
            for line in batch_metadata:
                if not line.strip():
                    # Ignore blank lines such as a trailing newline.
                    continue
                fields = line.split(",")
                try:
                    batch_name = fields[0].strip(stripChars)
                    new_batch_description = fields[2].strip(stripChars)
                except IndexError as err:
                    raise Exception(
                        "Batch_metadata format is invalid, Accepted format is:"
                        "\n'Batch_Name' 'Number_of_samples' 'Batch_description'"
                    ) from err

                # Update each batch with the given batch description.
                try:
                    batch_row = session.query(Batch).filter(
                        Batch.batch_name == batch_name).one()
                except NoResultFound as err:
                    raise Exception(
                        f"Batch '{batch_name}' is not present in the database so description cannot be added."
                        "\nAll batch description entries have been rolled back, please retry after fixing"
                    ) from err
                batch_row.description = new_batch_description
            session.commit()
            click.echo("Batch descriptions has been saved.")

        # Save Cohort metadata
        if cohort_metadata:
            # Skip header (tolerating an empty input).
            next(cohort_metadata, None)
            for line in cohort_metadata:
                if not line.strip():
                    # Ignore blank lines such as a trailing newline.
                    continue
                fields = line.split(",")
                try:
                    cohort_name = fields[0].strip(stripChars)
                    new_cohort_description = fields[4].strip(stripChars)
                except IndexError as err:
                    raise Exception(
                        "Cohort_metadata format is invalid, Accepted format is:"
                        "\n'Cohort_Name' 'Number_of_samples' 'Number_of_Batches' 'type' 'Cohort_description'"
                    ) from err

                # Update each Cohort with the given Cohort description.
                try:
                    cohort_row = session.query(Cohort).filter(
                        Cohort.id == cohort_name).one()
                except NoResultFound as err:
                    raise Exception(
                        f"Cohort '{cohort_name}' is not present in the database so description cannot be added, exiting."
                        "\nAll cohort description entries have been rolled back, please retry after fixing"
                    ) from err
                cohort_row.description = new_cohort_description
            session.commit()
            click.echo("Cohort descriptions has been saved.")