Exemple #1
0
def cli(cohort, batch, overview):
    """Removes all associated rows of specified batch/cohort from database."""
    # Developer notes (kept out of the docstring, which is user-facing help):
    #   cohort   -- iterable of cohort ids to delete.
    #   batch    -- iterable of (cohort_id, batch_name) pairs to delete.
    #   overview -- when truthy, print the database overview afterwards.
    # Raises Exception when both --cohort and --batch are given, or when a
    # requested cohort/batch is not present in the database.

    # Reject the ambiguous combination before touching the database, so the
    # check no longer depends on a delete having already been issued.
    if cohort and batch:
        raise Exception(
            "\nBoth --cohort and --batch used in the same command, please try again as two seperate commands."
            "\nNothing has been deleted from the database.")

    with session_scope() as session:
        if cohort:
            for cohort_id in cohort:
                cohort_rows = session.query(
                    Cohort.id).filter(Cohort.id == cohort_id)
                if cohort_rows.scalar() is None:
                    # Raising inside the scope aborts the whole command.
                    raise Exception(
                        f"No cohort {cohort_id} is present in the database. Nothing has been deleted."
                        "\nRun --overview option to see what is currently present."
                    )
                # Delete the cohort row; associated rows are presumably
                # removed through cascade rules defined on the models.
                cohort_rows.delete()
            click.echo(
                f"Cohort(s) {list(cohort)} and all assoicated entries have been deleted."
            )
        elif batch:
            for cohort_id, batch_name in batch:
                batch_filter = (Batch.batch_name == batch_name,
                                Batch.cohort_id == cohort_id)
                if session.query(
                        Batch.id).filter(*batch_filter).scalar() is None:
                    # Raising inside the scope aborts the whole command.
                    raise Exception(
                        f"No batch {batch_name} is present in the database. Nothing has been deleted."
                        "\nRun --overview option to see what is currently present."
                    )

                batch_row = session.query(Batch).filter(*batch_filter).one()
                cohort_row = session.query(Cohort).filter(
                    Cohort.id == cohort_id).one()

                # Keep the cohort's aggregate counters in step with the
                # batch that is about to disappear.
                cohort_row.sample_count = (cohort_row.sample_count -
                                           batch_row.sample_count)
                cohort_row.batch_count = cohort_row.batch_count - 1

                # Delete the batch row (and, presumably via cascade, its
                # associated rows).
                session.query(Batch.id).filter(*batch_filter).delete()
            click.echo(
                f"Batch(s) {list(batch)} and all assoicated entries have been deleted."
            )

    if overview:
        # Use a fresh scope: the deleting session's scope has already exited
        # at this point and should not be reused.
        with session_scope() as session:
            print_overview(session)
Exemple #2
0
def check_db_paths(skip_update):
    """Check that every path stored in the batch table still exists on disk.

    The ``config`` and ``crud`` modules are reloaded first. Each distinct
    path is then tested with ``exists``; missing paths are reported and,
    unless ``skip_update`` is truthy, the user is offered the chance to
    enter a replacement path, which is written back to every batch row that
    used the old path.
    """
    importlib.reload(config)
    importlib.reload(crud)

    with crud.session_scope() as session:
        batch_paths = session.query(Batch.path)

        # Paths already confirmed (or already fixed) during this run.
        verified = []
        print("Checking database paths...")

        for row in batch_paths:
            old_path = row[0]

            if old_path in verified:
                continue

            if exists(old_path):
                verified.append(old_path)
                continue

            print(
                f"File '{basename(old_path)}' no longer exists at path '{old_path}'"
            )

            if skip_update:
                continue
            if not click.confirm('Would you like to update this path now?'):
                continue

            answer = click.prompt(
                f"Please enter the correct path for the directory '{basename(old_path)}'"
            )
            new_path = abspath(answer)

            # Only accept a replacement that actually exists.
            if not exists(new_path):
                print(
                    f"Aborting update... file path '{new_path}' does not exist"
                )
                continue

            print(f"Updating old path '{old_path}' to new path '{new_path}'")
            stale_rows = session.query(Batch).filter(Batch.path == old_path)
            stale_rows.update({Batch.path: new_path},
                              synchronize_session=False)
            verified.append(old_path)

        print("Database check complete!")
Exemple #3
0
def cli(select, tool_metric, batch, cohort, batch_description,
        cohort_description, sample_description, flowcell_lane, library_id,
        platform, centre, reference, type, multiqc, csv, pretty, overview,
        output, filename):
    """Query the falcon qc database by specifying what you would like to select on by using the --select option, and
    what to filter on (--tool_metric, --batch, or --cohort)."""
    # Developer notes (kept out of the docstring, which is user-facing help):
    #   * The query is built, filtered and consumed inside one session scope
    #     so the session is never used after its scope has exited.
    #   * Raises Exception for an invalid --output/--filename combination,
    #     for an unknown tool or metric, or when the query returns nothing.

    if (multiqc or csv) and not output:
        click.echo(
            "When using multiqc or csv option, please specify a directory to save in using the -o option"
        )
        sys.exit(1)

    # Check output and filename for validity.
    if output:
        output = os.path.abspath(output)
        if not os.path.exists(output):
            raise Exception(f"Output path {output} does not exist.")
        if not os.path.isdir(output):
            raise Exception(
                f"Output path {output} is NOT a directory. Please use a directory path with --output."
            )
        if not filename:
            raise Exception(
                "--output requires --filename (no extension) to name the csv or multiqc report"
            )

    ### ================================= SELECT  ==========================================####
    # Both select and filter options influence whether certain tables need to
    # be joined; `join` tracks what must be joined and what already has been.
    select = list(select)
    join = {'joins': set(), 'joined': set()}
    if multiqc and "sample" not in select:
        select.insert(0, 'sample')
    if (sample_description or flowcell_lane or library_id or platform
            or centre or reference or type or 'sample' in select):
        join['joins'].add('sample')
    if cohort or cohort_description or 'cohort' in select:
        join['joins'].add('cohort')
    if batch or batch_description or 'batch' in select:
        join['joins'].add('batch')
    if tool_metric or 'tool-metric' in select:
        join['joins'].add('tool-metric')
    join['joins'].update(select)

    with session_scope() as session:
        # SQLAlchemy query constructed from this command's options.
        falcon_query = query_select(session, select, join, tool_metric,
                                    multiqc)

        ### ================================= FILTER  ==========================================####
        if tool_metric:
            falcon_query = query_metric(falcon_query, join, tool_metric)

        ## 1. Sample
        if sample_description:
            falcon_query = falcon_query.filter(
                or_(*[
                    Sample.description.contains(d, autoescape=True)
                    for d in sample_description
                ]))

        # Plain "column IN (values)" filters; skipped when no value is given.
        membership_filters = (
            (flowcell_lane, Sample.flowcell_lane),
            (library_id, Sample.library_id),
            (platform, Sample.platform),
            (centre, Sample.centre),
            (reference, Sample.reference_genome),
            (type, Sample.type),
            ## 2. Cohort
            (cohort, Cohort.id),
        )
        for values, column in membership_filters:
            if values:
                falcon_query = falcon_query.filter(column.in_(values))

        if cohort_description:
            falcon_query = falcon_query.filter(
                or_(*[
                    Cohort.description.contains(d, autoescape=True)
                    for d in cohort_description
                ]))

        ## 3. Batch
        if batch:
            falcon_query = falcon_query.filter(Batch.batch_name.in_(batch))

        if batch_description:
            falcon_query = falcon_query.filter(
                or_(*[
                    Batch.description.contains(d, autoescape=True)
                    for d in batch_description
                ]))

        ### ============================== RESULT / OUTPUT =======================================####
        # Only one row is needed to know whether the query is empty.
        if falcon_query.first() is None:
            # Try to give a more specific reason than "no results".
            for tm in tool_metric or ():
                tool_row = session.query(
                    RawData.metrics).filter(RawData.qc_tool == tm[0]).first()
                if tool_row is None:
                    raise Exception(
                        f"The tool {tm[0]} is not present in the database, please check its validity."
                    )
                if tm[1] not in tool_row[0]:
                    raise Exception(
                        f"The metric {tm[1]} is not present in the metrics of tool {tm[0]}, please check its validity."
                    )

            raise Exception("No results from query")

        # Create header from the current query (falcon_query).
        query_header = [
            f'{col["entity"].__tablename__}.{col["name"]}'
            for col in falcon_query.column_descriptions
        ]

        if multiqc:
            click.echo("Creating multiqc report...")
            create_new_multiqc([(row.sample_name, row.path)
                                for row in falcon_query], output, filename)

        if csv:
            click.echo("Creating csv report...")
            create_csv(query_header, falcon_query, output, filename)

        # Print to the terminal only when no other output was requested.
        if not (csv or multiqc or overview):
            click.echo(f'Query returned {falcon_query.count()} samples.')
            if pretty:
                click.echo(
                    tabulate(falcon_query, query_header, tablefmt="pretty"))
            else:
                print_csv(query_header, falcon_query)

        if overview:
            print_overview(session)
Exemple #4
0
def cli(output, filename, sql, multiqc, csv, overview, pretty):
    """SQL query tool: ensure all queries SELECT for sample_name from sample table AND path from batch table"""
    # Developer notes (kept out of the docstring, which is user-facing help):
    #   * `sql` is a path to a .txt file holding one raw SQL statement.
    #   * Rows are fetched once into a list. A database result can only be
    #     iterated a single time, and more than one output may consume it.
    #   * Raises Exception for an invalid --output/--filename combination.

    if (multiqc or csv) and not output:
        click.echo(
            "When using multiqc or csv option, please specify a directory to save in using the -o option."
        )
        sys.exit(1)

    if output:
        output = os.path.abspath(output)
        if not os.path.exists(output):
            raise Exception(f"Output path {output} does not exist.")
        if not os.path.isdir(output):
            raise Exception(
                f"Output path {output} is NOT a directory. Please use a directory path with --output."
            )
        if not filename:
            raise Exception(
                "--output requires --filename (no extension) to name the csv or multiqc report"
            )

    click.echo("Processing sql query!")
    with session_scope() as session:
        if sql:
            if not sql.endswith('.txt'):
                click.echo(
                    "When using --sql option, please supply path to .txt containing the raw SQL statement like in the examples."
                )
                sys.exit(1)

            # Read the statement verbatim; joining readlines() with "\n"
            # would double every line break.
            with open(os.path.abspath(sql)) as sql_file:
                statement = sql_file.read()

            # Executes SQL query against database.
            result = session.execute(statement)
            # Create header from the current query result.
            query_header = list(result.keys())

            if result.returns_rows:
                # Materialise once so every consumer below sees all rows.
                rows = result.fetchall()
                row_total = len(rows)
            else:
                # Non-SELECT statement: nothing to fetch, report affected rows.
                rows = []
                row_total = result.rowcount
            click.echo(f"Query returned {row_total} samples.")

            if multiqc:
                required_columns = [
                    col for col in query_header
                    if 'sample_name' in col or 'path' in col
                ]
                if len(required_columns) != 2:
                    click.echo(
                        "When using --multiqc option, please select for sample.sample_name AND batch.path (see example_3)."
                    )
                    sys.exit(1)
                click.echo("Creating multiqc report...")
                create_new_multiqc([(row.sample_name, row.path)
                                    for row in rows], output, filename)

            if csv:
                click.echo("Creating csv report...")
                # NOTE(review): assumes create_csv only iterates its rows
                # argument -- confirm against its definition.
                create_csv(query_header, rows, output, filename)

            # Print to the terminal only when no other output was requested;
            # pretty and plain csv printing are mutually exclusive.
            if not (multiqc or csv or overview):
                if pretty:
                    click.echo(tabulate(rows, query_header,
                                        tablefmt="pretty"))
                else:
                    # NOTE(review): assumes print_csv only iterates its rows
                    # argument -- confirm against its definition.
                    print_csv(query_header, rows)

        if overview:
            print_overview(session)
Exemple #5
0
def cli(directory, sample_metadata, input_csv, batch_description,
        cohort_description, batch_metadata, cohort_metadata):
    """Saves the given cohort directory to the falcon_multiqc database"""
    # Developer notes (kept out of the docstring, which is user-facing help):
    #   * Input is either one --directory with its --sample_metadata, or an
    #     --input_csv listing "directory,sample_metadata" pairs.
    #   * batch_metadata / cohort_metadata are iterated line by line
    #     (presumably open text files -- confirm against the click options);
    #     their first line is treated as a header and skipped.
    #   * One session is used for the whole command.
    #   * Raises Exception for invalid option combinations, malformed
    #     metadata lines, or descriptions for unknown batches/cohorts.

    if (not directory
            and not input_csv) and not (batch_metadata or cohort_metadata):
        raise Exception(
            "Save requires either an input --directory OR --input_csv.")
    if directory and input_csv:
        raise Exception(
            "Save requires only one of input --directory OR --input_csv, not both."
        )

    with session_scope() as session:

        # Did we get a csv?
        if input_csv:
            # Check we actually have a csv
            if not input_csv.endswith('.csv'):
                click.echo(
                    "A csv file is required when using the --input_csv flag.")
                sys.exit(1)

            with open(input_csv, 'r', newline='') as csv_file:
                csv_reader = csv.reader(csv_file)
                # An empty file yields an empty header and fails the check
                # below instead of raising StopIteration.
                header = next(csv_reader, [])

                # Check the headers of the csv are directory,sample_metadata
                if header[:2] != ["directory", "sample_metadata"]:
                    click.echo(
                        "CSV requires directory and sample_metadata headers."
                    )
                    sys.exit(1)

                for row in csv_reader:
                    if not row:
                        # Tolerate blank lines (e.g. a trailing newline).
                        continue
                    # Check that the files in the csv actually exist
                    if not exists(row[0]):
                        click.echo(
                            f"Error: Directory {row[0]} does not exist."
                            "\nAll database entries have been rolled back, please retry after fixing"
                        )
                        sys.exit(1)
                    if not exists(row[1]):
                        click.echo(
                            f"Error: Sample metadata {row[1]} does not exist."
                            "\nAll database entries have been rolled back, please retry after fixing"
                        )
                        sys.exit(1)
                    # save the info in that row
                    save_sample(abspath(row[0]), row[1], session,
                                cohort_description, batch_description)

        elif directory or sample_metadata:
            if not (directory and sample_metadata):
                click.echo(
                    "Please specify the path to multiqc_output AND path to respective metadata.csv when saving to database"
                )
                sys.exit(1)

            # Default: when a single directory or file is provided
            save_sample(abspath(directory), sample_metadata, session,
                        cohort_description, batch_description)

        # Commit all rows saved so far before reporting success.
        session.commit()
        click.echo("All multiqc and metadata results have been saved.")

        # Save batch metadata.
        if batch_metadata:
            # Skip header (tolerating an empty file).
            next(batch_metadata, None)
            for line in batch_metadata:
                if not line.strip():
                    continue
                split = line.split(",")
                try:
                    batch_name = split[0].strip(stripChars)
                    batch_description = split[2].strip(stripChars)
                except IndexError as err:
                    raise Exception(
                        "Batch_metadata format is invalid, Accepted format is:"
                        "\n'Batch_Name' 'Number_of_samples' 'Batch_description'"
                    ) from err
                # Update each batch with the given batch description.
                try:
                    batch_row = session.query(Batch).filter(
                        Batch.batch_name == batch_name).one()
                except NoResultFound as err:
                    raise Exception(
                        f"Batch '{batch_name}' is not present in the database so description cannot be added."
                        "\nAll batch description entries have been rolled back, please retry after fixing"
                    ) from err
                batch_row.description = batch_description
            session.commit()
            click.echo("Batch descriptions has been saved.")

        # Save Cohort metadata
        if cohort_metadata:
            # Skip header (tolerating an empty file).
            next(cohort_metadata, None)
            for line in cohort_metadata:
                if not line.strip():
                    continue
                split = line.split(",")
                try:
                    cohort_name = split[0].strip(stripChars)
                    cohort_description = split[4].strip(stripChars)
                except IndexError as err:
                    raise Exception(
                        "Cohort_metadata format is invalid, Accepted format is:"
                        "\n'Cohort_Name' 'Number_of_samples' 'Number_of_Batches' 'type' 'Cohort_description'"
                    ) from err
                # Update each Cohort with the given Cohort description.
                try:
                    cohort_row = session.query(Cohort).filter(
                        Cohort.id == cohort_name).one()
                except NoResultFound as err:
                    raise Exception(
                        f"Cohort '{cohort_name}' is not present in the database so description cannot be added, exiting."
                        "\nAll cohort description entries have been rolled back, please retry after fixing"
                    ) from err
                cohort_row.description = cohort_description
            session.commit()
            click.echo("Cohort descriptions has been saved.")