Python build_filter_query Examples, megaqc.rest_api.filters.build_filter_query Python Examples

Example #1

0

Show file

File: test_build_filter.py Project: genomicsITER/MegaQC

def test_reportmeta_not_equals(filter_test_reports):
    """
    Check the ``ne`` comparison of the ``reportmeta`` filter.

    Filtering on ``key_1 != 2`` is expected to leave two reports and two
    samples, none of which belong to the report at index 1 of the
    ``filter_test_reports`` fixture.
    """
    # One filter group containing a single "not equal" filter on report metadata
    not_equal_filter = {
        "type": "reportmeta",
        "key": "key_1",
        "value": [2],
        "cmp": "ne",
    }
    query = build_filter_query([[not_equal_filter]])

    columns = (
        models.Report.created_at,
        models.SampleData.value,
        models.Sample.sample_id,
        models.Report.report_id,
    )
    rows = query.with_entities(*columns).all()

    # Two distinct reports survive the filter
    assert len(unique(rows, "report_id")) == 2

    # ... and two distinct samples
    assert len(unique(rows, "sample_id")) == 2

    # No returned row may come from the second fixture report
    for row in rows:
        assert row.report_id != filter_test_reports[1].report_id

Example #2

0

Show file

File: test_build_filter.py Project: genomicsITER/MegaQC

def test_samplemeta_operator(filter_test_reports, filter_test_types, cmp, correct):
    """
    Exercise the ``samplemeta`` filter with the comparison operator ``cmp``.

    ``correct`` holds the indexes into ``filter_test_reports`` of the reports
    that are expected to match. ``cmp`` and ``correct`` are presumably
    supplied by a ``pytest.mark.parametrize`` decorator that is not visible
    in this excerpt.
    """
    # Compare the first test data type against the value 2 with the operator under test
    operator_filter = {
        "type": "samplemeta",
        "key": filter_test_types[0].data_key,
        "value": [2],
        "cmp": cmp,
    }
    query = build_filter_query([[operator_filter]])

    columns = (
        models.Report.created_at,
        models.SampleData.value,
        models.Sample.sample_id,
        models.Report.report_id,
    )
    rows = query.with_entities(*columns).all()

    # One distinct report per expected match
    matched_reports = unique(rows, "report_id")
    assert len(matched_reports) == len(correct)

    # One distinct sample per expected match
    assert len(unique(rows, "sample_id")) == len(correct)

    # Every expected report must be among the matches
    for expected_index in correct:
        assert filter_test_reports[expected_index].report_id in matched_reports

Example #3

0

Show file

File: test_build_filter.py Project: genomicsITER/MegaQC

def test_timedelta_not_in(filter_test_reports):
    """
    Check the ``not in`` comparison of the ``timedelta`` filter.

    The filter value is ``[2]`` (the original comment describes this as a
    2-day window); with ``not in`` only data outside that window should be
    returned, which is expected to be one sample from the report at index 2
    of ``filter_test_reports``.
    """
    outside_window_filter = {
        "type": "timedelta",
        "value": [2],
        "cmp": "not in",
    }
    query = build_filter_query([[outside_window_filter]])

    columns = (
        models.Report.created_at,
        models.SampleData.value,
        models.Sample.sample_id,
        models.Report.report_id,
    )
    rows = query.with_entities(*columns).all()

    # Exactly one distinct sample is returned
    sample_ids = unique(rows, "sample_id")
    assert len(sample_ids) == 1

    # ... originating from exactly one report
    report_ids = unique(rows, "report_id")
    assert len(report_ids) == 1

    # That report is the last one in the fixture
    assert filter_test_reports[2].report_id in report_ids

Example #4

0

Show file

File: test_build_filter.py Project: genomicsITER/MegaQC

def test_reportmeta_equals(filter_test_reports, meta_key, value):
    """
    Check the ``eq`` comparison of the ``reportmeta`` filter.

    ``meta_key`` and ``value`` form the metadata key/value pair to match;
    they are presumably supplied by a ``pytest.mark.parametrize`` decorator
    that is not visible in this excerpt. Exactly one report and one sample
    are expected, and the report must be the one at index 1 of
    ``filter_test_reports``.
    """
    equality_filter = {
        "type": "reportmeta",
        "key": meta_key,
        "value": [value],
        "cmp": "eq",
    }
    query = build_filter_query([[equality_filter]])

    columns = (
        models.Report.created_at,
        models.SampleData.value,
        models.Sample.sample_id,
        models.Report.report_id,
    )
    rows = query.with_entities(*columns).all()

    # A single distinct report matches
    report_ids = unique(rows, "report_id")
    assert len(report_ids) == 1

    # ... with a single distinct sample
    assert len(unique(rows, "sample_id")) == 1

    # The only matching report is the second fixture report
    assert report_ids.pop() == filter_test_reports[1].report_id

Example #5

0

Show file

File: test_build_filter.py Project: genomicsITER/MegaQC

def test_daterange_not_in(filter_test_reports):
    """
    Check the ``not in`` comparison of the ``daterange`` filter.

    The range spans from two days ago until now, so ``not in`` should return
    only data outside that range: one sample, from the report at index 2 of
    ``filter_test_reports``.
    """
    # Both range bounds are formatted with DATE_FORMAT, start first
    range_start = (
        datetime.datetime.now() - datetime.timedelta(days=2)
    ).strftime(DATE_FORMAT)
    range_end = datetime.datetime.now().strftime(DATE_FORMAT)

    outside_range_filter = {
        "type": "daterange",
        "value": [range_start, range_end],
        "cmp": "not in",
    }
    query = build_filter_query([[outside_range_filter]])

    columns = (
        models.Report.created_at,
        models.SampleData.value,
        models.Sample.sample_id,
        models.Report.report_id,
    )
    # Group by sample so each sample contributes a single row
    grouped = query.with_entities(*columns).group_by(models.Sample.sample_id)
    rows = grouped.all()

    # Exactly one distinct sample is returned
    assert len(unique(rows, "sample_id")) == 1

    # ... originating from exactly one report
    report_ids = unique(rows, "report_id")
    assert len(report_ids) == 1

    # That report is the last one in the fixture
    assert filter_test_reports[2].report_id in report_ids

Example #6

0

Show file

File: test_build_filter.py Project: genomicsITER/MegaQC

def test_timedelta_in(filter_test_reports):
    """
    Check the ``in`` comparison of the ``timedelta`` filter.

    The filter value is ``[2]`` (the original comment describes this as
    "uploaded in the last 2 days"). Two samples from two reports are
    expected: the reports at indexes 0 and 1 of ``filter_test_reports``.
    """
    inside_window_filter = {
        "type": "timedelta",
        "value": [2],
        "cmp": "in",
    }
    query = build_filter_query([[inside_window_filter]])

    columns = (
        models.Report.created_at,
        models.SampleData.value,
        models.Sample.sample_id,
        models.Report.report_id,
    )
    rows = query.with_entities(*columns).all()

    # Two distinct samples are returned
    sample_ids = unique(rows, "sample_id")
    assert len(sample_ids) == 2

    # ... spread over two distinct reports
    report_ids = unique(rows, "report_id")
    assert len(report_ids) == 2

    # Those reports are the first two in the fixture
    first_report, second_report = filter_test_reports[0], filter_test_reports[1]
    assert first_report.report_id in report_ids
    assert second_report.report_id in report_ids

Example #7

0

Show file

def test_daterange_in(filter_test_reports):
    """
    Check the ``in`` comparison of the ``daterange`` filter.

    The range spans from two days ago until now. Two samples from two reports
    are expected: the reports at indexes 0 and 1 of ``filter_test_reports``.
    """
    # Both range bounds are formatted with DATE_FORMAT, start first
    range_start = (
        datetime.datetime.now() - datetime.timedelta(days=2)
    ).strftime(DATE_FORMAT)
    range_end = datetime.datetime.now().strftime(DATE_FORMAT)

    inside_range_filter = {
        "type": "daterange",
        "value": [range_start, range_end],
        "cmp": "in",
    }
    query = build_filter_query([[inside_range_filter]])

    # Diagnostic output: number of samples attached to each fixture report
    for fixture_report in filter_test_reports:
        print(len(fixture_report.samples))

    columns = (
        models.Report.created_at,
        models.SampleData.value,
        models.Sample.sample_id,
        models.Report.report_id,
    )
    # Group by sample so each sample contributes a single row
    grouped = query.with_entities(*columns).group_by(models.Sample.sample_id)
    rows = grouped.all()

    # Two distinct samples are returned
    assert len(unique(rows, "sample_id")) == 2

    # ... spread over two distinct reports
    report_ids = unique(rows, "report_id")
    assert len(report_ids) == 2

    # Those reports are the first two in the fixture
    assert filter_test_reports[0].report_id in report_ids
    assert filter_test_reports[1].report_id in report_ids

Example #8

0

Show file

def test_samplemeta_operator(
    filter_test_reports,
    filter_test_types,
    cmp,
    value,
    correct,
    data_type,
):
    """
    Exercise the ``samplemeta`` filter with the comparison operator ``cmp``.

    ``data_type`` indexes ``filter_test_types`` to pick the data key being
    filtered, ``value`` is the operand, and ``correct`` holds the indexes
    into ``filter_test_reports`` of the reports expected to match. These
    arguments are presumably supplied by a ``pytest.mark.parametrize``
    decorator that is not visible in this excerpt.
    """
    filtered_type = filter_test_types[data_type]
    operator_filter = {
        "type": "samplemeta",
        "key": filtered_type.data_key,
        "value": [value],
        "cmp": cmp,
    }
    query = build_filter_query([[operator_filter]])

    columns = (
        models.Report.created_at,
        models.SampleData.value,
        models.Sample.sample_id,
        models.Report.report_id,
    )
    rows = query.with_entities(*columns).all()

    expected_count = len(correct)

    # One distinct report per expected match
    matched_reports = unique(rows, "report_id")
    assert len(matched_reports) == expected_count

    # One distinct sample per expected match
    assert len(unique(rows, "sample_id")) == expected_count

    # Every expected report must be among the matches
    for expected_index in correct:
        assert filter_test_reports[expected_index].report_id in matched_reports

Example #9

0

Show file

def test_reportmeta_not_equals(filter_test_reports):
    """
    Check the ``ne`` comparison of the ``reportmeta`` filter.

    Filtering on ``key_1 != 2`` is expected to leave two reports and two
    samples, none of which belong to the report at index 1 of the
    ``filter_test_reports`` fixture.
    """
    # Outer list: filter groups; inner list: the filters of one group
    filter_groups = [
        [
            {
                "type": "reportmeta",
                "key": "key_1",
                "value": [2],
                "cmp": "ne",
            },
        ],
    ]
    query = build_filter_query(filter_groups)

    selected = query.with_entities(
        models.Report.created_at,
        models.SampleData.value,
        models.Sample.sample_id,
        models.Report.report_id,
    )
    rows = selected.all()

    # Two distinct reports survive the filter
    report_ids = unique(rows, "report_id")
    assert len(report_ids) == 2

    # ... and two distinct samples
    sample_ids = unique(rows, "sample_id")
    assert len(sample_ids) == 2

    # No returned row may come from the second fixture report
    for row in rows:
        assert row.report_id != filter_test_reports[1].report_id

Example #10

0

Show file

def test_reportmeta_equals(filter_test_reports, meta_key, value):
    """
    Check the ``eq`` comparison of the ``reportmeta`` filter.

    ``meta_key`` and ``value`` form the metadata key/value pair to match;
    they are presumably supplied by a ``pytest.mark.parametrize`` decorator
    that is not visible in this excerpt. Exactly one report and one sample
    are expected, and the report must be the one at index 1 of
    ``filter_test_reports``.
    """
    # Outer list: filter groups; inner list: the filters of one group
    filter_groups = [
        [
            {
                "type": "reportmeta",
                "key": meta_key,
                "value": [value],
                "cmp": "eq",
            },
        ],
    ]
    query = build_filter_query(filter_groups)

    selected = query.with_entities(
        models.Report.created_at,
        models.SampleData.value,
        models.Sample.sample_id,
        models.Report.report_id,
    )
    rows = selected.all()

    # A single distinct report matches
    report_ids = unique(rows, "report_id")
    assert len(report_ids) == 1

    # ... with a single distinct sample
    sample_ids = unique(rows, "sample_id")
    assert len(sample_ids) == 1

    # The only matching report is the second fixture report
    assert report_ids.pop() == filter_test_reports[1].report_id

Example #11

0

Show file

File: test_build_filter.py Project: genomicsITER/MegaQC

def test_or_query(filter_test_types, filter_test_reports):
    """
    Check a query built from two filter groups.

    Per the test name and the original inline comment, the two groups are
    combined with OR: samples whose first test data type is greater than 2,
    or whose report metadata ``key_1`` is less than 2. Two reports and two
    samples are expected, and the report at index 1 of
    ``filter_test_reports`` must not be among them.
    """
    # First group: sample value > 2
    sample_value_filter = {
        "type": "samplemeta",
        "key": filter_test_types[0].data_key,
        "value": [2],
        "cmp": "gt",
    }
    # Second group: report metadata key_1 < 2
    report_meta_filter = {
        "type": "reportmeta",
        "key": "key_1",
        "value": [2],
        "cmp": "lt",
    }
    query = build_filter_query([[sample_value_filter], [report_meta_filter]])

    columns = (
        models.Report.created_at,
        models.SampleData.value,
        models.Sample.sample_id,
        models.Report.report_id,
    )
    rows = query.with_entities(*columns).all()

    # Two distinct reports are returned
    report_ids = unique(rows, "report_id")
    assert len(report_ids) == 2

    # ... with two distinct samples
    assert len(unique(rows, "sample_id")) == 2

    # The second fixture report must not have matched either group
    assert filter_test_reports[1].report_id not in report_ids

Example #12

0

Show file

def test_or_query(filter_test_types, filter_test_reports):
    """
    Check a query built from two filter groups.

    Per the test name and the original inline comment, the two groups are
    combined with OR: samples whose first test data type is greater than 2,
    or whose report metadata ``key_1`` is less than 2. Two reports and two
    samples are expected, and the report at index 1 of
    ``filter_test_reports`` must not be among them.
    """
    # Group 1: sample value > 2 for the first test data type
    sample_group = [
        {
            "type": "samplemeta",
            "key": filter_test_types[0].data_key,
            "value": [2],
            "cmp": "gt",
        }
    ]
    # Group 2: report metadata key_1 < 2
    report_group = [
        {
            "type": "reportmeta",
            "key": "key_1",
            "value": [2],
            "cmp": "lt",
        }
    ]
    query = build_filter_query([sample_group, report_group])

    selected = query.with_entities(
        models.Report.created_at,
        models.SampleData.value,
        models.Sample.sample_id,
        models.Report.report_id,
    )
    rows = selected.all()

    # Two distinct reports are returned
    report_ids = unique(rows, "report_id")
    assert len(report_ids) == 2

    # ... with two distinct samples
    sample_ids = unique(rows, "sample_id")
    assert len(sample_ids) == 2

    # The second fixture report must not have matched either group
    assert filter_test_reports[1].report_id not in report_ids

Example #13

0

Show file

File: plot.py Project: pythseq/MegaQC

def trend_data(fields, filter, plot_prefix, control_limits, center_line):
    """
    Returns data suitable for a plotly plot.

    For each field that has data, the following traces are appended, in
    order: an outlier scatter, an inlier ("Samples") scatter, an optional
    mean/median line, and an optional control-limit band.

    :param fields: Iterable of field identifiers as strings. An all-digit
        string is matched against ``SampleDataType.sample_data_type_id``,
        anything else against ``SampleDataType.data_key``.
    :param filter: Filter definition passed unchanged to
        ``build_filter_query`` to restrict which samples are plotted. (The
        name shadows the builtin but is kept for caller compatibility.)
    :param plot_prefix: String prepended to the ``id`` of every trace.
    :param control_limits: Mapping with the keys ``"sigma"`` (multiplier used
        both for the outlier threshold and for the band width) and
        ``"enabled"`` (whether the band is drawn). Outliers are split out by
        ``"sigma"`` even when the band is disabled.
    :param center_line: ``"mean"`` or ``"median"`` to draw that line; any
        other value draws no line, and the band (if enabled) is then centred
        on the mean.
    :return: List of dictionaries, one per trace.
    """
    subquery = build_filter_query(filter)
    plots = []
    # zip() stops at the shorter input, so fields beyond the number of
    # available default colours are not plotted
    for field, colour in zip(fields, DEFAULT_PLOTLY_COLORS):

        # Choose the columns to select, and further filter it down to samples with the column we want to plot
        query = (
            db.session.query(Sample)
            .join(SampleData, isouter=True)
            .join(SampleDataType, isouter=True)
            .join(Report, Report.report_id == Sample.report_id, isouter=True)
            .with_entities(
                models.Sample.sample_name,
                models.SampleDataType.nice_name,
                models.Report.created_at,
                models.SampleData.value,
            )
            .order_by(models.Report.created_at.asc(),)
            .filter(Sample.sample_id.in_(subquery))
            .distinct()
        )

        # Fields can be specified either as type IDs, or as type names
        if field.isdigit():
            query = query.filter(models.SampleDataType.sample_data_type_id == field)
        else:
            query = query.filter(models.SampleDataType.data_key == field)

        data = query.all()

        # If the query returned nothing, skip this field but keep processing
        # the remaining ones (a `break` here would silently drop every field
        # that comes after an empty one)
        if not data:
            continue

        # Transpose the rows into one sequence per selected column
        names, data_types, x, y = zip(*data)
        data_type = data_types[0]
        names = numpy.asarray(names, dtype=str)
        x = numpy.asarray(x)
        y = numpy.asarray(y, dtype=float)

        # Anything outside the control limits is an outlier
        outliers = absolute(zscore(y)) > control_limits["sigma"]
        inliers = ~outliers

        # Add the outliers
        plots.append(
            dict(
                id=plot_prefix + "_outlier_" + field,
                type="scatter",
                text=names[outliers],
                hoverinfo="text+x+y",
                x=x[outliers],
                y=y[outliers],
                line=dict(color="rgb(250,0,0)"),
                mode="markers",
                name="{} Outliers".format(data_type),
            )
        )

        # Add the non-outliers
        plots.append(
            dict(
                id=plot_prefix + "_raw_" + field,
                type="scatter",
                text=names[inliers],
                hoverinfo="text+x+y",
                x=x[inliers],
                y=y[inliers],
                line=dict(color=colour),
                mode="markers",
                name="{} Samples".format(data_type),
            )
        )

        # Add the center line; y2 is also the baseline for the control limits
        if center_line == "mean":
            y2 = numpy.repeat(numpy.mean(y), len(x))
            plots.append(
                dict(
                    id=plot_prefix + "_mean_" + field,
                    type="scatter",
                    x=x,
                    y=y2.tolist(),
                    line=dict(color=colour),
                    mode="lines",
                    name="{} Mean".format(data_type),
                )
            )
        elif center_line == "median":
            y2 = numpy.repeat(numpy.median(y), len(x))
            plots.append(
                dict(
                    id=plot_prefix + "_median_" + field,
                    type="scatter",
                    x=x,
                    y=y2.tolist(),
                    line=dict(color=colour),
                    mode="lines",
                    name="{} Median".format(data_type),
                )
            )
        else:
            # The user could request control limits without a center line. Assume they
            # want a mean in this case
            y2 = numpy.repeat(numpy.mean(y), len(x))

        # Add the stdev
        if control_limits["enabled"]:
            # Trace the lower bound left-to-right, then the upper bound over
            # the reversed x values, so the two halves form one closed band
            x3 = numpy.concatenate((x, numpy.flip(x, axis=0)))
            stdev = numpy.repeat(numpy.std(y) * control_limits["sigma"], len(x))
            upper = y2 + stdev
            lower = y2 - stdev
            y3 = numpy.concatenate((lower, upper))
            plots.append(
                dict(
                    id=plot_prefix + "_stdev_" + field,
                    type="scatter",
                    x=x3.tolist(),
                    y=y3.tolist(),
                    fill="tozerox",
                    fillcolor=rgb_to_rgba(colour, 0.5),
                    line=dict(color="rgba(255,255,255,0)"),
                    name="{} Control Limits".format(data_type),
                )
            )

    return plots