def test_reportmeta_not_equals(filter_test_reports):
    """
    A 'ne' reportmeta filter on key_1 (value 2) must leave out the second fixture report
    """
    meta_filter = {'type': 'reportmeta', 'key': 'key_1', 'value': [2], 'cmp': 'ne'}
    columns = (
        models.Report.created_at,
        models.SampleData.value,
        models.Sample.sample_id,
        models.Report.report_id,
    )
    rows = build_filter_query([[meta_filter]]).with_entities(*columns).all()

    # Two distinct reports are expected to match
    report_ids = unique(rows, 'report_id')
    assert len(report_ids) == 2

    # ... and two distinct samples
    sample_ids = unique(rows, 'sample_id')
    assert len(sample_ids) == 2

    # No returned row may belong to the second fixture report
    excluded_id = filter_test_reports[1].report_id
    for row in rows:
        assert row.report_id != excluded_id
def test_samplemeta_operator(filter_test_reports, filter_test_types, cmp, correct):
    """
    Run the samplemeta filter with the comparison operator `cmp` against the value 2.

    `correct` holds the indices (into `filter_test_reports`) of the reports that
    are expected to come back.
    """
    clause = {
        'type': 'samplemeta',
        'key': filter_test_types[0].data_key,
        'value': [2],
        'cmp': cmp,
    }
    columns = (
        models.Report.created_at,
        models.SampleData.value,
        models.Sample.sample_id,
        models.Report.report_id,
    )
    rows = build_filter_query([[clause]]).with_entities(*columns).all()
    expected_count = len(correct)

    # The number of distinct reports must match the expectation
    report_ids = unique(rows, 'report_id')
    assert len(report_ids) == expected_count

    # The number of distinct samples must match it as well
    sample_ids = unique(rows, 'sample_id')
    assert len(sample_ids) == expected_count

    # Every expected fixture report must be among the results
    for index in correct:
        assert filter_test_reports[index].report_id in report_ids
def test_timedelta_not_in(filter_test_reports):
    """
    A timedelta filter of 2 (presumably days) with 'not in' should only return the
    third fixture report
    """
    delta_filter = {'type': 'timedelta', 'value': [2], 'cmp': 'not in'}
    columns = (
        models.Report.created_at,
        models.SampleData.value,
        models.Sample.sample_id,
        models.Report.report_id,
    )
    rows = build_filter_query([[delta_filter]]).with_entities(*columns).all()

    # Exactly one distinct sample is expected
    assert len(unique(rows, 'sample_id')) == 1

    # It has to come from exactly one report
    report_ids = unique(rows, 'report_id')
    assert len(report_ids) == 1

    # That report is the last of the fixture reports
    assert filter_test_reports[2].report_id in report_ids
def test_reportmeta_equals(filter_test_reports, meta_key, value):
    """
    An 'eq' reportmeta filter on `meta_key`/`value` should match only the second
    fixture report
    """
    meta_filter = {'type': 'reportmeta', 'key': meta_key, 'value': [value], 'cmp': 'eq'}
    columns = (
        models.Report.created_at,
        models.SampleData.value,
        models.Sample.sample_id,
        models.Report.report_id,
    )
    rows = build_filter_query([[meta_filter]]).with_entities(*columns).all()

    # A single distinct report is expected
    report_ids = unique(rows, 'report_id')
    assert len(report_ids) == 1

    # ... containing a single distinct sample
    sample_ids = unique(rows, 'sample_id')
    assert len(sample_ids) == 1

    # The only report left must be the second fixture report
    assert report_ids.pop() == filter_test_reports[1].report_id
def test_daterange_not_in(filter_test_reports):
    """
    A daterange filter covering the last 2 days with 'not in' should only return
    the third fixture report
    """
    # Both ends of the window are formatted the way the filter expects them
    window_start = (datetime.datetime.now() - datetime.timedelta(days=2)).strftime(DATE_FORMAT)
    window_end = datetime.datetime.now().strftime(DATE_FORMAT)
    range_filter = {
        'type': 'daterange',
        'value': [window_start, window_end],
        'cmp': 'not in',
    }

    columns = (
        models.Report.created_at,
        models.SampleData.value,
        models.Sample.sample_id,
        models.Report.report_id,
    )
    grouped = (
        build_filter_query([[range_filter]])
        .with_entities(*columns)
        .group_by(models.Sample.sample_id)
    )
    rows = grouped.all()

    # Exactly one distinct sample is expected
    assert len(unique(rows, 'sample_id')) == 1

    # It has to come from exactly one report
    report_ids = unique(rows, 'report_id')
    assert len(report_ids) == 1

    # That report is the last of the fixture reports
    assert filter_test_reports[2].report_id in report_ids
def test_timedelta_in(filter_test_reports):
    """
    A timedelta filter of 2 (presumably days) with 'in' should return the first
    two fixture reports
    """
    delta_filter = {'type': 'timedelta', 'value': [2], 'cmp': 'in'}
    columns = (
        models.Report.created_at,
        models.SampleData.value,
        models.Sample.sample_id,
        models.Report.report_id,
    )
    rows = build_filter_query([[delta_filter]]).with_entities(*columns).all()

    # Two distinct samples are expected
    assert len(unique(rows, 'sample_id')) == 2

    # They have to be spread over exactly two reports
    report_ids = unique(rows, 'report_id')
    assert len(report_ids) == 2

    # Those reports are the first and second fixture reports
    assert filter_test_reports[0].report_id in report_ids
    assert filter_test_reports[1].report_id in report_ids
def test_daterange_in(filter_test_reports):
    """
    A daterange filter covering the last 2 days with "in" should return the first
    two fixture reports.
    """
    # Both ends of the window are formatted the way the filter expects them
    window_start = (datetime.datetime.now() - datetime.timedelta(days=2)).strftime(DATE_FORMAT)
    window_end = datetime.datetime.now().strftime(DATE_FORMAT)
    range_filter = {
        "type": "daterange",
        "value": [window_start, window_end],
        "cmp": "in",
    }
    query = build_filter_query([[range_filter]])

    # Diagnostic output: how many samples each fixture report carries
    for fixture_report in filter_test_reports:
        print(len(fixture_report.samples))

    columns = (
        models.Report.created_at,
        models.SampleData.value,
        models.Sample.sample_id,
        models.Report.report_id,
    )
    rows = query.with_entities(*columns).group_by(models.Sample.sample_id).all()

    # Two distinct samples are expected
    assert len(unique(rows, "sample_id")) == 2

    # They have to be spread over exactly two reports
    report_ids = unique(rows, "report_id")
    assert len(report_ids) == 2

    # Those reports are the first and second fixture reports
    assert filter_test_reports[0].report_id in report_ids
    assert filter_test_reports[1].report_id in report_ids
def test_samplemeta_operator(filter_test_reports, filter_test_types, cmp, value, correct, data_type):
    """
    Run the samplemeta filter for one (operator, value, data type) combination.

    `data_type` indexes into `filter_test_types` to pick the data key, and
    `correct` holds the indices (into `filter_test_reports`) of the reports that
    are expected to come back.
    """
    data_key = filter_test_types[data_type].data_key
    clause = {"type": "samplemeta", "key": data_key, "value": [value], "cmp": cmp}
    columns = (
        models.Report.created_at,
        models.SampleData.value,
        models.Sample.sample_id,
        models.Report.report_id,
    )
    rows = build_filter_query([[clause]]).with_entities(*columns).all()
    expected_count = len(correct)

    # The number of distinct reports must match the expectation
    report_ids = unique(rows, "report_id")
    assert len(report_ids) == expected_count

    # The number of distinct samples must match it as well
    sample_ids = unique(rows, "sample_id")
    assert len(sample_ids) == expected_count

    # Every expected fixture report must be among the results
    for index in correct:
        assert filter_test_reports[index].report_id in report_ids
def test_reportmeta_not_equals(filter_test_reports):
    """
    A "ne" reportmeta filter on key_1 (value 2) must leave out the second fixture report.
    """
    meta_filter = {"type": "reportmeta", "key": "key_1", "value": [2], "cmp": "ne"}
    selected = build_filter_query([[meta_filter]]).with_entities(
        models.Report.created_at,
        models.SampleData.value,
        models.Sample.sample_id,
        models.Report.report_id,
    )
    rows = selected.all()

    # Two distinct reports are expected to match
    report_ids = unique(rows, "report_id")
    assert len(report_ids) == 2

    # ... and two distinct samples
    sample_ids = unique(rows, "sample_id")
    assert len(sample_ids) == 2

    # No returned row may belong to the second fixture report
    excluded_id = filter_test_reports[1].report_id
    for row in rows:
        assert row.report_id != excluded_id
def test_reportmeta_equals(filter_test_reports, meta_key, value):
    """
    An "eq" reportmeta filter on `meta_key`/`value` should match only the second
    fixture report.
    """
    meta_filter = {"type": "reportmeta", "key": meta_key, "value": [value], "cmp": "eq"}
    selected = build_filter_query([[meta_filter]]).with_entities(
        models.Report.created_at,
        models.SampleData.value,
        models.Sample.sample_id,
        models.Report.report_id,
    )
    rows = selected.all()

    # A single distinct report is expected
    report_ids = unique(rows, "report_id")
    assert len(report_ids) == 1

    # ... containing a single distinct sample
    sample_ids = unique(rows, "sample_id")
    assert len(sample_ids) == 1

    # The only report left must be the second fixture report
    assert report_ids.pop() == filter_test_reports[1].report_id
def test_or_query(filter_test_types, filter_test_reports):
    """
    Combine two filter groups and check the merged result

    The two groups are passed as separate entries of the outer list, which
    build_filter_query presumably treats as alternatives (OR): samples with a
    sample value greater than 2, or with key_1 report metadata less than 2.
    """
    sample_group = [
        {
            'type': 'samplemeta',
            'key': filter_test_types[0].data_key,
            'value': [2],
            'cmp': 'gt',
        },
    ]
    report_group = [
        {'type': 'reportmeta', 'key': 'key_1', 'value': [2], 'cmp': 'lt'},
    ]
    columns = (
        models.Report.created_at,
        models.SampleData.value,
        models.Sample.sample_id,
        models.Report.report_id,
    )
    rows = build_filter_query([sample_group, report_group]).with_entities(*columns).all()

    # Two distinct reports are expected
    report_ids = unique(rows, 'report_id')
    assert len(report_ids) == 2

    # ... and two distinct samples
    sample_ids = unique(rows, 'sample_id')
    assert len(sample_ids) == 2

    # The second fixture report must not be part of the result
    assert filter_test_reports[1].report_id not in report_ids
def test_or_query(filter_test_types, filter_test_reports):
    """
    Combine two filter groups and check the merged result.

    The two groups are passed as separate entries of the outer list, which
    build_filter_query presumably treats as alternatives (OR): samples with a
    sample value greater than 2, or with key_1 report metadata less than 2.
    """
    sample_clause = {
        "type": "samplemeta",
        "key": filter_test_types[0].data_key,
        "value": [2],
        "cmp": "gt",
    }
    report_clause = {"type": "reportmeta", "key": "key_1", "value": [2], "cmp": "lt"}
    query = build_filter_query([[sample_clause], [report_clause]])

    rows = query.with_entities(
        models.Report.created_at,
        models.SampleData.value,
        models.Sample.sample_id,
        models.Report.report_id,
    ).all()

    # Two distinct reports are expected
    report_ids = unique(rows, "report_id")
    assert len(report_ids) == 2

    # ... and two distinct samples
    sample_ids = unique(rows, "sample_id")
    assert len(sample_ids) == 2

    # The second fixture report must not be part of the result
    assert filter_test_reports[1].report_id not in report_ids
def trend_data(fields, filter, plot_prefix, control_limits, center_line):
    """
    Returns data suitable for a plotly plot.

    For every requested field this queries the matching sample values (ordered by
    report creation time) and emits a list of plotly trace dictionaries: one
    scatter trace for outliers, one for the remaining samples, optionally a
    centre line, and optionally a filled control-limit band.

    Args:
        fields: Iterable of strings. A string made up only of digits is matched
            against the sample data type ID, anything else against the data key.
        filter: Filter specification handed to ``build_filter_query``; only
            samples selected by it are plotted.
        plot_prefix: String prepended to the ``id`` of every generated trace.
        control_limits: Mapping with a ``"sigma"`` entry (z-score threshold for
            outliers and multiplier for the band half-width) and an ``"enabled"``
            entry (whether the control-limit band is drawn).
        center_line: ``"mean"`` or ``"median"`` to draw that centre line; any
            other value draws none.

    Returns:
        A list of dictionaries describing plotly traces. Fields for which the
        query returns no rows contribute no traces.
    """
    subquery = build_filter_query(filter)
    plots = []
    # NOTE: zip() stops at the shorter input, so fields beyond the number of
    # available default colours are not plotted.
    for field, colour in zip(fields, DEFAULT_PLOTLY_COLORS):

        # Choose the columns to select, and further filter it down to samples with the column we want to plot
        query = (
            db.session.query(Sample)
            .join(SampleData, isouter=True)
            .join(SampleDataType, isouter=True)
            .join(Report, Report.report_id == Sample.report_id, isouter=True)
            .with_entities(
                models.Sample.sample_name,
                models.SampleDataType.nice_name,
                models.Report.created_at,
                models.SampleData.value,
            )
            .order_by(models.Report.created_at.asc(),)
            .filter(Sample.sample_id.in_(subquery))
            .distinct()
        )

        # Fields can be specified either as type IDs, or as type names
        if field.isdigit():
            query = query.filter(models.SampleDataType.sample_data_type_id == field)
        else:
            query = query.filter(models.SampleDataType.data_key == field)

        data = query.all()

        # If the query returned nothing, skip this field. This must be `continue`:
        # a `break` here would also discard every field that follows.
        if not data:
            continue

        # Transpose the rows into one sequence per selected column
        names, data_types, x, y = zip(*data)
        data_type = data_types[0]
        names = numpy.asarray(names, dtype=str)
        x = numpy.asarray(x)
        y = numpy.asarray(y, dtype=float)

        # Anything outside the control limits is an outlier
        outliers = absolute(zscore(y)) > control_limits["sigma"]
        inliers = ~outliers

        # Add the outliers
        plots.append(
            dict(
                id=plot_prefix + "_outlier_" + field,
                type="scatter",
                text=names[outliers],
                hoverinfo="text+x+y",
                x=x[outliers],
                y=y[outliers],
                line=dict(color="rgb(250,0,0)"),
                mode="markers",
                name="{} Outliers".format(data_type),
            )
        )

        # Add the non-outliers
        plots.append(
            dict(
                id=plot_prefix + "_raw_" + field,
                type="scatter",
                text=names[inliers],
                hoverinfo="text+x+y",
                x=x[inliers],
                y=y[inliers],
                line=dict(color=colour),
                mode="markers",
                name="{} Samples".format(data_type),
            )
        )

        # Add the centre line; y2 is also the baseline of the control-limit band
        if center_line == "mean":
            y2 = numpy.repeat(numpy.mean(y), len(x))
            plots.append(
                dict(
                    id=plot_prefix + "_mean_" + field,
                    type="scatter",
                    x=x,
                    y=y2.tolist(),
                    line=dict(color=colour),
                    mode="lines",
                    name="{} Mean".format(data_type),
                )
            )
        elif center_line == "median":
            y2 = numpy.repeat(numpy.median(y), len(x))
            plots.append(
                dict(
                    id=plot_prefix + "_median_" + field,
                    type="scatter",
                    x=x,
                    y=y2.tolist(),
                    line=dict(color=colour),
                    mode="lines",
                    name="{} Median".format(data_type),
                )
            )
        else:
            # The user could request control limits without a center line. Assume they
            # want a mean in this case
            y2 = numpy.repeat(numpy.mean(y), len(x))

        # Add the stdev
        if control_limits["enabled"]:
            # Walk x forwards along the lower bound and backwards along the upper
            # bound so the trace encloses the band
            x3 = numpy.concatenate((x, numpy.flip(x, axis=0)))
            stdev = numpy.repeat(numpy.std(y) * control_limits["sigma"], len(x))
            upper = y2 + stdev
            lower = y2 - stdev
            y3 = numpy.concatenate((lower, upper))
            plots.append(
                dict(
                    id=plot_prefix + "_stdev_" + field,
                    type="scatter",
                    x=x3.tolist(),
                    y=y3.tolist(),
                    fill="tozerox",
                    fillcolor=rgb_to_rgba(colour, 0.5),
                    line=dict(color="rgba(255,255,255,0)"),
                    name="{} Control Limits".format(data_type),
                )
            )

    return plots