def test_custom_job_no_data(self):
    """A result saved with empty BED data should still yield one prediction row with no values."""
    FASTA_CONTENT = """>someseq\nAAACCCGGGGTT"""
    db = create_db_connection(TestWithPostgres.config.dbconfig)
    # Upload a FASTA file containing a single short sequence.
    sequence_list = SequenceList.create_with_content_and_title(db, FASTA_CONTENT, "somelist")
    # Create a prediction job for the sequence list, then mark it running.
    job_uuid = CustomJob.create_job(db, DataType.PREDICTION, sequence_list, model_name='E2f1').uuid
    CustomJob.set_job_running(db, job_uuid)
    # Save a result whose BED payload is empty.
    empty_bed = ''
    result_uuid = CustomResultData.new_uuid()
    result = CustomResultData(db, result_uuid, job_uuid, model_name='E2f1', bed_data=empty_bed)
    result.save()
    predictions = CustomResultData.get_predictions(db, result_uuid, sort_max_value=False,
                                                   limit=None, offset=None)
    self.assertEqual(len(predictions), 1)
    first = predictions[0]
    self.assertEqual(first['name'], 'someseq')
    # With no BED rows the max comes back as the string 'None'.
    self.assertEqual(first['max'], 'None')
    self.assertEqual(first['values'], [])
    self.assertEqual(first['sequence'], 'AAACCCGGGGTT')
    # Predictions must be JSON-serializable.
    json_version = json.dumps({'data': predictions})
    self.assertEqual(json_version[:6], '{"data')
def get_custom_result_raw_data(result_id):
    """Return the raw BED file contents of a custom result as a "data.bed" download response."""
    contents = CustomResultData.bed_file_contents(get_db(), result_id)

    def single_chunk():
        # download_file_response consumes an iterable of chunks; yield everything at once.
        yield contents

    return download_file_response("data.bed", single_chunk())
def search_custom_results(result_id):
    """
    Search a result for predictions.
    request['maxPredictionSort'] - when true sort by max prediction
    request['all'] - include values in download
    request['page'] - which page of results to show
    request['perPage'] - items per page to show
    :param result_id: str: uuid of the custom_predictions/custom_preferences we want to search
    :return: json response with 'result' property containing an array of predictions
    """
    args = request.args
    output_format = args.get('format')
    sort_by_max = args.get('maxPredictionSort')
    # The client sends the literal string 'false' when max-sorting is disabled.
    if sort_by_max == 'false':
        sort_by_max = None
    all_values = args.get('all')
    # NOTE(review): docstring says 'perPage' but the code reads 'per_page' -- confirm against caller.
    page = get_optional_int(args, 'page')
    per_page = get_optional_int(args, 'per_page')
    offset = (page - 1) * per_page if page and per_page else None
    predictions = CustomResultData.get_predictions(get_db(), result_id, sort_by_max, per_page, offset)
    if output_format in ('tsv', 'csv'):
        separator = '\t' if output_format == 'tsv' else ','
        filename = "custom_result.{}".format(output_format)
        return download_file_response(filename,
                                      make_download_custom_result(separator, all_values, predictions))
    return make_ok_json_response({'result': predictions})
def search_custom_results(result_id):
    """
    Search a result for predictions.
    request['maxPredictionSort'] - when true sort by max prediction
    request['all'] - include values in download
    request['page'] - which page of results to show
    request['perPage'] - items per page to show
    :param result_id: str: uuid of the custom_predictions/custom_preferences we want to search
    :return: json response with 'result' property containing an array of predictions
    """
    query_args = request.args
    requested_format = query_args.get('format')
    sort_by_max = query_args.get('maxPredictionSort')
    if sort_by_max == 'false':
        # Treat the string 'false' the same as no sort request.
        sort_by_max = None
    all_values = query_args.get('all')
    page = get_optional_int(query_args, 'page')
    per_page = get_optional_int(query_args, 'per_page')
    if page and per_page:
        offset = (page - 1) * per_page
    else:
        offset = None
    predictions = CustomResultData.get_predictions(get_db(), result_id, sort_by_max, per_page, offset)
    if requested_format not in ('tsv', 'csv'):
        return make_ok_json_response({'result': predictions})
    # Tabular download: tsv uses tabs, csv uses commas.
    separator = '\t' if requested_format == 'tsv' else ','
    filename = "custom_result.{}".format(requested_format)
    return download_file_response(
        filename,
        make_download_custom_result(separator, all_values, predictions))
def test_custom_job_normal_workflow(self):
    """Full workflow: upload FASTA, create/run job, save 5-column BED results, query predictions."""
    SHORT_SEQUENCE = 'AAACCCGGGGTT'
    LONG_SEQUENCE = ('AAACCCGGGGTTAAACCCGGGGTTAAACCCGGGGTTAAACCCGGGGTTAAACCCGGGGTT'
                     'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA')
    fasta_content = '>someseq\n' + SHORT_SEQUENCE + '\n>someseq2\n' + LONG_SEQUENCE
    db = create_db_connection(TestWithPostgres.config.dbconfig)
    # Upload the FASTA file.
    sequence_list = SequenceList.create_with_content_and_title(db, fasta_content, "sometitle")
    # Create a prediction job for the sequence list, then mark it running.
    job_uuid = CustomJob.create_job(db, DataType.PREDICTION, sequence_list, model_name="E2f1").uuid
    CustomJob.set_job_running(db, job_uuid)
    # Save BED-formatted rows: name, start, stop, value, fragment.
    BED_DATA = """
someseq\t0\t10\t12.5\tAAACCCGGGG
someseq2\t20\t30\t4.5\tGGTTAAACCC
someseq2\t60\t75\t15.5\tAAAAAAAAAAAAAAA
""".strip()
    result_uuid = CustomResultData.new_uuid()
    result = CustomResultData(db, result_uuid, job_uuid, model_name='E2f1', bed_data=BED_DATA)
    result.save()
    self.assertEqual(CustomResultData.bed_file_contents(db, result_uuid).strip(), BED_DATA)
    # Unsorted query returns both sequences in upload order.
    predictions = CustomResultData.get_predictions(db, result_uuid, sort_max_value=False,
                                                   limit=None, offset=None)
    self.assertEqual(len(predictions), 2)
    first, second = predictions
    self.assertEqual(first['name'], 'someseq')
    self.assertEqual(float(first['max']), 12.5)
    self.assertEqual(first['values'], [{u'start': 0, u'end': 10, u'value': 12.5}])
    self.assertEqual(first['sequence'], SHORT_SEQUENCE)
    self.assertEqual(second['name'], 'someseq2')
    self.assertEqual(float(second['max']), 15.5)
    self.assertEqual(second['sequence'], LONG_SEQUENCE)
    # Sorting by max value puts the 15.5 row first.
    predictions = CustomResultData.get_predictions(db, result_uuid, sort_max_value=True,
                                                   limit=None, offset=None)
    self.assertEqual(len(predictions), 2)
    self.assertEqual(float(predictions[0]['max']), 15.5)
    self.assertEqual(float(predictions[1]['max']), 12.5)
    # Limit/offset paginate the sorted results.
    predictions = CustomResultData.get_predictions(db, result_uuid, sort_max_value=True,
                                                   limit=1, offset=1)
    self.assertEqual(len(predictions), 1)
    self.assertEqual(float(predictions[0]['max']), 12.5)
    # Predictions must be JSON-serializable.
    json_version = json.dumps({'data': predictions})
    self.assertEqual(json_version[:6], '{"data')
def post_custom_result():
    """
    Save custom prediction/preferences results.
    Secured via apache config: production/imads.conf.
    request['job_id'] - str: uuid of the job associated with these results
    request['bed_data'] - str: base64-encoded data that makes up the results
    request['model_name'] - str: name of the model used to build these results
    :return: json response with uuid of result stored in 'id' field
    """
    job_id, model_name = get_required_json_props(request, ["job_id", "model_name"])
    # bed_data arrives base64-encoded; decode before storing.
    encoded_bed_data = request.get_json().get('bed_data')
    decoded_bed_data = base64.b64decode(encoded_bed_data)
    result_uuid = CustomResultData.new_uuid()
    result_data = CustomResultData(get_db(), result_uuid, job_id, model_name, decoded_bed_data)
    result_data.save()
    return make_json_response({'result': 'ok', 'id': result_uuid})
def post_custom_result():
    """
    Save custom prediction/preferences results.
    Secured via apache config: production/tfpredictions.conf.
    request['job_id'] - str: uuid of the job associated with these results
    request['bed_data'] - str: base64-encoded data that makes up the results
    request['model_name'] - str: name of the model used to build these results
    :return: json response with uuid of result stored in 'id' field
    """
    required = ["job_id", "model_name"]
    (job_id, model_name) = get_required_json_props(request, required)
    # The payload is base64-encoded so it survives JSON transport; decode it here.
    raw_bed_data = base64.b64decode(request.get_json().get('bed_data'))
    result_uuid = CustomResultData.new_uuid()
    record = CustomResultData(get_db(), result_uuid, job_id, model_name, raw_bed_data)
    record.save()
    return make_json_response({'result': 'ok', 'id': result_uuid})
def find_custom_result():
    """
    Find a single prediction for a sequence_id and model_name.
    request['sequence_id'] str: uuid of the custom sequence to look for
    request['model_name'] str: name of the model we are looking for
    :return: json response with id field that is either None or the uuid of the
        custom_predictions/custom_preferences.
    """
    query = request.args
    sequence_id = query['sequence_id']
    model_name = query['model_name']
    found_id = CustomResultData.find_one(get_db(), sequence_id, model_name)
    return make_ok_json_response({'id': found_id})
def test_determine_last_page(self, mock_read_database):
    """determine_last_page should turn an item count and page size into a last page number."""
    cases = [
        # (num_items, per_page, expected_last_page)
        (0, 10, 0),
        (1, 10, 1),
        (10, 10, 1),
        (11, 10, 2),
        (22, 3, 8),
    ]
    for num_items, per_page, expected_last_page in cases:
        # The mocked count query returns a single row holding the item count.
        mock_read_database.return_value = [[num_items]]
        actual = CustomResultData.determine_last_page(db=None,
                                                      result_uuid='123-456-780',
                                                      per_page=per_page)
        self.assertEqual(actual, expected_last_page)
def test_custom_result_sequence_lookup(self, mock_read_database):
    """custom_result_sequence_lookup should map sequence names to their DNA strings."""
    mock_read_database.return_value = [
        ('wild', 'attattattatt'),
        ('normal', 'catcatcatcat'),
    ]
    lookup = CustomResultData.custom_result_sequence_lookup(db=None, result_id='456')
    self.assertEqual(lookup, {'wild': 'attattattatt', 'normal': 'catcatcatcat'})
    # Verify the SQL text and bound parameters handed to read_database.
    (db, sql, params), _ = mock_read_database.call_args
    self.assertEqual(params, ['456'])
    self.assertIn('select sequence_list_item.name, sequence_list_item.sequence', sql)
def test_custom_job_normal_workflow(self):
    """End-to-end workflow with a two-sequence FASTA and four-column BED result data."""
    FASTA_DATA1 = """>someseq\nAAACCCGGGGTT\n>someseq2\nAAACCCGGGGTTAAACCCGGGGTTAAACCCGGGGTTAAACCCGGGGTTAAACCCGGGGTT"""
    db = create_db_connection(TestWithPostgres.config.dbconfig)
    # Upload the FASTA file.
    sequence_list = SequenceList.create_with_content_and_title(db, FASTA_DATA1, "sometitle")
    # Create a prediction job for the sequence list, then mark it running.
    job_uuid = CustomJob.create_job(db, DataType.PREDICTION, sequence_list, model_name="E2f1").uuid
    CustomJob.set_job_running(db, job_uuid)
    # Save BED-formatted rows: name, start, stop, value.
    BED_DATA = """
someseq\t0\t10\t12.5
someseq2\t20\t30\t4.5
someseq2\t60\t75\t15.5
""".strip()
    result_uuid = CustomResultData.new_uuid()
    result = CustomResultData(db, result_uuid, job_uuid, model_name='E2f1', bed_data=BED_DATA)
    result.save()
    self.assertEqual(CustomResultData.bed_file_contents(db, result_uuid).strip(), BED_DATA)
    # Unsorted query returns both sequences in upload order.
    predictions = CustomResultData.get_predictions(db, result_uuid, sort_max_value=False,
                                                   limit=None, offset=None)
    self.assertEqual(len(predictions), 2)
    first, second = predictions
    self.assertEqual(first['name'], 'someseq')
    self.assertEqual(float(first['max']), 12.5)
    self.assertEqual(first['values'], [{u'start': 0, u'end': 10, u'value': 12.5}])
    self.assertEqual(first['sequence'], 'AAACCCGGGGTT')
    self.assertEqual(second['name'], 'someseq2')
    self.assertEqual(float(second['max']), 15.5)
    self.assertEqual(second['sequence'],
                     'AAACCCGGGGTTAAACCCGGGGTTAAACCCGGGGTTAAACCCGGGGTTAAACCCGGGGTT')
    # Sorting by max value puts the 15.5 row first.
    predictions = CustomResultData.get_predictions(db, result_uuid, sort_max_value=True,
                                                   limit=None, offset=None)
    self.assertEqual(len(predictions), 2)
    self.assertEqual(float(predictions[0]['max']), 15.5)
    self.assertEqual(float(predictions[1]['max']), 12.5)
    # Limit/offset paginate the sorted results.
    predictions = CustomResultData.get_predictions(db, result_uuid, sort_max_value=True,
                                                   limit=1, offset=1)
    self.assertEqual(len(predictions), 1)
    self.assertEqual(float(predictions[0]['max']), 12.5)
    # Predictions must be JSON-serializable.
    json_version = json.dumps({'data': predictions})
    self.assertEqual(json_version[:6], '{"data')
def find_custom_results_for_sequence_and_model_name():
    """
    Find a custom results for a sequence_id.
    request['sequence_id'] str: sequence id to use when searching custom results
    request['model_name'] str: optional model name to filter with
    :return: json response with results array of dict with keys resultId,modelName,sequenceId
    """
    sequence_id = request.args.get('sequence_id')
    # sequence_id is mandatory; model_name is an optional filter.
    if not sequence_id:
        raise ValueError("Missing required sequence_id field.")
    model_name = request.args.get('model_name')
    matches = CustomResultData.find(get_db(), sequence_id, model_name)
    return make_ok_json_response({'results': matches})
def test_bed_file_contents(self, mock_read_database):
    """bed_file_contents should append the matching DNA fragment to each BED row."""
    mock_read_database.side_effect = [
        # First query: name -> dna sequence (custom_result_sequence_lookup response).
        [
            ('wild', 'attattattatt'),
            ('normal', 'catcatcatcat'),
        ],
        # Second query: name, start, stop, value rows for the BED body.
        [
            ('wild', 4, 8, 0.9),
            ('normal', 7, 11, 0.4),
        ],
    ]
    actual = CustomResultData.bed_file_contents(db=None, result_id='123')
    expected = """
wild\t4\t8\t0.9\tttat
normal\t7\t11\t0.4\tatca
"""
    self.assertEqual(actual.strip(), expected.strip())
def test_find_with_model_filter(self):
    """find() with a model_name should add a model filter to the SQL and its params."""
    self.query_sql = ''
    self.query_params = ''

    def capture_read_database(db, sql, params):
        # Record the query so we can assert on it after the call.
        self.query_sql = sql
        self.query_params = params
        return [('123', 'ELK1')]

    saved_read_database = pred.webserver.customresult.read_database
    try:
        pred.webserver.customresult.read_database = capture_read_database
        uid = '8B9836B5-8E3D-4346-AB12-69DD10313C77'
        results = CustomResultData.find(db=None, sequence_id=uid, model_name='ELK1')
        self.assertEqual(len(results), 1)
        item = results[0]
        self.assertEqual(item['resultId'], '123')
        self.assertEqual(item['modelName'], 'ELK1')
        self.assertEqual(item['sequenceId'], '8B9836B5-8E3D-4346-AB12-69DD10313C77')
        self.assertIn('custom_result.model_name =', self.query_sql)
        self.assertEqual(self.query_params,
                         ['8B9836B5-8E3D-4346-AB12-69DD10313C77', 'ELK1'])
    finally:
        # Always restore the real read_database even if an assertion fails.
        pred.webserver.customresult.read_database = saved_read_database
def test_last_page_query_and_params(self):
    """The count query should select from custom_result filtered by the result uuid."""
    query, params = CustomResultData.last_page_query_and_params(result_uuid='123-456-789')
    for fragment in ('select count(*)', 'from custom_result', 'custom_result.id = %s'):
        self.assertIn(fragment, query)
    self.assertEqual(params, ['123-456-789'])
def delete_old_jobs(cur, hours):
    """Delete jobs older than `hours`, removing each job's result data first."""
    for stale_job in CustomJob.find_old_jobs(cur, hours):
        # Remove dependent result rows before deleting the job itself.
        CustomResultData.delete_for_job(cur, stale_job.uuid)
        stale_job.delete(cur)
    return None
def test_find_with_invalid_uuid(self):
    """An empty sequence_id should be rejected with a ClientException."""
    with self.assertRaises(ClientException):
        CustomResultData.find(db=None, sequence_id='', model_name=None)