def test_custom_job_no_data(self):
    """An empty BED result should still yield one prediction row per sequence, with no values."""
    fasta_data = """>someseq\nAAACCCGGGGTT"""
    db = create_db_connection(TestWithPostgres.config.dbconfig)
    # upload FASTA file
    sequence_list = SequenceList.create_with_content_and_title(db, fasta_data, "somelist")
    # create a job to determine predictions for a sequence_list
    job_uuid = CustomJob.create_job(db, DataType.PREDICTION, sequence_list, model_name='E2f1').uuid
    # mark job as running
    CustomJob.set_job_running(db, job_uuid)
    # save a result with an empty BED payload
    bed_data = ''
    result_uuid = CustomResultData.new_uuid()
    result = CustomResultData(db, result_uuid, job_uuid, model_name='E2f1', bed_data=bed_data)
    result.save()
    predictions = CustomResultData.get_predictions(db, result_uuid, sort_max_value=False,
                                                   limit=None, offset=None)
    self.assertEqual(1, len(predictions))
    first = predictions[0]
    self.assertEqual('someseq', first['name'])
    # max comes back as the string 'None' when there is no data for the sequence
    self.assertEqual('None', first['max'])
    self.assertEqual([], first['values'])
    self.assertEqual('AAACCCGGGGTT', first['sequence'])
    # Make sure we can convert predictions to JSON
    json_version = json.dumps({'data': predictions})
    self.assertEqual('{"data', json_version[:6])
def test_custom_range_list_range_sum_too_big(self):
    """save_custom_file must reject a custom range whose total size exceeds the maximum.

    Uses assertRaises instead of the try/self.fail/except pattern so the
    failure message and exception matching are handled by unittest, and the
    unused key returned by save_custom_file is no longer bound.
    """
    db = create_db_connection(TestWithPostgres.config.dbconfig)
    # this range is just over the allowed total size (see the companion
    # test_custom_range_list_range_as_big_as_possible, which uses 30001000)
    with self.assertRaises(ValueError) as context:
        save_custom_file(db, 'john', RANGE_TYPE, "1 1000 30001001")
    self.assertEqual(str(context.exception), MAX_RANGE_ERROR_STR)
def connect_and_delete_old_lists():
    """Connect to the database, delete expired items, commit, and close.

    Reads the database settings from CONFIG_FILENAME. The cursor and the
    connection are now closed in finally blocks so they are not leaked when
    delete_old_items (or commit) raises.
    """
    config = parse_config(CONFIG_FILENAME)
    db = create_db_connection(config.dbconfig)
    try:
        cur = db.cursor()
        try:
            delete_old_items(cur)
        finally:
            cur.close()
        db.commit()
    finally:
        # always release the connection, even on error
        db.close()
def test_custom_job_normal_workflow_with_sequence_column(self):
    """Full custom-job workflow where BED rows carry a fifth (sequence) column.

    NOTE(review): this method previously shared the name
    test_custom_job_normal_workflow with another test in this class, so the
    later definition shadowed this one and it never ran. Renamed so both
    tests are discovered and executed.
    """
    SHORT_SEQUENCE = 'AAACCCGGGGTT'
    LONG_SEQUENCE = 'AAACCCGGGGTTAAACCCGGGGTTAAACCCGGGGTTAAACCCGGGGTTAAACCCGGGGTT' \
                    'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'
    FASTA_DATA1 = '>someseq\n' + SHORT_SEQUENCE + '\n' \
                  '>someseq2\n' + LONG_SEQUENCE
    db = create_db_connection(TestWithPostgres.config.dbconfig)
    # upload FASTA file
    sequence_list = SequenceList.create_with_content_and_title(db, FASTA_DATA1, "sometitle")
    # create a job to determine predictions for a sequence_list
    job_uuid = CustomJob.create_job(db, DataType.PREDICTION, sequence_list, model_name="E2f1").uuid
    # mark job as running
    CustomJob.set_job_running(db, job_uuid)
    # upload result file (name, start, end, value, sequence columns)
    BED_DATA = """
someseq\t0\t10\t12.5\tAAACCCGGGG
someseq2\t20\t30\t4.5\tGGTTAAACCC
someseq2\t60\t75\t15.5\tAAAAAAAAAAAAAAA
""".strip()
    result_uuid = CustomResultData.new_uuid()
    result = CustomResultData(db, result_uuid, job_uuid, model_name='E2f1', bed_data=BED_DATA)
    result.save()
    self.assertEqual(BED_DATA, CustomResultData.bed_file_contents(db, result_uuid).strip())
    predictions = CustomResultData.get_predictions(db, result_uuid, sort_max_value=False,
                                                   limit=None, offset=None)
    self.assertEqual(2, len(predictions))
    first = predictions[0]
    self.assertEqual('someseq', first['name'])
    self.assertEqual(12.5, float(first['max']))
    self.assertEqual([{u'start': 0, u'end': 10, u'value': 12.5}], first['values'])
    self.assertEqual(SHORT_SEQUENCE, first['sequence'])
    second = predictions[1]
    self.assertEqual('someseq2', second['name'])
    self.assertEqual(15.5, float(second['max']))
    self.assertEqual(LONG_SEQUENCE, second['sequence'])
    # sorting by max value should reverse the order of the two predictions
    predictions = CustomResultData.get_predictions(db, result_uuid, sort_max_value=True,
                                                   limit=None, offset=None)
    self.assertEqual(2, len(predictions))
    self.assertEqual(15.5, float(predictions[0]['max']))
    self.assertEqual(12.5, float(predictions[1]['max']))
    # limit/offset pages through the sorted results
    predictions = CustomResultData.get_predictions(db, result_uuid, sort_max_value=True,
                                                   limit=1, offset=1)
    self.assertEqual(1, len(predictions))
    self.assertEqual(12.5, float(predictions[0]['max']))
    # Make sure we can convert predictions to JSON
    json_version = json.dumps({'data': predictions})
    self.assertEqual('{"data', json_version[:6])
def test_custom_range_list(self):
    """Searching with a custom range that overlaps known data returns one prediction."""
    db = create_db_connection(TestWithPostgres.config.dbconfig)
    list_key = save_custom_file(db, 'john', RANGE_TYPE, "chr1 11873 11895")
    search_params = {
        SearchArgs.GENE_LIST: CUSTOM_RANGES_LIST,
        SearchArgs.CUSTOM_LIST_DATA: list_key,
        SearchArgs.MODEL: "E2F1_0001(JS)",
        SearchArgs.UPSTREAM: "100",
        SearchArgs.DOWNSTREAM: "100",
        SearchArgs.PAGE: "1",
        SearchArgs.PER_PAGE: "10",
    }
    predictions, search_args, search_warning = get_predictions_with_guess(
        db, TestWithPostgres.config, "hg19", search_params)
    self.assertEqual(len(predictions), 1)
    self.assertEqual(0.4, float(predictions[0]['max']))
def test_custom_gene_list_with_lc_results(self):
    """A custom gene list entered in lowercase ('ddx11l1') still matches one prediction."""
    db = create_db_connection(TestWithPostgres.config.dbconfig)
    list_key = save_custom_file(db, 'john', GENE_LIST_TYPE, "ddx11l1")
    search_params = {
        SearchArgs.GENE_LIST: CUSTOM_GENE_LIST,
        SearchArgs.CUSTOM_LIST_DATA: list_key,
        SearchArgs.CUSTOM_GENE_SEARCH_TYPE: CUSTOM_GENE_NAME_TYPE,
        SearchArgs.MODEL: "E2F1_0001(JS)",
        SearchArgs.UPSTREAM: "100",
        SearchArgs.DOWNSTREAM: "100",
        SearchArgs.PAGE: "1",
        SearchArgs.PER_PAGE: "10",
    }
    predictions, search_args, search_warning = get_predictions_with_guess(
        db, TestWithPostgres.config, "hg19", search_params)
    self.assertEqual(len(predictions), 1)
def test_custom_job_normal_workflow(self):
    """Full custom-job workflow: upload FASTA, run job, save BED result, query predictions."""
    fasta_data = """>someseq\nAAACCCGGGGTT\n>someseq2\nAAACCCGGGGTTAAACCCGGGGTTAAACCCGGGGTTAAACCCGGGGTTAAACCCGGGGTT"""
    db = create_db_connection(TestWithPostgres.config.dbconfig)
    # upload FASTA file
    sequence_list = SequenceList.create_with_content_and_title(db, fasta_data, "sometitle")
    # create a job to determine predictions for a sequence_list
    job_uuid = CustomJob.create_job(db, DataType.PREDICTION, sequence_list, model_name="E2f1").uuid
    # mark job as running
    CustomJob.set_job_running(db, job_uuid)
    # upload result file (name, start, end, value columns)
    bed_data = """
someseq\t0\t10\t12.5
someseq2\t20\t30\t4.5
someseq2\t60\t75\t15.5
""".strip()
    result_uuid = CustomResultData.new_uuid()
    result = CustomResultData(db, result_uuid, job_uuid, model_name='E2f1', bed_data=bed_data)
    result.save()
    self.assertEqual(bed_data, CustomResultData.bed_file_contents(db, result_uuid).strip())
    # unsorted query returns both sequences in upload order
    predictions = CustomResultData.get_predictions(db, result_uuid, sort_max_value=False,
                                                   limit=None, offset=None)
    self.assertEqual(2, len(predictions))
    first = predictions[0]
    self.assertEqual('someseq', first['name'])
    self.assertEqual(12.5, float(first['max']))
    self.assertEqual([{u'start': 0, u'end': 10, u'value': 12.5}], first['values'])
    self.assertEqual('AAACCCGGGGTT', first['sequence'])
    second = predictions[1]
    self.assertEqual('someseq2', second['name'])
    self.assertEqual(15.5, float(second['max']))
    self.assertEqual('AAACCCGGGGTTAAACCCGGGGTTAAACCCGGGGTTAAACCCGGGGTTAAACCCGGGGTT', second['sequence'])
    # sorting by max value reverses the order
    predictions = CustomResultData.get_predictions(db, result_uuid, sort_max_value=True,
                                                   limit=None, offset=None)
    self.assertEqual(2, len(predictions))
    self.assertEqual(15.5, float(predictions[0]['max']))
    self.assertEqual(12.5, float(predictions[1]['max']))
    # limit/offset pages through the sorted results
    predictions = CustomResultData.get_predictions(db, result_uuid, sort_max_value=True,
                                                   limit=1, offset=1)
    self.assertEqual(1, len(predictions))
    self.assertEqual(12.5, float(predictions[0]['max']))
    # Make sure we can convert predictions to JSON
    json_version = json.dumps({'data': predictions})
    self.assertEqual('{"data', json_version[:6])
def test_custom_range_list_bad_range(self):
    """A range with no matching data still returns one (empty) prediction record."""
    db = create_db_connection(TestWithPostgres.config.dbconfig)
    list_key = save_custom_file(db, 'john', RANGE_TYPE, "chr1 91873 91883")
    search_params = {
        SearchArgs.GENE_LIST: CUSTOM_RANGES_LIST,
        SearchArgs.CUSTOM_LIST_DATA: list_key,
        SearchArgs.MODEL: "E2F1_0001(JS)",
        SearchArgs.UPSTREAM: "100",
        SearchArgs.DOWNSTREAM: "100",
        SearchArgs.PAGE: "1",
        SearchArgs.PER_PAGE: "10",
    }
    predictions, search_args, search_warning = get_predictions_with_guess(
        db, TestWithPostgres.config, "hg19", search_params)
    self.assertEqual(len(predictions), 1)
    # we always return a record for range requests just with empty data for the matches
    self.assertEqual('None', predictions[0]['max'])
def test_sequence_list(self):
    """Sequence lists round-trip: content and title are stored and read back intact."""
    FASTA_DATA1 = """>HSBGPG Human gene for bone gla protein (BGP)
GGCAGATTCCCCCTAGACCCGCCCGCACCATGGTCAGGCATGCCCCTCCTCATCGCTGGGCACAGCCCAGAGGGT
ATAAACAGTGCTGGAGGCTGGCGGGGCAGGCCAGCTGAGTCCTGAGCAGCAGCCCAGCGCAGCCACCGAGACACC
>HSGLTH1 Human theta 1-globin gene
CCACTGCACTCACCGCACCCGGCCAATTTTTGTGTTTTTAGTAGAGACTAAATACCATATAGTGAACACCTAAGA
CGGGGGGCCTTGGATCCAGGGCGATTCAGAGGGCCCCGGTCGGAGCTGTCGGAGATTGAGCGCGCGCGGTCCCGG"""
    FASTA_DATA2 = """>stuff
AAACCCGGGG"""
    db = create_db_connection(TestWithPostgres.config.dbconfig)
    # save two independent lists, then read both back
    uuid1 = SequenceList.create_with_content_and_title(db, FASTA_DATA1, "mystuff")
    uuid2 = SequenceList.create_with_content_and_title(db, FASTA_DATA2, "mystuff2")
    seq_list1 = SequenceList.read_list(db, uuid1)
    seq_list2 = SequenceList.read_list(db, uuid2)
    self.assertEqual(FASTA_DATA1, seq_list1.content)
    self.assertEqual("mystuff", seq_list1.title)
    self.assertEqual("mystuff2", seq_list2.title)
def test_prediction_query(self):
    """A knowngene search returns one merged prediction with three values."""
    db = create_db_connection(TestWithPostgres.config.dbconfig)
    search_params = {
        SearchArgs.GENE_LIST: "knowngene",
        SearchArgs.MODEL: "E2F1_0001(JS)",
        SearchArgs.UPSTREAM: "100",
        SearchArgs.DOWNSTREAM: "50",
        SearchArgs.PAGE: "1",
        SearchArgs.PER_PAGE: "10",
    }
    predictions, search_args, search_warning = get_predictions_with_guess(
        db, TestWithPostgres.config, "hg19", search_params)
    self.assertEqual(len(predictions), 1)
    first_pred = predictions[0]
    # splice variants of the same gene are merged into one record
    self.assertEqual(first_pred['name'], 'uc001aaa.3; uc010nxq.1; uc010nxr.1')
    values = first_pred['values']
    self.assertEqual(len(values), 3)
    pred_value_set = {v['value'] for v in values}
    self.assertIn(0.4, pred_value_set)
    self.assertIn(0.1, pred_value_set)
def test_custom_gene_list_id_results(self):
    """
    These two splice variants belong to the same gene so they should list
    together in a single prediction.
    """
    db = create_db_connection(TestWithPostgres.config.dbconfig)
    list_key = save_custom_file(db, 'john', GENE_LIST_TYPE, "uc001aaa.3\nuc010nxr.1")
    search_params = {
        SearchArgs.GENE_LIST: CUSTOM_GENE_LIST,
        SearchArgs.CUSTOM_LIST_DATA: list_key,
        SearchArgs.CUSTOM_GENE_SEARCH_TYPE: CUSTOM_ID_TYPE,
        SearchArgs.MODEL: "E2F1_0001(JS)",
        SearchArgs.UPSTREAM: "100",
        SearchArgs.DOWNSTREAM: "100",
        SearchArgs.PAGE: "1",
        SearchArgs.PER_PAGE: "10",
    }
    predictions, search_args, search_warning = get_predictions_with_guess(
        db, TestWithPostgres.config, "hg19", search_params)
    self.assertEqual(len(predictions), 1)
    # the merged record's name lists both requested splice variant ids
    name_parts = predictions[0]['name'].split("; ")
    self.assertEqual(len(name_parts), 2)
    self.assertIn("uc001aaa.3", name_parts)
    self.assertIn("uc010nxr.1", name_parts)
def test_custom_range_list_range_as_big_as_possible(self):
    """A custom range at the maximum allowed size must be accepted.

    Companion to test_custom_range_list_range_sum_too_big, which uses an end
    of 30001001 and expects a ValueError; here the end is one base smaller
    and saving must succeed. The original test made no assertion at all, so
    it could only fail by raising; now it also checks a key was returned.
    """
    db = create_db_connection(TestWithPostgres.config.dbconfig)
    custom_list_key = save_custom_file(db, 'john', RANGE_TYPE, "1 1000 30001000")
    self.assertIsNotNone(custom_list_key)
def test_customjob(self):
    """Exercise the CustomJob lifecycle: NEW -> RUNNING -> COMPLETE -> ERROR, plus find_jobs filtering."""
    fasta_data = """>stuff\nAAACCCGGGGTT"""
    db = create_db_connection(TestWithPostgres.config.dbconfig)
    # clear out all job-related tables so counts below are deterministic
    update_database(db, """
        delete from custom_result_row;
        delete from custom_result;
        delete from job;
        delete from sequence_list_item;
        delete from sequence_list;
        """, [])
    # start out finding no jobs
    jobs = CustomJob.find_jobs(db, None)
    self.assertEqual(len(jobs), 0)
    # create a new job that should be NEW status
    sequence_list = SequenceList.create_with_content_and_title(db, fasta_data, "somelist")
    job_uuid = CustomJob.create_job(db, DataType.PREDICTION, sequence_list, model_name="E2f1").uuid
    job = CustomJob.read_job(db, job_uuid)
    self.assertEqual(job_uuid, job.uuid)
    self.assertEqual(JobStatus.NEW, job.status)
    self.assertEqual(DataType.PREDICTION, job.type)
    self.assertEqual(sequence_list, job.sequence_list)
    self.assertIsNotNone(job.created)
    self.assertIsNone(job.finished)
    # find NEW job without filters
    jobs = CustomJob.find_jobs(db, None)
    self.assertEqual(len(jobs), 1)
    self.assertEqual(jobs[0].uuid, job_uuid)
    # no RUNNING jobs exist yet
    jobs = CustomJob.find_jobs(db, JobStatus.RUNNING)
    self.assertEqual(len(jobs), 0)
    # exactly one NEW job
    jobs = CustomJob.find_jobs(db, JobStatus.NEW)
    self.assertEqual(len(jobs), 1)
    # Jobs can be set to running only once (when in NEW state)
    CustomJob.set_job_running(db, job_uuid)
    job = CustomJob.read_job(db, job_uuid)
    self.assertEqual(JobStatus.RUNNING, job.status)
    self.assertIsNone(job.finished)
    # Disallow setting a job running twice (prevents two workers working on the same job)
    with self.assertRaises(ValueError):
        CustomJob.set_job_running(db, job_uuid)
    # no NEW jobs remain once the job is running
    jobs = CustomJob.find_jobs(db, JobStatus.NEW)
    self.assertEqual(len(jobs), 0)
    # a RUNNING job can be marked complete, recording a finish time
    CustomJob.set_job_complete(db, job_uuid)
    job = CustomJob.read_job(db, job_uuid)
    self.assertEqual(JobStatus.COMPLETE, job.status)
    self.assertIsNotNone(job.finished)
    jobs = CustomJob.find_jobs(db, JobStatus.NEW)
    self.assertEqual(len(jobs), 0)
    # marking the job as errored records the message and keeps a finish time
    CustomJob.set_job_as_error(db, job_uuid, "Something failed.")
    job = CustomJob.read_job(db, job_uuid)
    self.assertEqual(JobStatus.ERROR, job.status)
    self.assertEqual("Something failed.", job.error_msg)
    self.assertIsNotNone(job.finished)
    jobs = CustomJob.find_jobs(db, JobStatus.NEW)
    self.assertEqual(len(jobs), 0)