Example #1
def post_jobs():
    """
    Create a job to generate preferences/predictions for a custom sequence using the specified model (model_name).
    request['sequence_id'] str: uuid of the custom sequence to process
    request['job_type'] str: see config.DataType properties for values
    request['model_name'] str: name of the model to use
    :return: json response with id of the job
    """
    required_prop_names = ["sequence_id", "job_type", "model_name"]
    (sequence_id, job_type,
     model_name) = get_required_json_props(request, required_prop_names)
    try:
        seq = SequenceList(sequence_id)
        seq.load(get_db())
    except KeyError as ex:
        raise ClientException("Unable to find sequence. It may have been purged.",
                              ErrorType.SEQUENCE_NOT_FOUND,
                              error_data=sequence_id)
    job = CustomJob.find_existing_job(get_db(), job_type, sequence_id,
                                      model_name)
    status_code = None
    if not job:
        status_code = None
        job = CustomJob.create_job(get_db(), job_type, sequence_id, model_name)
    return make_ok_json_response({'id': job.uuid}, status_code)
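A rough sketch of how a client might call this handler, assuming it is exposed as a JSON POST endpoint; the route, host, and the 'PREDICTION' job_type string are assumptions, only the three required property names come from the handler above.

# Hypothetical client call; the /api/v1/jobs route is an assumption.
import requests

payload = {
    "sequence_id": "5c1b0a62-0000-0000-0000-000000000000",  # uuid of a previously uploaded custom sequence
    "job_type": "PREDICTION",                               # exact string values come from config.DataType
    "model_name": "E2f1",
}
response = requests.post("http://localhost:5000/api/v1/jobs", json=payload)
response.raise_for_status()
job_id = response.json()["id"]  # uuid of the new (or already existing) job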
Example #2
def delete_old_items(cur):
    print("Deleting old custom lists.")
    cur.execute(DELETE_OLD_LISTS_SQL, [])
    print("Deleting old jobs.")
    CustomJob.delete_old_jobs(cur, DELETE_AFTER_HOURS)
    print("Deleting old custom sequences.")
    SequenceList.delete_old_and_unattached(cur, DELETE_AFTER_HOURS)
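A minimal sketch of how this cleanup routine might be driven, reusing the create_db_connection helper from the tests below and assuming it returns a DB-API style connection; the cursor/commit/close handling here is illustrative only.

# Illustrative wrapper around delete_old_items(); the cursor()/commit() calls
# assume a DB-API connection, which is not shown in the example itself.
def purge_old_data(config):
    db = create_db_connection(config.dbconfig)
    try:
        cur = db.cursor()
        delete_old_items(cur)
        db.commit()
    finally:
        db.close()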
Example #3
def get_custom_sequences_data(sequence_id):
    """
    Get base64 encoded contents and other properties of a custom sequence (DNA).
    :param sequence_id: str: uuid associated with a particular sequence
    :return: json response
    """
    seq = SequenceList(sequence_id)
    seq.load(get_db())
    return make_json_response({
        "id": seq.seq_uuid,
        "data": base64.b64encode(seq.content),
        "created": seq.created
    })
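Because the handler returns the sequence contents base64 encoded, a caller has to decode the "data" field to recover the original FASTA text. A small sketch; the field names are taken from the response dictionary above.

import base64
import json

# 'body' is assumed to be the JSON text produced by the handler above.
def fasta_from_response(body):
    doc = json.loads(body)
    fasta_text = base64.b64decode(doc["data"])  # reverses the b64encode in the handler
    return doc["id"], doc["created"], fasta_text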
Example #4
def get_custom_sequences_data(sequence_id):
    """
    Get base64 encoded contents and other properties of a custom sequence (DNA).
    :param sequence_id: str: uuid associated with a particular sequence
    :return: json response
    """
    seq = SequenceList(sequence_id)
    seq.load(get_db())
    return make_json_response({
        "id": seq.seq_uuid,
        "data": base64.b64encode(seq.content),
        "created": seq.created
    })
Example #5
    def test_custom_job_no_data(self):
        FASTA_DATA1 = """>someseq\nAAACCCGGGGTT"""
        db = create_db_connection(TestWithPostgres.config.dbconfig)
        # upload FASTA file
        sequence_list = SequenceList.create_with_content_and_title(db, FASTA_DATA1, "somelist")
        # create a job to determine predictions for a sequence_list
        job_uuid = CustomJob.create_job(db, DataType.PREDICTION, sequence_list, model_name='E2f1').uuid
        # mark job as running
        CustomJob.set_job_running(db, job_uuid)
        # upload file
        BED_DATA = ''
        result_uuid = CustomResultData.new_uuid()
        result = CustomResultData(db, result_uuid, job_uuid, model_name='E2f1', bed_data=BED_DATA)
        result.save()

        predictions = CustomResultData.get_predictions(db, result_uuid, sort_max_value=False,
                                                       limit=None, offset=None)
        self.assertEqual(1, len(predictions))
        first = predictions[0]
        self.assertEqual('someseq', first['name'])
        self.assertEqual('None', first['max'])
        self.assertEqual([], first['values'])
        self.assertEqual('AAACCCGGGGTT', first['sequence'])
        # Make sure we can convert predictions to JSON
        json_version = json.dumps({'data': predictions})
        self.assertEqual('{"data', json_version[:6])
Example #6
    def test_custom_job_no_data(self):
        FASTA_DATA1 = """>someseq\nAAACCCGGGGTT"""
        db = create_db_connection(TestWithPostgres.config.dbconfig)
        # upload FASTA file
        sequence_list = SequenceList.create_with_content_and_title(db, FASTA_DATA1, "somelist")
        # create a job to determine predictions for a sequence_list
        job_uuid = CustomJob.create_job(db, DataType.PREDICTION, sequence_list, model_name='E2f1').uuid
        # mark job as running
        CustomJob.set_job_running(db, job_uuid)
        # upload file
        BED_DATA = ''
        result_uuid = CustomResultData.new_uuid()
        result = CustomResultData(db, result_uuid, job_uuid, model_name='E2f1', bed_data=BED_DATA)
        result.save()

        predictions = CustomResultData.get_predictions(db, result_uuid, sort_max_value=False,
                                                       limit=None, offset=None)
        self.assertEqual(1, len(predictions))
        first = predictions[0]
        self.assertEqual('someseq', first['name'])
        self.assertEqual('None', first['max'])
        self.assertEqual([], first['values'])
        self.assertEqual('AAACCCGGGGTT', first['sequence'])
        # Make sure we can convert predictions to JSON
        json_version = json.dumps({'data': predictions})
        self.assertEqual('{"data', json_version[:6])
Example #7
    def test_sequence_list(self):
        FASTA_DATA1 = """>HSBGPG Human gene for bone gla protein (BGP)
GGCAGATTCCCCCTAGACCCGCCCGCACCATGGTCAGGCATGCCCCTCCTCATCGCTGGGCACAGCCCAGAGGGT
ATAAACAGTGCTGGAGGCTGGCGGGGCAGGCCAGCTGAGTCCTGAGCAGCAGCCCAGCGCAGCCACCGAGACACC
>HSGLTH1 Human theta 1-globin gene
CCACTGCACTCACCGCACCCGGCCAATTTTTGTGTTTTTAGTAGAGACTAAATACCATATAGTGAACACCTAAGA
CGGGGGGCCTTGGATCCAGGGCGATTCAGAGGGCCCCGGTCGGAGCTGTCGGAGATTGAGCGCGCGCGGTCCCGG"""
        FASTA_DATA2 = """>stuff
AAACCCGGGG"""
        db = create_db_connection(TestWithPostgres.config.dbconfig)
        sequence_list1_uuid = SequenceList.create_with_content_and_title(db, FASTA_DATA1, "mystuff")
        sequence_list2_uuid = SequenceList.create_with_content_and_title(db, FASTA_DATA2, "mystuff2")
        seq_list1 = SequenceList.read_list(db, sequence_list1_uuid)
        seq_list2 = SequenceList.read_list(db, sequence_list2_uuid)
        self.assertEqual(FASTA_DATA1, seq_list1.content)
        self.assertEqual("mystuff", seq_list1.title)
        self.assertEqual("mystuff2", seq_list2.title)
Example #8
    def test_sequence_list(self):
        FASTA_DATA1 = """>HSBGPG Human gene for bone gla protein (BGP)
GGCAGATTCCCCCTAGACCCGCCCGCACCATGGTCAGGCATGCCCCTCCTCATCGCTGGGCACAGCCCAGAGGGT
ATAAACAGTGCTGGAGGCTGGCGGGGCAGGCCAGCTGAGTCCTGAGCAGCAGCCCAGCGCAGCCACCGAGACACC
>HSGLTH1 Human theta 1-globin gene
CCACTGCACTCACCGCACCCGGCCAATTTTTGTGTTTTTAGTAGAGACTAAATACCATATAGTGAACACCTAAGA
CGGGGGGCCTTGGATCCAGGGCGATTCAGAGGGCCCCGGTCGGAGCTGTCGGAGATTGAGCGCGCGCGGTCCCGG"""
        FASTA_DATA2 = """>stuff
AAACCCGGGG"""
        db = create_db_connection(TestWithPostgres.config.dbconfig)
        sequence_list1_uuid = SequenceList.create_with_content_and_title(db, FASTA_DATA1, "mystuff")
        sequence_list2_uuid = SequenceList.create_with_content_and_title(db, FASTA_DATA2, "mystuff2")
        seq_list1 = SequenceList.read_list(db, sequence_list1_uuid)
        seq_list2 = SequenceList.read_list(db, sequence_list2_uuid)
        self.assertEqual(FASTA_DATA1, seq_list1.content)
        self.assertEqual("mystuff", seq_list1.title)
        self.assertEqual("mystuff2", seq_list2.title)
Example #9
    def test_custom_job_normal_workflow(self):
        SHORT_SEQUENCE = 'AAACCCGGGGTT'
        LONG_SEQUENCE = 'AAACCCGGGGTTAAACCCGGGGTTAAACCCGGGGTTAAACCCGGGGTTAAACCCGGGGTT' \
                      'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'
        FASTA_DATA1 = '>someseq\n' + SHORT_SEQUENCE + '\n' \
                      '>someseq2\n' + LONG_SEQUENCE
        db = create_db_connection(TestWithPostgres.config.dbconfig)
        # upload FASTA file
        sequence_list = SequenceList.create_with_content_and_title(db, FASTA_DATA1, "sometitle")
        # create a job to determine predictions for a sequence_list
        job_uuid = CustomJob.create_job(db, DataType.PREDICTION, sequence_list, model_name="E2f1").uuid
        # mark job as running
        CustomJob.set_job_running(db, job_uuid)
        # upload file
        BED_DATA = """
someseq\t0\t10\t12.5\tAAACCCGGGG
someseq2\t20\t30\t4.5\tGGTTAAACCC
someseq2\t60\t75\t15.5\tAAAAAAAAAAAAAAA
            """.strip()
        result_uuid = CustomResultData.new_uuid()
        result = CustomResultData(db, result_uuid, job_uuid, model_name='E2f1', bed_data=BED_DATA)
        result.save()
        self.assertEqual(BED_DATA, CustomResultData.bed_file_contents(db, result_uuid).strip())

        predictions = CustomResultData.get_predictions(db, result_uuid, sort_max_value=False,
                                                       limit=None, offset=None)
        self.assertEqual(2, len(predictions))
        first = predictions[0]
        self.assertEqual('someseq', first['name'])
        self.assertEqual(12.5, float(first['max']))
        self.assertEqual([{u'start': 0, u'end': 10, u'value': 12.5}], first['values'])
        self.assertEqual(SHORT_SEQUENCE, first['sequence'])

        second = predictions[1]
        self.assertEqual('someseq2', second['name'])
        self.assertEqual(15.5, float(second['max']))
        self.assertEqual(LONG_SEQUENCE, second['sequence'])

        predictions = CustomResultData.get_predictions(db, result_uuid, sort_max_value=True,
                                                       limit=None, offset=None)
        self.assertEqual(2, len(predictions))
        self.assertEqual(15.5, float(predictions[0]['max']))
        self.assertEqual(12.5, float(predictions[1]['max']))

        predictions = CustomResultData.get_predictions(db, result_uuid, sort_max_value=True,
                                                       limit=1, offset=1)
        self.assertEqual(1, len(predictions))
        self.assertEqual(12.5, float(predictions[0]['max']))

        # Make sure we can convert predictions to JSON
        json_version = json.dumps({'data': predictions})
        self.assertEqual('{"data', json_version[:6])
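The BED_DATA blob above is plain tab-separated text with one row per predicted window (name, start, end, value, and here a matched-sequence column). A hedged sketch of turning such a blob into the per-row values the assertions check; this is illustrative, not the library's own loader.

# Minimal parser for the tab-separated rows used above.
def parse_bed_rows(bed_data):
    rows = []
    for line in bed_data.strip().splitlines():
        name, start, end, value = line.split("\t")[:4]
        rows.append({"name": name,
                     "start": int(start),
                     "end": int(end),
                     "value": float(value)})
    return rows

# parse_bed_rows(BED_DATA)[0] -> {'name': 'someseq', 'start': 0, 'end': 10, 'value': 12.5}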
Example #10
def post_jobs():
    """
    Create a job to generate preferences/predictions for a custom sequence using the specified model (model_name).
    request['sequence_id'] str: uuid of the custom sequence to process
    request['job_type'] str: see config.DataType properties for values
    request['model_name'] str: name of the model to use
    :return: json response with id of the job
    """
    required_prop_names = ["sequence_id", "job_type", "model_name"]
    (sequence_id, job_type, model_name) = get_required_json_props(request, required_prop_names)
    try:
        seq = SequenceList(sequence_id)
        seq.load(get_db())
    except KeyError as ex:
        raise ClientException("Unable to find sequence. It may have been purged.",
                              ErrorType.SEQUENCE_NOT_FOUND,
                              error_data=sequence_id)
    job = CustomJob.find_existing_job(get_db(), job_type, sequence_id, model_name)
    status_code = None
    if not job:
        status_code = None
        job = CustomJob.create_job(get_db(), job_type, sequence_id, model_name)
    return make_ok_json_response({'id': job.uuid}, status_code)
Example #11
    def test_sequence_too_big(self):
        with self.assertRaises(ClientException) as cm:
            seq_list = SequenceList('1234')
            seq_list.content = make_too_big()
            seq_list.title = 'Too big list'
            seq_list.insert(db=FakeDB())
        self.assertEqual(ErrorType.UPLOADED_DATA_TOO_BIG,
                         cm.exception.error_type)
        self.assertEqual(400, cm.exception.status_code)
Example #12
    def test_custom_job_normal_workflow(self):
        FASTA_DATA1 = """>someseq\nAAACCCGGGGTT\n>someseq2\nAAACCCGGGGTTAAACCCGGGGTTAAACCCGGGGTTAAACCCGGGGTTAAACCCGGGGTT"""
        db = create_db_connection(TestWithPostgres.config.dbconfig)
        # upload FASTA file
        sequence_list = SequenceList.create_with_content_and_title(db, FASTA_DATA1, "sometitle")
        # create a job to determine predictions for a sequence_list
        job_uuid = CustomJob.create_job(db, DataType.PREDICTION, sequence_list, model_name="E2f1").uuid
        # mark job as running
        CustomJob.set_job_running(db, job_uuid)
        # upload file
        BED_DATA = """
someseq\t0\t10\t12.5
someseq2\t20\t30\t4.5
someseq2\t60\t75\t15.5
            """.strip()
        result_uuid = CustomResultData.new_uuid()
        result = CustomResultData(db, result_uuid, job_uuid, model_name='E2f1', bed_data=BED_DATA)
        result.save()
        self.assertEqual(BED_DATA, CustomResultData.bed_file_contents(db, result_uuid).strip())

        predictions = CustomResultData.get_predictions(db, result_uuid, sort_max_value=False,
                                                       limit=None, offset=None)
        self.assertEqual(2, len(predictions))
        first = predictions[0]
        self.assertEqual('someseq', first['name'])
        self.assertEqual(12.5, float(first['max']))
        self.assertEqual([{u'start': 0, u'end': 10, u'value': 12.5}], first['values'])
        self.assertEqual('AAACCCGGGGTT', first['sequence'])

        second = predictions[1]
        self.assertEqual('someseq2', second['name'])
        self.assertEqual(15.5, float(second['max']))
        self.assertEqual('AAACCCGGGGTTAAACCCGGGGTTAAACCCGGGGTTAAACCCGGGGTTAAACCCGGGGTT', second['sequence'])

        predictions = CustomResultData.get_predictions(db, result_uuid, sort_max_value=True,
                                                       limit=None, offset=None)
        self.assertEqual(2, len(predictions))
        self.assertEqual(15.5, float(predictions[0]['max']))
        self.assertEqual(12.5, float(predictions[1]['max']))

        predictions = CustomResultData.get_predictions(db, result_uuid, sort_max_value=True,
                                                       limit=1, offset=1)
        self.assertEqual(1, len(predictions))
        self.assertEqual(12.5, float(predictions[0]['max']))

        # Make sure we can convert predictions to JSON
        json_version = json.dumps({'data': predictions})
        self.assertEqual('{"data', json_version[:6])
Example #13
    def test_customjob(self):
        FASTA_DATA1 = """>stuff\nAAACCCGGGGTT"""
        db = create_db_connection(TestWithPostgres.config.dbconfig)

        update_database(db, """
          delete from custom_result_row;
          delete from custom_result;
          delete from job;
          delete from sequence_list_item;
          delete from sequence_list;
        """, [])
        # start out finding no jobs
        jobs = CustomJob.find_jobs(db, None)
        self.assertEqual(len(jobs), 0)

        # create a new job that should have NEW status
        sequence_list = SequenceList.create_with_content_and_title(db, FASTA_DATA1, "somelist")
        job_uuid = CustomJob.create_job(db, DataType.PREDICTION, sequence_list, model_name="E2f1").uuid
        job = CustomJob.read_job(db, job_uuid)
        self.assertEqual(job_uuid, job.uuid)
        self.assertEqual(JobStatus.NEW, job.status)
        self.assertEqual(DataType.PREDICTION, job.type)
        self.assertEqual(sequence_list, job.sequence_list)
        self.assertIsNotNone(job.created)
        self.assertIsNone(job.finished)

        # find NEW job without filters
        jobs = CustomJob.find_jobs(db, None)
        self.assertEqual(len(jobs), 1)
        self.assertEqual(jobs[0].uuid, job_uuid)
        # find none for RUNNING jobs
        jobs = CustomJob.find_jobs(db, JobStatus.RUNNING)
        self.assertEqual(len(jobs), 0)
        # find 1 for NEW jobs
        jobs = CustomJob.find_jobs(db, JobStatus.NEW)
        self.assertEqual(len(jobs), 1)

        # Jobs can be set to running only once (when in NEW state)
        CustomJob.set_job_running(db, job_uuid)
        job = CustomJob.read_job(db, job_uuid)
        self.assertEqual(JobStatus.RUNNING, job.status)
        self.assertIsNone(job.finished)
        # Disallow setting a job running twice (prevents two workers working on the same job)
        with self.assertRaises(ValueError):
            CustomJob.set_job_running(db, job_uuid)

        # find 0 for NEW jobs
        jobs = CustomJob.find_jobs(db, JobStatus.NEW)
        self.assertEqual(len(jobs), 0)

        # Jobs can be set to complete from RUNNING state
        CustomJob.set_job_complete(db, job_uuid)
        job = CustomJob.read_job(db, job_uuid)
        self.assertEqual(JobStatus.COMPLETE, job.status)
        self.assertIsNotNone(job.finished)

        # find 0 for NEW jobs
        jobs = CustomJob.find_jobs(db, JobStatus.NEW)
        self.assertEqual(len(jobs), 0)

        # Jobs can be set to ERROR even from the COMPLETE state
        CustomJob.set_job_as_error(db, job_uuid, "Something failed.")
        job = CustomJob.read_job(db, job_uuid)
        self.assertEqual(JobStatus.ERROR, job.status)
        self.assertEqual("Something failed.", job.error_msg)
        self.assertIsNotNone(job.finished)

        # find 0 for NEW jobs
        jobs = CustomJob.find_jobs(db, JobStatus.NEW)
        self.assertEqual(len(jobs), 0)
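The test above walks the full job lifecycle (NEW -> RUNNING -> COMPLETE or ERROR). A hedged sketch of a worker loop built from the same CustomJob calls; run_predictions is a hypothetical stand-in for whatever actually produces the BED results, and the error handling is deliberately simple.

# Worker loop sketch using only the CustomJob calls exercised in the test above.
def process_new_jobs(db):
    for job in CustomJob.find_jobs(db, JobStatus.NEW):
        CustomJob.set_job_running(db, job.uuid)
        try:
            run_predictions(db, job)                 # hypothetical prediction step
            CustomJob.set_job_complete(db, job.uuid)
        except Exception as ex:
            CustomJob.set_job_as_error(db, job.uuid, str(ex))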
Example #14
def post_custom_sequences():
    (data, title) = get_required_json_props(request, ["data", "title"])
    decoded_data = base64.b64decode(data)
    seq_uuid = SequenceList.create_with_content_and_title(get_db(), decoded_data, title)
    return make_ok_json_response({'id': seq_uuid})
Example #15
    def test_customjob(self):
        FASTA_DATA1 = """>stuff\nAAACCCGGGGTT"""
        db = create_db_connection(TestWithPostgres.config.dbconfig)

        update_database(db, """
          delete from custom_result_row;
          delete from custom_result;
          delete from job;
          delete from sequence_list_item;
          delete from sequence_list;
        """, [])
        # start out finding no jobs
        jobs = CustomJob.find_jobs(db, None)
        self.assertEqual(len(jobs), 0)

        # create a new job that should have NEW status
        sequence_list = SequenceList.create_with_content_and_title(db, FASTA_DATA1, "somelist")
        job_uuid = CustomJob.create_job(db, DataType.PREDICTION, sequence_list, model_name="E2f1").uuid
        job = CustomJob.read_job(db, job_uuid)
        self.assertEqual(job_uuid, job.uuid)
        self.assertEqual(JobStatus.NEW, job.status)
        self.assertEqual(DataType.PREDICTION, job.type)
        self.assertEqual(sequence_list, job.sequence_list)
        self.assertIsNotNone(job.created)
        self.assertIsNone(job.finished)

        # find NEW job without filters
        jobs = CustomJob.find_jobs(db, None)
        self.assertEqual(len(jobs), 1)
        self.assertEqual(jobs[0].uuid, job_uuid)
        # find none for RUNNING jobs
        jobs = CustomJob.find_jobs(db, JobStatus.RUNNING)
        self.assertEqual(len(jobs), 0)
        # find 1 for NEW jobs
        jobs = CustomJob.find_jobs(db, JobStatus.NEW)
        self.assertEqual(len(jobs), 1)

        # Jobs can be set to running only once (when in NEW state)
        CustomJob.set_job_running(db, job_uuid)
        job = CustomJob.read_job(db, job_uuid)
        self.assertEqual(JobStatus.RUNNING, job.status)
        self.assertIsNone(job.finished)
        # Disallow setting a job running twice (prevents two workers working on the same job)
        with self.assertRaises(ValueError):
            CustomJob.set_job_running(db, job_uuid)

        # find 0 for NEW jobs
        jobs = CustomJob.find_jobs(db, JobStatus.NEW)
        self.assertEqual(len(jobs), 0)

        # Jobs can be set to complete from RUNNING state
        CustomJob.set_job_complete(db, job_uuid)
        job = CustomJob.read_job(db, job_uuid)
        self.assertEqual(JobStatus.COMPLETE, job.status)
        self.assertIsNotNone(job.finished)

        # find 0 for NEW jobs
        jobs = CustomJob.find_jobs(db, JobStatus.NEW)
        self.assertEqual(len(jobs), 0)

        # Jobs can be set to ERROR even from the COMPLETE state
        CustomJob.set_job_as_error(db, job_uuid, "Something failed.")
        job = CustomJob.read_job(db, job_uuid)
        self.assertEqual(JobStatus.ERROR, job.status)
        self.assertEqual("Something failed.", job.error_msg)
        self.assertIsNotNone(job.finished)

        # find 0 for NEW jobs
        jobs = CustomJob.find_jobs(db, JobStatus.NEW)
        self.assertEqual(len(jobs), 0)
Example #16
def post_custom_sequences():
    (data, title) = get_required_json_props(request, ["data", "title"])
    decoded_data = base64.b64decode(data)
    seq_uuid = SequenceList.create_with_content_and_title(
        get_db(), decoded_data, title)
    return make_ok_json_response({'id': seq_uuid})
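The "data" property is expected to arrive base64 encoded, so a caller encodes its FASTA text before posting. A short sketch of building the JSON body this handler expects; the sample FASTA and title are made up, and only the field names come from get_required_json_props above.

import base64

# Build the request body for post_custom_sequences(); the encode/decode pair
# here is the Python 3 idiom and is reversed by b64decode() in the handler.
fasta_text = ">myseq\nAAACCCGGGGTT"
payload = {
    "data": base64.b64encode(fasta_text.encode()).decode(),
    "title": "my sequence list",
}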
Example #17
    def test_sequence_good_size(self):
        seq_list = SequenceList('1234')
        seq_list.content = "myseq>\nAACCGGTTAACCGTTTTTAACCTTGGG"
        seq_list.title = 'Good list'
        seq_list.insert(db=FakeDB())