Example #1
    def test_determines_for_which_jobs_check_status_1(self):
        """
        Given a set of jobs currently in the database, knows for which it is required to check the status.
        In this case, some jobs require a check, the ones running in the same run environment
        """
        self.create_test_jobs_2()
        current_run_environment = RUN_CONFIG.get('run_env')

        with self.flask_app.app_context():
            lsf_config = RUN_CONFIG.get('lsf_submission')
            lsf_host = lsf_config['lsf_host']

            status_is_not_error_or_finished = delayed_job_models.DelayedJob.status.notin_(
                [delayed_job_models.JobStatuses.ERROR, delayed_job_models.JobStatuses.FINISHED]
            )
            lsf_host_is_my_host = delayed_job_models.DelayedJob.lsf_host == lsf_host

            run_environment_is_my_current_environment = \
                delayed_job_models.DelayedJob.run_environment == current_run_environment

            job_to_check_status_must_be = delayed_job_models.DelayedJob.query.filter(
                and_(lsf_host_is_my_host, status_is_not_error_or_finished, run_environment_is_my_current_environment)
            )

            lsf_ids_to_check_status_must_be = [job.lsf_job_id for job in job_to_check_status_must_be]
            job_ids_must_be = [job.id for job in job_to_check_status_must_be]
            lsf_ids_to_check_got = daemon.get_lsf_job_ids_to_check()
            self.assertListEqual(lsf_ids_to_check_status_must_be, lsf_ids_to_check_got,
                                 msg=f'The jobs for which to check the status were not determined '
                                     f'correctly! jobs must be {job_ids_must_be}')
Example #2
def save_custom_statistics_download_job(job_id, time_taken, desired_format,
                                        file_size, es_index, es_query,
                                        total_items):
    """
    Saves the custom statistics for the download job
    :param job_id: id of the job; used to verify that the job exists
    :param time_taken: Time in seconds taken to do the download
    :param desired_format: Format requested for the download
    :param file_size: The resulting file size of the job in bytes
    :param es_index: Name of the index for which the download was generated
    :param es_query: Query used for the download
    :param total_items: The number of items saved in the file
    """
    check_if_job_exists(job_id)

    doc = {
        'desired_format': desired_format,
        'download_id': job_id,
        'es_index': es_index,
        'es_query': es_query,
        'file_size': file_size,
        'host': 'delayed_jobs_k8s',
        'is_new': False,
        'request_date': datetime.datetime.utcnow().timestamp() * 1000,
        'run_env_type': RUN_CONFIG.get('run_env'),
        'time_taken': time_taken,
        'total_items': total_items
    }

    index_name = RUN_CONFIG.get('job_statistics').get(
        'download_job_statistics_index')
    statistics_saver.save_record_to_elasticsearch(doc, index_name)
    return {'operation_result': 'Statistics successfully saved!'}
def save_shortened_url(long_url, url_hash):
    """
    Saves the shortened url to es
    :param long_url: full url to save
    :param url_hash: hash of the url
    """

    now = datetime.utcnow()
    time_delta = timedelta(
        days=RUN_CONFIG.get('url_shortening').get('days_valid'))
    expiration_date = now + time_delta
    expires = expiration_date.timestamp() * 1000

    index_name = RUN_CONFIG.get('url_shortening').get('index_name')

    document = {
        'long_url': long_url,
        'hash': url_hash,
        'expires': expires,
        'creation_date_2': int(now.timestamp() * 1000)
    }

    dry_run = RUN_CONFIG.get('url_shortening').get('dry_run')
    if dry_run:
        app_logging.debug(
            f'Dry run is true, not saving the document {document} to the index {index_name}'
        )
    else:
        es_data.save_es_doc(index_name, document, refresh='wait_for')

    return expiration_date
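For context, save_shortened_url expects the caller to have already computed url_hash. A minimal sketch of how such a hash could be derived, assuming a plain SHA-256 digest (the real service may use a different algorithm or truncation):

import hashlib

def generate_url_hash(long_url):
    # Hypothetical helper, not shown in these examples: a stable digest of the
    # full URL that can be used as the 'hash' field of the document.
    return hashlib.sha256(long_url.encode('utf-8')).hexdigest()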
Example #4
    def create_test_jobs_1(self):
        """
        This will create:
        - 2 Jobs in error state, each running in a different lsf cluster
        - 2 Jobs in finished state, each running in a different lsf cluster
        """
        run_environment = RUN_CONFIG.get('run_env')
        lsf_config = RUN_CONFIG.get('lsf_submission')
        lsf_host = lsf_config['lsf_host']

        with self.flask_app.app_context():

            i = 0
            for status in [delayed_job_models.JobStatuses.FINISHED,
                           delayed_job_models.JobStatuses.ERROR]:

                for assigned_host in [lsf_host, 'another_host']:
                    job = delayed_job_models.DelayedJob(
                        id=f'Job-{assigned_host}-{status}',
                        type='TEST',
                        lsf_job_id=i,
                        status=status,
                        lsf_host=assigned_host,
                        run_environment=run_environment,
                        created_at=datetime.utcnow(),
                        started_at=datetime.utcnow() + timedelta(seconds=1),
                        finished_at=datetime.utcnow() + timedelta(seconds=2)
                    )
                    job.output_dir_path = job_submission_service.get_job_output_dir_path(job)
                    os.makedirs(job.output_dir_path, exist_ok=True)
                    delayed_job_models.save_job(job)
                    i += 1
def record_url_was_shortened():
    """
    Records that a URL was shortened.
    """
    doc = {
        "event": "URL_SHORTENED",
        "run_env_type": RUN_CONFIG.get('run_env'),
        "host": 'es_proxy_api_k8s',
        "request_date": datetime.utcnow().timestamp() * 1000,
    }

    index_name = RUN_CONFIG.get('url_shortening').get('statistics_index_name')
    save_record_to_elasticsearch(doc, index_name)
def record_expired_urls_were_deleted():
    """
    Records that the expired URLs were deleted.
    """
    doc = {
        "event": "EXPIRED_URLS_DELETED",
        "run_env_type": RUN_CONFIG.get('run_env'),
        "host": 'es_proxy_api_k8s',
        "request_date": datetime.utcnow().timestamp() * 1000,
    }

    index_name = RUN_CONFIG.get('url_shortening').get('statistics_index_name')
    save_record_to_elasticsearch(doc, index_name)
Example #7
def get_json():
    """
    returns the json response with the swagger description
    """

    yaml_file_path = Path(Path().absolute()).joinpath('app', 'swagger',
                                                      'swagger.yaml')

    with open(yaml_file_path, 'r') as stream:
        swagger_desc = yaml.safe_load(stream)
        swagger_desc['host'] = RUN_CONFIG.get('server_public_host')
        swagger_desc['basePath'] = RUN_CONFIG.get('base_path')
        return jsonify(swagger_desc)
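get_json is presumably exposed through the SWAGGER_BLUEPRINT that create_app registers (see Example #30). A hedged sketch of that wiring; the route path and module layout are assumptions:

from flask import Blueprint

SWAGGER_BLUEPRINT = Blueprint('swagger', __name__)

@SWAGGER_BLUEPRINT.route('/swagger.json')  # route path is an assumption
def get_swagger_description():
    return get_json()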
Example #8
def check_jobs_status(delete_lock_after_finishing=True):
    """
    The main function of this module. Checks for jobs to check the status, and checks their status in lsf
    :param delete_lock_after_finishing: determines if explicitly deletes the lock after finishing
    :return: (sleeptime, jobs_were_checked) the amount of seconds to wait for the next run and if the jobs
    were checked or not
    """
    lsf_config = RUN_CONFIG.get('lsf_submission')
    current_lsf_host = lsf_config['lsf_host']
    my_hostname = socket.gethostname()

    min_sleep_time = RUN_CONFIG.get('status_agent').get('min_sleep_time')
    max_sleep_time = RUN_CONFIG.get('status_agent').get('max_sleep_time')

    sleep_time = random.uniform(min_sleep_time, max_sleep_time)

    existing_lock = locks.get_lock_for_lsf_host(current_lsf_host)
    if existing_lock is not None:

        print(f'I ({my_hostname}) found a lock, waiting {sleep_time} seconds before checking again')
        return sleep_time, False

    else:
        print(f'Locking LSF status check for {current_lsf_host}, I am {my_hostname}')
        locks.set_lsf_lock(current_lsf_host, my_hostname)

    print('Looking for jobs to check...')
    lsf_job_ids_to_check = get_lsf_job_ids_to_check()
    print(f'lsf_job_ids_to_check: {lsf_job_ids_to_check}')

    if len(lsf_job_ids_to_check) == 0:
        if delete_lock_after_finishing:
            locks.delete_lsf_lock(current_lsf_host)
        return sleep_time, True

    script_path = prepare_job_status_check_script(lsf_job_ids_to_check)
    must_run_script = RUN_CONFIG.get('run_status_script', True)
    if not must_run_script:
        print('Not running script because run_status_script is False')
        if delete_lock_after_finishing:
            locks.delete_lsf_lock(current_lsf_host)
        return sleep_time, False

    try:
        script_output = get_status_script_output(script_path)
        os.remove(script_path)  # Remove the script after running so it doesn't fill up the NFS
        print(f'deleted script: {script_path}')
        parse_bjobs_output(script_output)
        if delete_lock_after_finishing:
            locks.delete_lsf_lock(current_lsf_host)
        return sleep_time, True
    except JobStatusDaemonError as error:
        print(error)
        return sleep_time, False
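The (sleep_time, jobs_were_checked) tuple is meant to drive a polling loop. A hedged sketch of a runner that consumes it (the project's actual daemon entry point is not shown in these examples):

import time

def run_status_daemon():
    # Hypothetical runner: sleep for the randomised interval suggested by
    # check_jobs_status before polling again.
    while True:
        sleep_time, jobs_were_checked = check_jobs_status()
        print(f'jobs_were_checked: {jobs_were_checked}, sleeping {sleep_time:.1f} seconds')
        time.sleep(sleep_time)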
def save_record_to_elasticsearch(doc, index_name):
    """
    Saves the given record to Elasticsearch, unless dry_run is set in the job_statistics configuration
    :param doc: dict with the record to save
    :param index_name: name of the index in which to save the record
    """
    dry_run = RUN_CONFIG.get('job_statistics', {}).get('dry_run', False)
    es_host = RUN_CONFIG.get('elasticsearch', {}).get('host')

    if dry_run:
        app_logging.debug(
            f'Not actually sending the record to the statistics (dry run): {doc}'
        )
    else:
        app_logging.debug(
            f'Sending the following record to the statistics: {doc} '
            f'index name: {index_name} es_host: {es_host}')
        result = ES.index(index=index_name, body=doc, doc_type='_doc')
        app_logging.debug(f'Result {result}')
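For reference, the statistics helpers above read a handful of RUN_CONFIG keys. A hedged sketch of the corresponding configuration shape, with illustrative values only (the actual config lives in the service's configuration files):

EXAMPLE_RUN_CONFIG = {
    'run_env': 'staging',  # illustrative value
    'elasticsearch': {'host': 'http://localhost:9200'},  # illustrative value
    'job_statistics': {
        'dry_run': True,
        'download_job_statistics_index': 'download_job_statistics',  # illustrative index name
    },
}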
Example #10
def submit_job_to_lsf(job):
    """
    Runs a script that submits the job to LSF
    :param job: DelayedJob object
    """
    submit_file_path = get_job_submission_script_file_path(job)
    submission_output_path = Path(submit_file_path).parent.joinpath(
        'submission.out')
    submission_error_path = Path(submit_file_path).parent.joinpath(
        'submission.err')

    lsf_config = RUN_CONFIG.get('lsf_submission')
    id_rsa_path = lsf_config['id_rsa_file']

    run_command = f'{submit_file_path} {id_rsa_path}'
    app_logging.debug(
        f'Going to run job submission script, command: {run_command}')

    must_run_jobs = RUN_CONFIG.get('run_jobs', True)
    if not must_run_jobs:
        app_logging.debug('Not submitting jobs because run_jobs is False')
        return

    submission_process = subprocess.run(run_command.split(' '),
                                        stdout=subprocess.PIPE,
                                        stderr=subprocess.PIPE)

    app_logging.debug(f'Submission STD Output: \n {submission_process.stdout}')
    app_logging.debug(f'Submission STD Error: \n {submission_process.stderr}')

    with open(submission_output_path, 'wb') as submission_out_file:
        submission_out_file.write(submission_process.stdout)

    with open(submission_error_path, 'wb') as submission_err_file:
        submission_err_file.write(submission_process.stderr)

    return_code = submission_process.returncode
    app_logging.debug(f'submission return code was: {return_code}')
    if return_code != 0:
        raise JobSubmissionError(
            'There was an error when running the job submission script! Please check the logs'
        )

    lsf_job_id = get_lsf_job_id(str(submission_process.stdout))
    job.lsf_job_id = lsf_job_id
    job.status = delayed_job_models.JobStatuses.QUEUED
    delayed_job_models.save_job(job)
    app_logging.debug(f'LSF Job ID is: {lsf_job_id}')
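submit_job_to_lsf relies on get_lsf_job_id to recover the job id from the captured stdout. A hedged sketch of how such a parser could work, assuming the standard bsub output line 'Job <12345> is submitted to queue <normal>.' (the real helper may differ):

import re

def get_lsf_job_id_sketch(submission_output):
    # Hypothetical parser: extract the numeric id from bsub's
    # 'Job <12345> is submitted to queue <normal>.' line.
    match = re.search(r'Job <(\d+)>', submission_output)
    return int(match.group(1)) if match is not None else None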
    def test_gets_all_configured_properties(self):
        """
        Tests that it returns all the configured properties for an index
        """

        groups_configuration_manager = get_group_configuration_instance()

        es_index_prefix = RUN_CONFIG.get('es_index_prefix')
        index_name = f'{es_index_prefix}activity'

        props_list_must_be = [
            '_metadata.organism_taxonomy.oc_id',
            '_metadata.assay_data.assay_subcellular_fraction',
            '_metadata.activity_generated.short_data_validity_comment',
            '_metadata.assay_data.assay_cell_type',
            '_metadata.assay_data.assay_organism',
            '_metadata.assay_data.assay_tissue'
        ]

        props_list_got = groups_configuration_manager.get_list_of_configured_properties(
            index_name)

        self.assertEqual(sorted(props_list_got),
                         sorted(props_list_must_be),
                         msg='The properties list is not correct!')
Example #12
def get_status_script_output(script_path):
    """
    Runs the status script and returns its text output; raises an exception if there is an error
    :param script_path: path of the script
    :return: the text output of stdout
    """
    lsf_config = RUN_CONFIG.get('lsf_submission')
    id_rsa_path = lsf_config['id_rsa_file']
    run_command = f'{script_path} {id_rsa_path}'
    print(f'Going to run job status script, command: {run_command}')
    status_check_process = subprocess.run(run_command.split(' '), stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    print(f'Output: \n {status_check_process.stdout}')
    print(f'Error: \n {status_check_process.stderr}')

    return_code = status_check_process.returncode
    print(f'script return code was: {return_code}')

    if return_code != 0:
        status_output_path = f'{script_path}.out'
        status_error_path = f'{script_path}.err'

        with open(status_output_path, 'wb') as status_out_file:
            status_out_file.write(status_check_process.stdout)

        with open(status_error_path, 'wb') as status_err_file:
            status_err_file.write(status_check_process.stderr)

        raise JobStatusDaemonError('There was an error when running the job status script! Please check the logs')
    else:
        return status_check_process.stdout.decode()
Example #13
def prepare_job_status_check_script(lsf_job_ids):
    """
    Prepares the script that will check for the job status to LSF
    :lsf_job_ids: the list of job ids for which check the status
    :return: the final path of the script that was created
    """

    job_status_script_template_path = os.path.join(Path().absolute(), 'templates', 'get_jobs_status.sh')
    with open(job_status_script_template_path, 'r') as template_file:
        job_status_template = template_file.read()

        lsf_config = RUN_CONFIG.get('lsf_submission')
        lsf_user = lsf_config['lsf_user']
        lsf_host = lsf_config['lsf_host']

        job_submission_script = job_status_template.format(
            LSF_JOB_IDS=' '.join([str(lsf_job_id) for lsf_job_id in lsf_job_ids]),
            LSF_USER=lsf_user,
            LSF_HOST=lsf_host
        )

        status_script_path = get_check_job_status_script_path()
        status_script_path.parent.mkdir(parents=True, exist_ok=True)
        with open(status_script_path, 'w') as status_script_file:
            status_script_file.write(job_submission_script)

        print(f'created script: {status_script_path}')
        # make sure file is executable
        file_stats = os.stat(status_script_path)
        os.chmod(status_script_path, file_stats.st_mode | stat.S_IEXEC)

    return status_script_path
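The templates/get_jobs_status.sh file itself is not shown in these examples. A hedged sketch of what it could contain, given the LSF_JOB_IDS, LSF_USER and LSF_HOST placeholders filled in above (the real template may differ):

EXAMPLE_STATUS_TEMPLATE = '''#!/bin/bash
# Hypothetical template body: ssh to the LSF login node and ask bjobs for the
# wide-format status of the given job ids.
ID_RSA_PATH=$1
ssh -i $ID_RSA_PATH {LSF_USER}@{LSF_HOST} "bjobs -W {LSF_JOB_IDS}"
'''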
def get_items_with_context(index_name, raw_es_query, raw_context, raw_contextual_sort_data='{}'):
    """
    :param index_name: name of the index to query
    :param raw_es_query: es_query stringifyied
    :param raw_context: context dict stringifyied
    :param raw_contextual_sort_data:
    :return: the items in the es_query with the context given in the context description
    """

    context_dict = json.loads(raw_context)
    context, total_results = context_loader.get_context(context_dict)

    id_properties = es_mappings.get_id_properties_for_index(index_name)
    # create a context index so access is faster
    context_id = context_dict['context_id']
    context_index = context_loader.load_context_index(context_id, id_properties, context)

    if raw_contextual_sort_data is not None:
        contextual_sort_data = json.loads(raw_contextual_sort_data)
    else:
        contextual_sort_data = {}

    search_data_with_injections = get_search_data_with_injections(raw_es_query, contextual_sort_data, id_properties,
                                                                  total_results, context_index)
    raw_search_data_with_injections = json.dumps(search_data_with_injections)
    es_response = es_data.get_es_response(index_name, json.loads(raw_search_data_with_injections))
    add_context_values_to_response(es_response, context_index)

    metadata = {
        'total_results': len(context_index),
        'max_results_injected': RUN_CONFIG.get('filter_query_max_clauses')
    }
    return es_response, metadata
def do_multisearch(body):
    """
    :param body: body of the multisearch
    :return: the result of the multisearch
    """
    cache_key = get_multisearch_cache_key(body)
    app_logging.debug(f'cache_key: {cache_key}')

    start_time = time.time()
    cache_response = cache.fail_proof_get(key=cache_key)
    if cache_response is not None:
        end_time = time.time()
        time_taken = end_time - start_time
        app_logging.debug('results were cached')
        record_that_response_was_cached('multisearch', {'query': body}, time_taken)
        return cache_response

    app_logging.debug('results were not cached')

    start_time = time.time()
    result = ES.msearch(body=body)
    end_time = time.time()
    time_taken = end_time - start_time

    record_that_response_not_cached('multisearch', {'query': body}, time_taken)

    seconds_valid = RUN_CONFIG.get('es_proxy_cache_seconds')
    cache.fail_proof_set(key=cache_key, value=result, timeout=seconds_valid)

    return result
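The body passed to ES.msearch is newline-delimited JSON: a header line naming the index followed by a query line, one pair per search. A hedged usage sketch with an illustrative index name:

example_body = (
    '{"index": "chembl_molecule"}\n'  # index name is illustrative
    '{"query": {"match_all": {}}, "size": 1}\n'
)
result = do_multisearch(example_body)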
    def test_gets_config_for_a_list_of_properties(self):
        """
        Test it gets the configuration for a list of properties
        """
        groups_configuration_manager = get_group_configuration_instance()

        es_index_prefix = RUN_CONFIG.get('es_index_prefix')
        index_name = f'{es_index_prefix}activity'
        props = [
            '_metadata.activity_generated.short_data_validity_comment',
            '_metadata.assay_data.assay_cell_type'
        ]

        configs_got = groups_configuration_manager.get_config_for_props_list(
            index_name, props)
        config = configs_got[0]
        self.assertEqual(config['index_name'], index_name)
        self.assertEqual(config['prop_id'], props[0])
        self.assertTrue(config['aggregatable'])
        self.assertEqual(config['type'], 'string')
        self.assertEqual(config['label'], 'My custom label')
        self.assertEqual(config['label_mini'], 'My cstm lbl')

        config = configs_got[1]
        self.assertEqual(config['index_name'], index_name)
        self.assertEqual(config['prop_id'], props[1])
        self.assertTrue(config['aggregatable'])
        self.assertEqual(config['type'], 'string')
        self.assertEqual(config['label'], 'Assay Data Cell Type')
        self.assertEqual(config['label_mini'], 'Assay Data Cell Type')
def parse_search(search_term, es_indexes, selected_es_index):
    """
    :param search_term: Term to parse
    :param es_indexes: indexes in which the search will be done, separated by a comma
    :param selected_es_index: es index to focus on
    :return: the query to send to elasticsearch based on the search term provided
    """

    cache_key = f'{search_term}-{es_indexes}-{selected_es_index}'
    app_logging.debug(f'cache_key: {cache_key}')

    cache_response = cache.fail_proof_get(key=cache_key)
    if cache_response is not None:
        app_logging.debug('results were cached')
        return cache_response

    app_logging.debug('results were not cached')

    parsed_query = parse_query_str(search_term)
    indexes_list = es_indexes.split(',')
    best_queries, sorted_indexes_by_score = QueryBuilder.get_best_es_query(
        parsed_query, indexes_list, selected_es_index)

    response_dict = {
        'parsed_query': parsed_query,
        'best_es_base_queries': best_queries,
        'sorted_indexes_by_score': sorted_indexes_by_score
    }

    seconds_valid = RUN_CONFIG.get('es_proxy_cache_seconds')
    cache.fail_proof_set(key=cache_key,
                         value=response_dict,
                         timeout=seconds_valid)

    return response_dict
Example #18
    def test_parses_the_output_of_bjobs_finished_job(self):
        """
        Generates mock jobs, then sends a mock output to the the function to test that it interpreted the output
        accordingly. This test focuses on a job that switched to finished state.
        """
        self.create_test_jobs_0()

        sample_output = self.load_sample_file('app/job_status_daemon/test/data/sample_lsf_output_1.txt')

        with self.flask_app.app_context():
            daemon.parse_bjobs_output(sample_output)
            # job with lsf id 4 should be in finished state now
            lsf_job_id = 4
            job = delayed_job_models.get_job_by_lsf_id(lsf_job_id)
            status_got = job.status
            status_must_be = delayed_job_models.JobStatuses.FINISHED
            self.assertEqual(status_got, status_must_be, msg='The status of the job was not changed accordingly!')

            finished_time = job.finished_at

            delta = timedelta(days=RUN_CONFIG.get('job_expiration_days'))
            expiration_date_must_be = finished_time + delta

            expiration_date_got = job.expires_at
            self.assertEqual(expiration_date_got, expiration_date_must_be,
                             msg='the job expiration date was not calculated correctly!')
    def test_gets_config_for_a_contextual_property(self):
        """
        tests gets the config for a virtual contextual property
        """
        configuration_manager = get_property_configuration_instance()

        es_index_prefix = RUN_CONFIG.get('es_index_prefix')
        index_name = f'{es_index_prefix}molecule'

        prop_id = '_context.similarity'
        config_got = configuration_manager.get_config_for_prop(
            index_name, prop_id)

        self.assertEqual(config_got['prop_id'], prop_id,
                         'The prop_id was not set up properly!')

        self.assertFalse(config_got['aggregatable'])
        self.assertTrue(config_got['sortable'])
        self.assertEqual(config_got['type'], 'double')
        self.assertEqual(config_got['label'], 'Similarity')
        self.assertEqual(config_got['label_mini'], 'Similarity')

        self.assertEqual(config_got['is_virtual'], True,
                         'This is a virtual property!')
        self.assertEqual(config_got['is_contextual'], True,
                         'This is a contextual property!')
Example #20
def get_or_create(job_type,
                  job_params,
                  docker_image_url,
                  input_files_hashes=None):
    """
    Based on the type and the parameters given, returns a job if it exists; if not, it creates it and returns it.
    :param job_type: type of job to get or create
    :param job_params: parameters of the job
    :param docker_image_url: url of the docker image that runs the job
    :param input_files_hashes: hashes of the job's input files
    :return: the job corresponding to those parameters.
    """
    if input_files_hashes is None:
        input_files_hashes = {}  # avoid the mutable default argument pitfall

    job_id = generate_job_id(job_type, job_params, docker_image_url,
                             input_files_hashes)

    existing_job = DelayedJob.query.filter_by(id=job_id).first()
    if existing_job is not None:
        return existing_job

    run_environment = RUN_CONFIG.get('run_env')
    job = DelayedJob(id=job_id,
                     type=job_type,
                     raw_params=json.dumps(job_params, sort_keys=True),
                     docker_image_url=docker_image_url,
                     run_environment=run_environment)

    DB.session.add(job)
    DB.session.commit()
    return job
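get_or_create depends on generate_job_id being deterministic, so that identical parameters always map to the same job id and duplicate submissions are reused. A hedged sketch of such an id, assuming a sorted JSON dump hashed with SHA-256 (the real helper may differ):

import hashlib
import json

def generate_job_id_sketch(job_type, job_params, docker_image_url, input_files_hashes):
    # Hypothetical: a stable representation of all the inputs, so equal inputs
    # always hash to the same id.
    stable_repr = json.dumps(
        [job_type, job_params, docker_image_url, input_files_hashes],
        sort_keys=True)
    return f'{job_type}-{hashlib.sha256(stable_repr.encode()).hexdigest()[:16]}'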
Example #21
def get_lsf_job_ids_to_check(lsf_host):
    """
    :param lsf_host: lsf host for which to return the jobs to check
    :return: a list of LSF job IDs for which it is necessary to check the status in the LSF cluster. The jobs that
    are checked are the ones that:
    1. Were submitted to the same LSF cluster that I am running with (defined in the configuration)
    2. Are not in Error or Finished state.
    """

    DB.session.commit()

    status_is_not_error_or_finished = DelayedJob.status.notin_(
        [JobStatuses.ERROR, JobStatuses.FINISHED])

    lsf_host_is_my_host = DelayedJob.lsf_host == lsf_host

    current_run_environment = RUN_CONFIG.get('run_env')
    run_environment_is_my_current_environment = \
        DelayedJob.run_environment == current_run_environment

    job_to_check_status = DelayedJob.query.filter(
        and_(lsf_host_is_my_host, status_is_not_error_or_finished,
             run_environment_is_my_current_environment))

    # Make sure there are no None values. A None lsf_job_id can appear when the server has created a job and is
    # submitting it at the same time the daemon asks for jobs to check, which would make the daemon crash.
    ids = [
        job.lsf_job_id for job in job_to_check_status
        if job.lsf_job_id is not None
    ]

    DB.session.commit()

    return ids
    def get_config_for_prop(self, index_name, prop_id):
        """
        :param index_name: name of the index to which the property belongs
        :param prop_id: full path of the property, such as  '_metadata.assay_data.assay_subcellular_fraction'
        :return: a dict describing the configuration of a property
        """
        cache_key = f'config_for_{index_name}-{prop_id}'
        app_logging.debug(f'cache_key: {cache_key}')

        cache_response = cache.fail_proof_get(key=cache_key)
        if cache_response is not None:
            app_logging.debug('results were cached')
            return cache_response

        app_logging.debug('results were not cached')

        app_logging.debug(
            f'getting property config for {prop_id} of index {index_name}')
        es_property_description = self.get_property_base_es_description(
            index_name, prop_id)
        property_override_description = self.get_property_base_override_description(
            index_name, prop_id)
        config = self.get_merged_prop_config(index_name, prop_id,
                                             es_property_description,
                                             property_override_description)

        seconds_valid = RUN_CONFIG.get('es_proxy_cache_seconds')
        cache.fail_proof_set(key=cache_key,
                             value=config,
                             timeout=seconds_valid)
        return config
Example #23
    def get_list_of_configured_properties(self, index_name):
        """
        :param index_name: the index to check
        :return: a list of all the configured properties among all the groups
        """

        cache_key = f'facets_configured_properties_for_{index_name}'
        app_logging.debug(f'cache_key: {cache_key}')

        cache_response = cache.fail_proof_get(key=cache_key)
        if cache_response is not None:
            app_logging.debug('results were cached')
            return cache_response

        app_logging.debug('results were not cached')

        with open(self.facets_groups_file_path, 'rt') as groups_file:

            groups_config = yaml.load(groups_file, Loader=yaml.FullLoader)
            index_groups = groups_config.get(index_name)
            if index_groups is None:
                raise self.FacetsGroupsConfigurationManagerError(
                    f'The index {index_name} does not have a configuration set up!'
                )
            properties_identified = set()
            for subgroup in index_groups.values():
                for properties_list in subgroup.values():
                    properties_identified.update(properties_list)

        # cache the list (not the set) so cache hits and fresh computations return the same type
        properties_list = list(properties_identified)

        seconds_valid = RUN_CONFIG.get('es_mappings_cache_seconds')
        cache.fail_proof_set(key=cache_key,
                             value=properties_list,
                             timeout=seconds_valid)
        return properties_list
    def test_gets_config_for_a_group_with_default_and_additional_properties(
            self):
        """
        tests that gets config for a group with default and additional properties
        """
        groups_configuration_manager = get_group_configuration_instance()

        es_index_prefix = RUN_CONFIG.get('es_index_prefix')
        index_name = f'{es_index_prefix}activity'
        group_name = 'table'

        configs_got = groups_configuration_manager.get_config_for_group(
            index_name, group_name)['properties']

        with open(groups_configuration_manager.groups_file_path,
                  'rt') as groups_config_file:
            groups_must_be = yaml.load(groups_config_file,
                                       Loader=yaml.FullLoader)
            group_must_be = groups_must_be[index_name][group_name]

            for sub_group, props_list_must_be in group_must_be.items():
                props_list_got = [
                    conf['prop_id'] for conf in configs_got[sub_group]
                ]
                self.assertTrue(props_list_got == props_list_must_be)
    def test_gets_config_for_a_virtual_property(self):
        """
        Tests gets the correct config for a virtual property
        """

        configuration_manager = get_property_configuration_instance()

        with open(configuration_manager.override_file_path) as override_file:
            override_config_must_be = yaml.load(override_file,
                                                Loader=yaml.FullLoader)

            es_index_prefix = RUN_CONFIG.get('es_index_prefix')
            index_name = f'{es_index_prefix}molecule'

            prop_id = 'trade_names'
            config_got = configuration_manager.get_config_for_prop(
                index_name, prop_id)

            property_config_must_be = override_config_must_be[index_name][
                prop_id]
            self.assertEqual(config_got['prop_id'], prop_id,
                             'The prop_id was not set up properly!')
            self.assertEqual(config_got['based_on'],
                             property_config_must_be['based_on'],
                             'The based_on was not set up properly!')
            self.assertEqual(config_got['label'],
                             property_config_must_be['label'],
                             'The label was not set up properly!')
            self.assertFalse(config_got['aggregatable'],
                             'This property should not be aggregatable')

            self.assertEqual(config_got['is_virtual'], True,
                             'This is a virtual property!')
            self.assertEqual(config_got['is_contextual'], False,
                             'This is not a contextual property!')
def check_smiles(term_dict: dict):
    """
    Checks whether the term is a SMILES string by running a flexmatch search against the ChEMBL web services;
    if matching molecules are found, appends a 'smiles' reference to the term dict.
    :param term_dict: dict describing the term to check
    """
    ws_base_path = RUN_CONFIG.get('chembl_api').get('ws_url')
    try:
        chembl_ids = []
        next_url_path = '{ws_path}/molecule.json?molecule_structures__canonical_smiles__flexmatch={smiles}'\
                        .format(ws_path=ws_base_path, smiles=urllib.parse.quote(term_dict['term']))
        while next_url_path:
            response = requests.get(
                WS_DOMAIN + next_url_path,
                headers={'Accept': 'application/json'},
                timeout=5
            )
            json_response = response.json()
            if 'error_message' in json_response:
                return None
            for molecule_i in json_response['molecules']:
                chembl_ids.append(molecule_i['molecule_chembl_id'])
            next_url_path = json_response['page_meta']['next']
        if chembl_ids:
            term_dict['references'].append(
                {
                    'type': 'smiles',
                    'label': 'SMILES',
                    'chembl_ids': get_chembl_id_list_dict(chembl_ids),
                    'include_in_query': True,
                    'chembl_entity': 'compound'
                }
            )
    except Exception:
        traceback.print_exc()
Example #27
    def test_parses_the_output_of_bjobs_when_no_jobs_were_found(self):
        """
        Generates mock jobs, then sends a mock output to the the function to test that it interpreted the output
        accordingly
        """
        self.create_test_jobs_0()
        sample_output = self.load_sample_file('app/job_status_daemon/test/data/sample_lsf_output_0.txt')

        with self.flask_app.app_context():
            daemon.parse_bjobs_output(sample_output)
            # No status should have changed

            for status_must_be in [delayed_job_models.JobStatuses.CREATED, delayed_job_models.JobStatuses.QUEUED,
                                   delayed_job_models.JobStatuses.RUNNING, delayed_job_models.JobStatuses.FINISHED,
                                   delayed_job_models.JobStatuses.ERROR]:

                lsf_config = RUN_CONFIG.get('lsf_submission')
                lsf_host = lsf_config['lsf_host']

                for assigned_host in [lsf_host, 'another_host']:

                    id_to_check = f'Job-{assigned_host}-{status_must_be}'
                    job = delayed_job_models.get_job_by_id(id_to_check)
                    status_got = job.status
                    self.assertEqual(status_got, status_must_be,
                                     msg='The status was modified! This should have not modified the status')
def get_url_shortening(url_hash):
    """
    :param url_hash: hash of the url to look for
    :return: url shortening dict from elasticsearch
    """

    index_name = RUN_CONFIG.get('url_shortening').get('index_name')
    es_query = {
        "query": {
            "query_string": {
                "query": f'"{url_hash}"',
                "default_field": "hash"
            }
        }
    }

    shortening_response = es_data.get_es_response(index_name,
                                                  es_query,
                                                  ignore_cache=True)
    total_hits = shortening_response['hits']['total']['value']
    app_logging.debug(f'total_hits {total_hits}')

    if total_hits == 0:
        return None

    raw_document = shortening_response['hits']['hits'][0]
    return raw_document
Example #29
    def test_collects_the_urls_for_the_outputs_of_a_finished_job(self):
        """
        Generates some mock jobs, then sends a mock output to the function to test that it interprets that it finished.
        The finished job should have now the output files set
        """
        self.create_test_jobs_0()

        sample_output = self.load_sample_file('app/job_status_daemon/test/data/sample_lsf_output_1.txt')

        with self.flask_app.app_context():
            # Prepare the test scenario
            lsf_job_id = 4
            job = delayed_job_models.get_job_by_lsf_id(lsf_job_id)

            output_urls_must_be = []

            for i in range(0, 2):

                for subdir in ['', 'subdir/']:

                    out_file_name = f'output_{i}.txt'
                    out_file_path = f'{job.output_dir_path}/{subdir}{out_file_name}'
                    os.makedirs(Path(out_file_path).parent, exist_ok=True)
                    with open(out_file_path, 'wt') as out_file:
                        out_file.write(f'This is output file {i}')

                    server_base_path = RUN_CONFIG.get('base_path', '')
                    if server_base_path == '':
                        server_base_path_with_slash = ''
                    else:
                        server_base_path_with_slash = f'{server_base_path}/'

                    outputs_base_path = RUN_CONFIG.get('outputs_base_path')
                    output_url_must_be = f'/{server_base_path_with_slash}{outputs_base_path}/' \
                                         f'{job.id}/{subdir}{out_file_name}'

                    output_urls_must_be.append(output_url_must_be)

            # END to prepare the test scenario

            daemon.parse_bjobs_output(sample_output)
            job_outputs_got = job.output_files
            self.assertEqual(len(job_outputs_got), 4, msg='There must be 4 outputs for this job!')

            for output_file in job.output_files:
                output_url_got = output_file.public_url
                self.assertIn(output_url_got, output_urls_must_be, msg='The output url was not set correctly')
Example #30
def create_app():
    """
    Creates the flask app
    :return: Delayed jobs flask app
    """

    base_path = RUN_CONFIG.get('base_path', '')
    outputs_base_path = RUN_CONFIG.get('outputs_base_path', 'outputs')
    flask_app = Flask(__name__,
                      static_url_path=f'{base_path}/{outputs_base_path}',
                      static_folder=job_submission_service.JOBS_OUTPUT_DIR)

    # flask_app.config['SERVER_NAME'] = RUN_CONFIG.get('server_public_host')
    flask_app.config['SQLALCHEMY_DATABASE_URI'] = RUN_CONFIG.get(
        'sql_alchemy').get('database_uri')
    flask_app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = RUN_CONFIG.get(
        'sql_alchemy').get('track_modifications')
    flask_app.config['SECRET_KEY'] = RUN_CONFIG.get('server_secret_key')

    enable_cors = RUN_CONFIG.get('enable_cors', False)

    if enable_cors:
        CORS(flask_app)

    with flask_app.app_context():
        DB.init_app(flask_app)
        CACHE.init_app(flask_app)
        RATE_LIMITER.init_app(flask_app)

        for handler in flask_app.logger.handlers:
            RATE_LIMITER.logger.addHandler(handler)

        create_tables = RUN_CONFIG.get('sql_alchemy').get(
            'create_tables', False)
        if create_tables:
            DB.create_all()

        generate_default_config = RUN_CONFIG.get('generate_default_config',
                                                 False)
        if generate_default_config:
            delayed_job_models.generate_default_job_configs()

        flask_app.register_blueprint(SWAGGER_BLUEPRINT,
                                     url_prefix=f'{base_path}/swagger')
        flask_app.register_blueprint(SUBMISSION_BLUEPRINT,
                                     url_prefix=f'{base_path}/submit')
        flask_app.register_blueprint(JOB_STATUS_BLUEPRINT,
                                     url_prefix=f'{base_path}/status')
        flask_app.register_blueprint(ADMIN_AUTH_BLUEPRINT,
                                     url_prefix=f'{base_path}/admin')
        flask_app.register_blueprint(ADMIN_TASKS_BLUEPRINT,
                                     url_prefix=f'{base_path}/admin')
        flask_app.register_blueprint(
            CUSTOM_STATISTICS_BLUEPRINT,
            url_prefix=f'{base_path}/custom_statistics')

        return flask_app
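A minimal, hedged way to run the app locally; a real deployment would more likely sit behind a WSGI server such as gunicorn:

if __name__ == '__main__':
    app = create_app()
    app.run(host='0.0.0.0', port=5000)  # port is an assumption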