예제 #1
0
def default_fields(obj_data, ws_info, obj_data_v1):
    """
    Build the fields shared by every workspace object document on elasticsearch.

    Args:
        obj_data - workspace object data; reads its 'info' tuple plus the
            'creator', 'epoch' and (optional) 'copied' keys
        ws_info - workspace info tuple; index 6 is compared against 'r' to
            decide whether the workspace is public
        obj_data_v1 - object data for version 1 of the same object; its
            'info' tuple supplies the creation date (index 3)
    Returns:
        A dict of default document fields.
    """
    info = obj_data['info']
    workspace_id = info[6]
    object_id = info[0]
    object_version = info[4]
    first_version_info = obj_data_v1['info']
    publicly_readable = ws_info[6] == 'r'
    sharers = get_shared_users(workspace_id)
    copied_from = obj_data.get('copied')
    type_module, type_name, type_version = get_type_pieces(info[2])
    search_tags = _get_tags(ws_info)
    # Key order is kept stable so serialized documents look the same.
    doc = {
        "creator": obj_data["creator"],
        "access_group": workspace_id,
        "obj_name": info[1],
        "shared_users": sharers,
        "timestamp": obj_data['epoch'],
        "creation_date": first_version_info[3],
        "is_public": publicly_readable,
        "version": object_version,
        "obj_id": object_id,
        "copied": copied_from,
        "tags": search_tags,
        "obj_type_version": type_version,
        "obj_type_module": type_module,
        "obj_type_name": type_name,
    }
    return doc
예제 #2
0
def index_obj(obj_data, ws_info, msg_data):
    """
    For a newly created object, generate the index document(s) for it.

    This is a generator: it yields each action dict produced by the
    type-specific indexer. For 'index' actions, the index name is checked
    against the configured allow/skip lists and the default fields are merged
    into the document unless the action carries a '_no_defaults' key.
    Nothing is yielded for blacklisted types or for workspaces whose
    'searchtags' metadata contains 'noindex'.

    Args:
        obj_data - in-memory parsed data from the workspace object
        ws_info - workspace info tuple; its last element is the workspace
            metadata dict
        msg_data - json event data received from the kafka workspace events
            stream. Must have keys for `wsid` and `objid`
    Yields:
        Action dicts from the indexer (each has an '_action' key).
    Raises:
        WorkspaceResponseError - re-raised after logging when fetching
            version 1 of the object fails
    """
    obj_type = obj_data['info'][2]
    (type_module, type_name, type_version) = ws_utils.get_type_pieces(obj_type)
    if (type_module + '.' + type_name) in _TYPE_BLACKLIST:
        # Blacklisted type, so we don't index it
        return
    # If the workspace metadata has a "searchtags" entry containing "noindex",
    # skip indexing this object.
    metadata = ws_info[-1]
    searchtags = metadata.get('searchtags')
    if searchtags and 'noindex' in searchtags:
        return
    # Get the info of the first object to get the creation date of the object.
    upa = get_upa_from_msg_data(msg_data)
    try:
        obj_data_v1 = ws_client.admin_req('getObjects', {
            'objects': [{
                'ref': upa + '/1'
            }],
            'no_data': 1
        })
    except WorkspaceResponseError as err:
        # Use a %s placeholder: passing an extra argument without one makes
        # the logging module fail to format the record.
        logger.error('Workspace response error: %s', err.resp_data)
        raise
    obj_data_v1 = obj_data_v1['data'][0]
    # Dispatch to a specific type handler to produce the search document
    indexer = _find_indexer(type_module, type_name, type_version)
    # All indexers are generators that yield document data for ES.
    defaults = indexer_utils.default_fields(obj_data, ws_info, obj_data_v1)
    for indexer_ret in indexer(obj_data, ws_info, obj_data_v1):
        if indexer_ret['_action'] == 'index':
            # The index name may be absent; read it once with .get() so the
            # log messages below cannot raise KeyError.
            index_name = indexer_ret.get('index')
            allow_indices = config()['allow_indices']
            if allow_indices and index_name not in allow_indices:
                # This index name is not in the indexing whitelist from the config, so we skip
                logger.debug(
                    f"Index '{index_name}' is not in ALLOW_INDICES, skipping"
                )
                continue
            if index_name in config()['skip_indices']:
                # This index name is in the indexing blacklist in the config, so we skip
                logger.debug(
                    f"Index '{index_name}' is in SKIP_INDICES, skipping"
                )
                continue
            # NOTE(review): only the '_no_defaults' key (leading underscore)
            # is honored here; confirm indexers use that exact spelling.
            if '_no_defaults' not in indexer_ret:
                # Inject all default fields into the index document.
                indexer_ret['doc'].update(defaults)
        yield indexer_ret
예제 #3
0
def _init_generic_index(msg):
    """
    Initialize an index for a workspace object handled by the generic indexer.

    The index name passed to `_init_index` is the lower-cased type name with
    a "_0" suffix; e.g. a type like Module.Type-4.0 gives "type_0".
    Message fields:
        full_type_name - string - eg. "Module.Type-X.Y"
    """
    _module, type_name, _version = get_type_pieces(msg['full_type_name'])
    base_name = type_name.lower()
    # Combine the workspace auth and workspace object mappings; on a key
    # clash the 'ws_object' entry wins.
    combined = dict(_GLOBAL_MAPPINGS['ws_auth'])
    combined.update(_GLOBAL_MAPPINGS['ws_object'])
    _init_index(base_name + '_0', combined)
예제 #4
0
def _init_generic_index(msg):
    """
    Initialize an index for a workspace object handled by the generic indexer
    and add it to the default search alias.

    The index name is the lower-cased type name with a "_0" suffix; e.g. a
    type like Module.Type-4.0 gives "type_0". The alias target is that name
    prefixed with `_PREFIX` and a dot.
    Message fields:
        full_type_name - string - eg. "Module.Type-X.Y"
    """
    _module, type_name, _version = get_type_pieces(msg['full_type_name'])
    generic_index = type_name.lower() + '_0'
    # Combine the workspace auth and workspace object mappings; on a key
    # clash the 'ws_object' entry wins.
    combined = dict(_GLOBAL_MAPPINGS['ws_auth'])
    combined.update(_GLOBAL_MAPPINGS['ws_object'])
    _init_index(generic_index, combined)
    # Point the default search alias at the new index as well.
    _create_alias(_DEFAULT_SEARCH_ALIAS, f"{_PREFIX}.{generic_index}")
예제 #5
0
def index_from_sdk(obj_data, ws_info, obj_data_v1, conf):
    """
    Index an object by running an sdk application in a docker container.

    Looks up the sdk app configured for the object's type, prepares a scratch
    job directory with the inputs, runs the app's docker image, and reads the
    app's `output.json` from the job directory.

    Args:
        obj_data - workspace object data; 'info' supplies type, object id
            and workspace id
        ws_info - workspace info, forwarded to the docker input setup
        obj_data_v1 - version-1 object data, forwarded to the docker input setup
        conf - not used by this function
    Returns:
        Whatever `_verify_and_format_output` returns for the app's output file.
    Raises:
        RuntimeError - when the app reports an error or gives no 'filepath'
    """
    info = obj_data['info']
    type_module, type_name, type_version = ws_utils.get_type_pieces(info[2])

    # sdk indexer apps are keyed by "Module.Type" in the global config.
    app_key = '.'.join((type_module, type_name))
    app_settings = config()['global']['sdk_indexer_apps'][app_key]
    app_name = app_settings['sdk_app']
    app_func = app_settings['sdk_func']
    app_version = app_settings.get('sdk_version')
    sub_index = _get_sub_obj_index(app_settings)

    ws_id = info[6]
    obj_id = info[0]

    versioned_index = _get_index_name(type_module, type_name, type_version)
    docker_image = _get_docker_image_name(app_name, app_version)
    _pull_docker_image(docker_image)

    # Each job gets its own uniquely named scratch directory.
    job_dir = '/'.join((_SCRATCH, str(uuid.uuid1())))
    os.makedirs(job_dir)
    _setup_docker_inputs(job_dir, obj_data, ws_info, obj_data_v1, app_name,
                         app_func)

    # the volume mount must be relative to the Host, so we add _MOUNT_DIR to job_directory.
    host_path = _MOUNT_DIR + job_dir
    volumes = {host_path: {'bind': _IN_APP_JOB_DIR, 'mode': 'rw'}}
    environment = {
        'SDK_CALLBACK_URL': 'not_supported_yet',
        'KBASE_ENDPOINT': config()['kbase_endpoint'],
    }

    # Run docker container.
    _DOCKER.containers.run(docker_image, 'async', environment=environment,
                           volumes=volumes)

    with open(job_dir + "/output.json") as output_file:
        job_out = json.load(output_file)
    if job_out.get('error'):
        raise RuntimeError(f"Error from sdk application: {job_out['error']}")
    job_out = job_out['result'][0]
    if not job_out.get('filepath'):
        raise RuntimeError(f"Unknown sdk application error: {job_out}")
    # Translate the in-container path back to the local job directory.
    filepath = job_out['filepath'].replace(_IN_APP_JOB_DIR, job_dir, 1)

    return _verify_and_format_output(filepath, job_dir, ws_id, obj_id,
                                     versioned_index, sub_index)
예제 #6
0
 def fn(obj_data, ws_info, obj_data_v1):
     """
     Generic indexer: yield an index-initialization event for the object's
     type, then an 'index' action whose document is just the default fields.
     """
     info = obj_data['info']
     ws_id, obj_id, full_type = info[6], info[0], info[2]
     # Send an event to the elasticsearch_writer to initialize an index for this
     # type, if it does not exist.
     init_event = {'_action': 'init_generic_index', 'full_type_name': full_type}
     yield init_event
     type_name = ws_utils.get_type_pieces(full_type)[1]
     document = indexer_utils.default_fields(obj_data, ws_info, obj_data_v1)
     target_index = type_name.lower() + "_0"
     doc_id = f"WS::{ws_id}:{obj_id}"
     yield {
         '_action': 'index',
         'doc': document,
         'index': target_index,
         'id': doc_id,
         'no_defaults': True,
         # 'namespace': "WS"
     }
예제 #7
0
def _save_type_vertices(obj_info):
    """
    Save the type-related vertices for an object.

    Reads the type string from obj_info[2], sanitizes it for use as a key,
    and saves one document each to the 'ws_type_version', 'ws_type' and
    'ws_type_module' collections.
    """
    type_key = sanitize_arangodb_key(obj_info[2])
    module_name, short_name, version_str = get_type_pieces(type_key)
    # The version string must be exactly "<major>.<minor>" with integer parts.
    major, minor = map(int, version_str.split('.'))
    logger.info(
        f'Saving ws_type_version, ws_type, and ws_type_module for {type_key}')
    version_doc = {
        '_key': type_key,
        'type_name': short_name,
        'module_name': module_name,
        'maj_ver': major,
        'min_ver': minor,
    }
    save('ws_type_version', version_doc)
    type_doc = {
        '_key': f'{module_name}.{short_name}',
        'type_name': short_name,
        'module_name': module_name,
    }
    save('ws_type', type_doc)
    save('ws_type_module', {'_key': module_name})