def check_workspace_deleted(ws_id):
    """
    Confirm that a workspace really is deleted.

    A DELETE_WORKSPACE event can signal an undeletion as well as a deletion,
    so the workspace is looked up through the admin API. It is treated as
    deleted only when that lookup raises a WorkspaceResponseError whose
    response text contains the word 'delete'.

    Args:
        ws_id - a workspace id
    Returns:
        True when the lookup fails with 'delete' in the error text,
        False otherwise (including when the lookup succeeds).
    """
    client = WorkspaceClient(url=config()['kbase_endpoint'],
                             token=config()['ws_token'])
    try:
        client.admin_req("getWorkspaceInfo", {'id': ws_id})
    except WorkspaceResponseError as err:
        # Any other workspace error is not proof of deletion.
        return 'delete' in err.resp_text
    return False
Exemple #2
0
def _generate_taxon_edge(obj_ver_key, obj_data):
    """
    Save an edge from a workspace object version to the taxon it references.

    The object's 'taxon_ref' is fetched from the workspace to read its
    'taxonomy_id', which is then looked up with the 'ncbi_fetch_taxon'
    stored query (timestamped with the current time in milliseconds).
    Nothing is saved when the object has no 'taxon_ref' or the query
    returns no results.

    Args:
        obj_ver_key - key of the object version document (edge source)
        obj_data - object dict; obj_data['data'] is checked for 'taxon_ref'
    """
    payload = obj_data['data']
    if 'taxon_ref' not in payload:
        logger.info('No taxon ref in object; skipping..')
        return
    ws_client = WorkspaceClient(url=config()['kbase_endpoint'],
                                token=config()['ws_token'])
    ref_spec = {'ref': payload['taxon_ref']}
    fetched = ws_client.admin_req('getObjects', {'objects': [ref_spec]})
    taxonomy_id = fetched['data'][0]['data']['taxonomy_id']
    query_params = {
        'id': str(taxonomy_id),
        'ts': int(time.time() * 1000),
    }
    matches = _stored_query('ncbi_fetch_taxon', query_params)['results']
    if not matches:
        logger.info(f'No taxonomy node in database for id {taxonomy_id}')
        return
    tax_key = matches[0]['_key']
    # The edge runs from the ws_object_ver document to the taxon document
    from_id = f"{_OBJ_VER_COLL}/{obj_ver_key}"
    to_id = f"{_TAX_VER_COLL}/{tax_key}"
    logger.info(f'Creating taxon edge from {from_id} to {to_id}')
    edge = {
        '_from': from_id,
        '_to': to_id,
        'assigned_by': '_system'
    }
    _save(_TAX_EDGE_COLL, [edge])
def _reindex_ws_type(args):
    """
    Reindex all objects in the entire workspace server based on a type name.
    """
    if not re.match(r'^.+\..+-\d+\.\d+$', args.type):
        sys.stderr.write('Enter the full type name, such as "KBaseGenomes.Genome-17.0"')
        sys.exit(1)
    # - Iterate over all workspaces
    #   - For each workspace, list objects
    #   - For each obj matching args.type, produce a reindex event
    ws = WorkspaceClient(url=config()['kbase_endpoint'], token=config()['ws_token'])
    evtype = 'INDEX_NONEXISTENT'
    if args.overwrite:
        evtype = 'REINDEX'
    for wsid in range(args.start, args.stop + 1):
        try:
            infos = ws.admin_req('listObjects', {'ids': [wsid]})
        except WorkspaceResponseError as err:
            print(err.resp_data['error']['message'])
            continue
        for obj_info in infos:
            obj_type = obj_info[2]
            if obj_type == args.type:
                _produce({'evtype': evtype, 'wsid': wsid, 'objid': obj_info[0]})
    print('..done!')
def fetch_objects_in_workspace(ws_id, include_narrative=False):
    """
    List the objects in a workspace as plain dicts.

    Args:
        ws_id - a workspace id
        include_narrative - when True, every listed object is returned as a
            dict with keys 'obj_id', 'name', 'obj_type' and 'ver'. When False
            (the default), objects whose type contains 'KBaseNarrative' are
            dropped and each dict has only the keys 'name' and 'obj_type'.
    Returns:
        A list of dicts, one per object, shaped as described above.
    Raises:
        WorkspaceResponseError - logged and re-raised when the listing fails.
    """
    ws_client = WorkspaceClient(url=config()['kbase_endpoint'],
                                token=config()['ws_token'])
    try:
        obj_infos = ws_client.admin_req("listObjects", {"ids": [ws_id]})
    except WorkspaceResponseError as err:
        # resp_data must be passed through a %s placeholder; as a bare extra
        # argument it never appears in the log message.
        logger.error("Workspace response error: %s", err.resp_data)
        raise
    if include_narrative:
        return [{
            "obj_id": info[0],
            "name": info[1],
            "obj_type": info[2],
            "ver": info[4]
        } for info in obj_infos]
    return [{
        "name": info[1],
        "obj_type": info[2]
    } for info in obj_infos if 'KBaseNarrative' not in str(info[2])]
def is_workspace_public(ws_id):
    """
    Report whether a workspace is public.

    Args:
        ws_id - a workspace id
    Returns:
        True when element 6 of the workspace info (the global-read setting)
        is anything other than 'n', otherwise False.
    """
    client = WorkspaceClient(url=config()['kbase_endpoint'],
                             token=config()['ws_token'])
    info = client.admin_req('getWorkspaceInfo', {'id': ws_id})
    return info[6] != 'n'
def is_workspace_public(ws_id):
    """
    Report whether a workspace is public.

    Args:
        ws_id - a workspace id
    Returns:
        True when element 6 of the workspace info (the global-read setting)
        is anything other than 'n', otherwise False.
    """
    client = WorkspaceClient(url=_CONFIG['workspace_url'],
                             token=_CONFIG['ws_token'])
    info = client.admin_req('getWorkspaceInfo', {'id': ws_id})
    return info[6] != 'n'
def delete_obj(msg):
    """
    Handle an object deletion event (OBJECT_DELETE_STATE_CHANGE).

    Looks up the object's info in the workspace and hands it to
    delete_object, undoing what the import_obj action created.

    Args:
        msg - event dict with 'wsid' and 'objid'; a truthy 'ver' is appended
            to the object reference when present.
    """
    ws_client = WorkspaceClient(url=config()['kbase_endpoint'], token=config()['ws_token'])
    base_ref = f"{msg['wsid']}/{msg['objid']}"
    # Only pin a version when the event carries one
    obj_ref = f"{base_ref}/{msg['ver']}" if msg.get("ver") else base_ref
    response = ws_client.admin_req('getObjectInfo', {
        'objects': [{'ref': obj_ref}]
    })
    delete_object(response['infos'][0])
def new_object_version(event_data):
    """
    React to a new object version being created on the workspace.

    Handles the events NEW_ALL_VERSIONS and NEW_VERSION. For
    NEW_ALL_VERSIONS the object info is fetched without a version and
    event_data is updated in place with 'ver', 'objtype', 'objtypever' and
    'upa'. The per-index handlers are still TODO: for each index only the
    name of the handler that would run is printed.

    Args:
        event_data - json data from the kafka event
    """
    cfg = get_config()
    ws_client = WorkspaceClient(url=cfg['kbase_endpoint'] + '/ws',
                                token=cfg['ws_token'])
    if event_data['evtype'] == 'NEW_ALL_VERSIONS':
        # Version-less UPA; the version found in the object info is appended below
        upa = f"{event_data['wsid']}/{event_data['objid']}"
        obj_info = ws_client.admin_req('getObjectInfo', {
            'objects': [{'ref': upa}]
        })['infos'][0]
        latest_ver = obj_info[4]
        event_data['ver'] = latest_ver
        type_name, type_ver = obj_info[2].split('-')
        event_data['objtype'] = type_name
        event_data['objtypever'] = type_ver
        event_data['upa'] = f'{upa}/{latest_ver}'
        print('new event data', event_data)
    for oindex in get_indexer_for_type(event_data['objtype']):
        try:
            # TODO call the real handlers (event, oindex) instead of
            # printing their names
            if oindex.get('multi'):
                handler_name = '_new_object_version_multi_index'
            elif oindex.get('raw'):
                handler_name = '_new_raw_version_index'
            else:
                handler_name = '_new_object_version_index'
            print(handler_name)
        except Exception as e:
            print('Failed for index', e)
            err_type, err_value, err_tb = sys.exc_info()
            print('=' * 80)
            traceback.print_tb(err_tb, limit=1, file=sys.stdout)
            traceback.print_exception(err_type, err_value, err_tb,
                                      limit=2, file=sys.stdout)
def check_object_deleted(ws_id, obj_id):
    """
    Confirm that an object is deleted by checking it is absent from the
    workspace's object listing.

    A DELETE event can correspond to more than just an object deletion, so
    the listing is consulted rather than trusting the event alone. A failed
    listing is logged as a warning and treated as an empty workspace.

    Args:
        ws_id - a workspace id
        obj_id - id of the object expected to be gone
    Returns:
        True when obj_id is not among the listed object ids.
    """
    ws_client = WorkspaceClient(url=config()['kbase_endpoint'],
                                token=config()['ws_token'])
    try:
        listing = ws_client.admin_req("listObjects", {'ids': [ws_id]})
    except WorkspaceResponseError as err:
        logger.warning(f"Workspace response error: {err.resp_data}")
        # Nothing could be listed, so the object cannot be among the results
        return True
    listed_ids = tuple(info[0] for info in listing)
    return obj_id not in listed_ids
Exemple #10
0
def index_obj(event_data):
    """
    For a newly created object, fetch its data from the workspace and build
    the index document with the indexer registered for its type.

    Args:
        event_data - json event data received from the kafka workspace events
            stream; 'objtype' must look like "Module.Type-version".
    Returns:
        Whatever the type-specific indexer returns for the fetched object data.
        NOTE(review): the original docstring said the document is pushed to
        the elasticsearch topic on Kafka; that is not visible in this
        function - confirm in the indexer or the caller.
    """
    # Fetch the object data from the workspace API
    upa = _get_upa_from_event_data(event_data)
    config = get_config()
    ws_url = config['kbase_endpoint'] + '/ws'
    ws_client = WorkspaceClient(url=ws_url, token=config['ws_token'])
    obj_data = ws_client.admin_req('getObjects', {'objects': [{'ref': upa}]})
    # Dispatch to a specific type handler to produce the search document
    (type_module_name, type_version) = event_data['objtype'].split('-')
    (type_module, type_name) = type_module_name.split('.')
    indexer = _find_indexer(type_module, type_name, type_version)
    return indexer(obj_data)
Exemple #11
0
def get_shared_users(ws_id):
    """
    Get the usernames that hold an 'a', 'r', or 'w' permission on a workspace.

    The wildcard entry '*' is never included in the result.

    Args:
        ws_id - workspace id of requested workspace object
    Returns:
        A list of usernames.
    Raises:
        WorkspaceResponseError - logged and re-raised when the permissions
            request fails.
    """
    ws_client = WorkspaceClient(url=config()['kbase_endpoint'],
                                token=config()['ws_token'])
    try:
        obj_perm = ws_client.admin_req("getPermissionsMass",
                                       {'workspaces': [{
                                           'id': ws_id
                                       }]})['perms'][0]
    except WorkspaceResponseError as err:
        # resp_data must be passed through a %s placeholder; as a bare extra
        # argument it never appears in the log message.
        logger.error("Workspace response error: %s", err.resp_data)
        raise
    return [
        username
        for username, user_perms in obj_perm.items()
        if user_perms in ('a', 'r', 'w') and username != '*'
    ]
def import_object(obj, ws_info):
    """
    Import all the edges and vertices for a workspace object into RE.

    Args:
        obj - object dict with an 'info' entry and, optionally, 'provenance'
        ws_info - workspace info passed through to the save helpers

    After the generic documents and edges are saved, a type with an entry in
    _TYPE_PROCESSOR_MAP has its full object data fetched from the workspace
    and handed to that processor.
    """
    # TODO handle the ws_latest_version_of edge -- some tricky considerations here
    info = obj['info']
    obj_key = f'{info[6]}:{info[0]}'  # workspace id : object id
    version = info[4]
    obj_ver_key = f'{obj_key}:{version}'

    # Vertices and edges common to every object type
    _save_ws_object(info, ws_info)
    _save_obj_hash(info)
    _save_obj_version(obj_ver_key, version, info, ws_info)
    _save_copy_edge(obj_ver_key, obj)
    _save_obj_ver_edge(obj_ver_key, obj_key)
    _save_ws_contains_edge(obj_key, info)
    _save_workspace(ws_info)
    _save_type_vertices(info)
    _save_created_with_method_edge(obj_ver_key, obj.get('provenance'))
    _save_created_with_module_edge(obj_ver_key, obj.get('provenance'))
    _save_inst_of_type_edge(obj_ver_key, info)
    _save_owner_edge(obj_ver_key, info)
    _save_referral_edge(obj_ver_key, obj)
    _save_prov_desc_edge(obj_ver_key, obj)

    type_name, _type_version = info[2].split('-')
    if type_name not in _TYPE_PROCESSOR_MAP:
        return
    # this could use a lot of memory. There's a bunch of code in the workspace for
    # dealing with this situation, but that'd have to be ported to Python and it's pretty
    # complex, so YAGNI for now.
    ws_client = WorkspaceClient(url=config()['kbase_endpoint'],
                                token=config()['ws_token'])
    ref = obj_ver_key.replace(':', '/')
    resp = ws_client.admin_req('getObjects', {'objects': [{'ref': ref}]})
    _TYPE_PROCESSOR_MAP[type_name](obj_ver_key, resp['data'][0])
def check_object_deleted(ws_id, obj_id):
    """
    We check an object is deleted by listing the objects in a workspace and
    making sure the object we are looking for is missing.

    We want to do this because the DELETE event can correspond to more than
    just an object deletion, so we want to make sure the object is deleted.

    Args:
        ws_id - a workspace id
        obj_id - id of the object expected to be gone
    Returns:
        True when obj_id is not among the listed object ids, else False.
    Raises:
        WorkspaceResponseError - logged and re-raised when the listing fails.
    """
    ws_client = WorkspaceClient(url=config()['kbase_endpoint'],
                                token=config()['ws_token'])
    try:
        narr_data_obj_info = ws_client.admin_req("listObjects",
                                                 {'ids': [ws_id]})
    except WorkspaceResponseError as err:
        # resp_data must be passed through a %s placeholder; as a bare extra
        # argument it never appears in the log message.
        logger.error("Workspace response error: %s", err.resp_data)
        # NOTE: not sure if we want to raise err here, worth thinking about
        raise
    # An id missing from the workspace listing means the object is deleted
    return obj_id not in [obj[0] for obj in narr_data_obj_info]