Example #1
0
 def get_local(self, local_item: Item, overwrite=False):
     """Populate ``local_item.content`` with the instance pool definition.

     The JSON file at ``local_item.path`` is (re)read only when ``overwrite``
     is truthy or no content has been loaded yet.  Every attribute named in
     ``conf.instance_pools.strip_attributes`` is dropped, and the
     ``instance_pool_name`` entry is replaced by the result of
     ``self.remote_path`` applied to it.
     """
     if not overwrite and local_item.content is not None:
         # Content already loaded and no refresh requested.
         return
     pool = Local.load_json(local_item.path)
     local_item.content = pool
     for key in self._c.conf.instance_pools.strip_attributes:
         pool.pop(key, None)
     pool['instance_pool_name'] = self.remote_path(pool['instance_pool_name'])
Example #2
0
    def get_local(self, local_item: Item, overwrite=False):
        """Load a local job definition and resolve its references.

        The JSON file at ``local_item.path`` is read only when ``overwrite``
        is truthy or ``local_item.content`` is still ``None``.  After loading:

        * attributes listed in ``conf.jobs.strip_attributes`` are removed;
        * ``timeout_seconds`` defaults to ``0`` when absent;
        * ``existing_cluster_name`` (if set and a clusters helper is
          available) is replaced by ``existing_cluster_id``;
        * the notebook path of ``notebook_task`` (if any) is replaced by the
          value returned from ``self._workspace.find_notebook``;
        * ``name`` is passed through ``self.remote_path``.

        Raises:
            AssertionError: if the referenced cluster or notebook is not
                found.  Raised explicitly (rather than via ``assert``) so the
                check is not stripped when Python runs with ``-O``.
        """
        if not (overwrite or local_item.content is None):
            return

        local_item.content = Local.load_json(local_item.path)
        for attribute in self._c.conf.jobs.strip_attributes:
            local_item.content.pop(attribute, None)
        c = local_item.content

        # apply default values
        c['timeout_seconds'] = c.get('timeout_seconds', 0)

        # find the cluster
        if c.get('existing_cluster_name') and self._clusters:
            ec = self._clusters.get_single_item(c['existing_cluster_name'])
            if ec is None:
                raise AssertionError(
                    f'Cluster "{c["existing_cluster_name"]}", '
                    f'referenced in job "{c["name"]}" not found')
            c['existing_cluster_id'] = ec.path
            c.pop('existing_cluster_name', None)

        # find the right notebook
        notebook_path = c.get('notebook_task', {}).get('notebook_path')
        if notebook_path:
            remote_notebook_path = self._workspace.find_notebook(
                notebook_path)
            if remote_notebook_path is None:
                raise AssertionError(
                    f'Notebook "{notebook_path}" referenced in job "{c["name"]}" not found')
            c['notebook_task']['notebook_path'] = remote_notebook_path

        c['name'] = self.remote_path(c['name'])
Example #3
0
 def _ls(self, path=None):
     """Return the users reported by the users-list endpoint.

     When ``path`` is truthy it is sent as a ``userName eq`` filter in the
     query string; otherwise no query is passed.  The result maps each
     ``userName`` to an ``Item`` whose ``path`` is the user's ``id`` and
     whose ``content`` is the raw entry from the response.
     """
     query = None
     if path:
         query = f'?filter=userName+eq+{path}'
     response = self._c.api.call(Endpoints.users_list, body={}, query=query)
     listing = json.loads(response.text)

     found = {}
     for user in listing.get('Resources', []):
         user_name = user['userName']
         found[user_name] = Item(path=user['id'], kind='user', content=user)
     return found
Example #4
0
 def _get_remote(self, remote_item: Item, overwrite=False):
     """Fetch the remote object's source into ``remote_item.content``.

     The workspace-export endpoint is called (format ``SOURCE``) only when
     ``overwrite`` is truthy or no content is cached yet; the base64
     ``content`` field of the JSON response is decoded to bytes.
     """
     if not overwrite and remote_item.content is not None:
         return
     request_body = {'path': remote_item.path, 'format': 'SOURCE'}
     response = self._c.api.call(Endpoints.workspace_export,
                                 body=request_body)
     encoded = response.json()['content']
     remote_item.content = base64.b64decode(encoded)
Example #5
0
 def dbfs_ls(path) -> OrderedDict:
     """Walk ``path`` and describe every file and directory found below it.

     Returns an ``OrderedDict`` keyed by ``Local._common_dbfs_name(full, path)``
     whose values are ``Item`` objects of kind ``'dbfs file'`` (with ``size``)
     or ``'dbfs directory'``.  A ``None`` path yields an empty mapping.
     """
     listing = OrderedDict()
     if path is None:
         return listing

     for root, subdirs, filenames in os_walk(path):
         for filename in filenames:
             full_path = op.join(root, filename)
             entry = Item(path=full_path,
                          kind='dbfs file',
                          size=op.getsize(full_path),
                          is_dir=False)
             listing[Local._common_dbfs_name(full_path, path)] = entry
         for subdir in subdirs:
             full_path = op.join(root, subdir)
             entry = Item(path=full_path, kind='dbfs directory', is_dir=True)
             listing[Local._common_dbfs_name(full_path, path)] = entry
     return listing
Example #6
0
 def _ls(self, path=None):
     """Return the jobs owned by this deployment.

     Only jobs whose ``creator_user_name`` equals the configured deploying
     user and whose name starts with the configured name prefix are kept.
     Keys are ``self.common_path(name)``; each value is an ``Item`` whose
     ``path`` is the ``job_id`` and whose ``content`` is the job settings.
     ``path`` is accepted but not used.
     """
     response = self._c.api.call(Endpoints.jobs_list, body={})
     listing = json.loads(response.text)

     owned_jobs = {}
     for job in listing.get('jobs', []):
         is_ours = job['creator_user_name'] == self._c.conf.deploying_user_name
         if is_ours and job['settings']['name'].startswith(
                 self._c.conf.name_prefix):
             key = self.common_path(job['settings']['name'])
             owned_jobs[key] = Item(path=job['job_id'],
                                    kind='job',
                                    content=job['settings'])
     return owned_jobs
Example #7
0
 def workspace_ls(path) -> OrderedDict:
     """Walk ``path`` and describe the notebooks and directories below it.

     Files are included only when their extension is a key of
     ``NOTEBOOK_EXTENSIONS``; the mapped value becomes the item's
     ``language``.  Keys are ``Local._common_name(full, path)``.  A ``None``
     path yields an empty mapping.
     """
     listing = OrderedDict()
     if path is None:
         return listing

     for root, subdirs, filenames in os_walk(path):
         for filename in filenames:
             if op.splitext(filename)[1] not in NOTEBOOK_EXTENSIONS:
                 continue
             full_path = op.join(root, filename)
             entry = Item(path=full_path,
                          kind='workspace notebook',
                          language=NOTEBOOK_EXTENSIONS[op.splitext(filename)[1]],
                          is_dir=False)
             listing[Local._common_name(full_path, path)] = entry
         for subdir in subdirs:
             full_path = op.join(root, subdir)
             entry = Item(path=full_path,
                          kind='workspace directory',
                          is_dir=True)
             listing[Local._common_name(full_path, path)] = entry
     return listing
Example #8
0
 def _ls(self, path=None):
     """Return the clusters owned by this deployment.

     Only clusters whose ``creator_user_name`` equals the configured
     deploying user and whose ``cluster_name`` starts with the configured
     name prefix are kept.  Keys are ``self.common_path(cluster_name)``;
     each value is an ``Item`` whose ``path`` is the ``cluster_id`` and whose
     ``content`` is the raw cluster entry.  ``path`` is accepted but not used.
     """
     response = self._c.api.call(Endpoints.clusters_list, body={})
     listing = json.loads(response.text)

     owned_clusters = {}
     for cluster in listing.get('clusters', []):
         is_ours = (cluster['creator_user_name'] ==
                    self._c.conf.deploying_user_name)
         if is_ours and cluster['cluster_name'].startswith(
                 self._c.conf.name_prefix):
             key = self.common_path(cluster['cluster_name'])
             owned_clusters[key] = Item(path=cluster['cluster_id'],
                                        kind='cluster',
                                        content=cluster)
     return owned_clusters
Example #9
0
 def _ls(self, path=None):
     """Return the instance pools owned by this deployment.

     A pool is kept when its ``DatabricksInstancePoolCreatorId`` default tag
     equals the configured deploying user id and its name starts with the
     configured name prefix.  Keys are ``self.common_path(pool_name)``; each
     value is an ``Item`` whose ``path`` is the ``instance_pool_id`` and
     whose ``content`` is the raw pool entry.  ``path`` is accepted but not
     used.
     """
     response = self._c.api.call(Endpoints.instance_pools_list, body={})
     listing = json.loads(response.text)

     owned_pools = {}
     for pool in listing.get('instance_pools', []):
         is_ours = (pool['default_tags']['DatabricksInstancePoolCreatorId'] ==
                    self._c.conf.deploying_user_id)
         if is_ours and pool['instance_pool_name'].startswith(
                 self._c.conf.name_prefix):
             key = self.common_path(pool['instance_pool_name'])
             owned_pools[key] = Item(path=pool['instance_pool_id'],
                                     kind='instance pool',
                                     content=pool)
     return owned_pools
Example #10
0
 def get_local(self, local_item: Item, overwrite=False):
     """Load a local cluster definition and resolve its instance pool.

     The JSON file at ``local_item.path`` is read only when ``overwrite`` is
     truthy or ``local_item.content`` is still ``None``.  After loading,
     attributes listed in ``conf.clusters.strip_attributes`` are removed,
     ``instance_pool_name`` (if set and an instance-pool helper is available)
     is replaced by ``instance_pool_id``, and ``cluster_name`` is passed
     through ``self.remote_path``.

     Raises:
         AssertionError: if the referenced instance pool is not found.
             Raised explicitly (rather than via ``assert``) so the check is
             not stripped when Python runs with ``-O``.
     """
     if not (overwrite or local_item.content is None):
         return

     local_item.content = Local.load_json(local_item.path)
     for attribute in self._c.conf.clusters.strip_attributes:
         local_item.content.pop(attribute, None)
     c = local_item.content

     if c.get('instance_pool_name') and self._instance_pools:
         ip = self._instance_pools.get_single_item(c['instance_pool_name'])
         if ip is None:
             raise AssertionError(
                 f'Instance pool "{c["instance_pool_name"]}", '
                 f'referenced in cluster "{c["cluster_name"]}" not found')
         c['instance_pool_id'] = ip.path
         c.pop('instance_pool_name', None)

     c['cluster_name'] = self.remote_path(c['cluster_name'])
Example #11
0
 def files_ls(path, extensions=None, kind=None) -> OrderedDict:
     """Walk ``path`` and describe the files found below it.

     When ``extensions`` is given, only files whose extension (as returned by
     ``op.splitext``) is contained in it are listed.  Keys are
     ``Local._common_name(full, path)``; each value is an ``Item`` carrying
     the full path, the supplied ``kind`` and the file size.  A ``None`` path
     yields an empty mapping.
     """
     listing = OrderedDict()
     if path is None:
         return listing

     for root, _, filenames in os_walk(path):
         for filename in filenames:
             if extensions is not None and \
                     op.splitext(filename)[1] not in extensions:
                 continue
             full_path = op.join(root, filename)
             entry = Item(path=full_path,
                          kind=kind,
                          size=op.getsize(full_path),
                          is_dir=False)
             listing[Local._common_name(full_path, path)] = entry
     return listing
Example #12
0
 def _ls(self, path=None):
     """Recursively list DBFS objects below ``path``.

     ``path`` defaults to ``self._target_path``.  Every entry returned by the
     dbfs-list endpoint is recorded under ``self.common_path(entry path)`` as
     an ``Item``; directories are descended into first, so their contents
     precede the directory's own entry.

     Returns:
         An ``OrderedDict`` of ``Item`` objects.  Sub-listings are merged in
         place with ``update`` so the accumulator is not re-copied for every
         directory and the return type stays the same whether or not
         directories are present.
     """
     if path is None:
         path = self._target_path
     _objects = OrderedDict()
     response = self._c.api.call(Endpoints.dbfs_list, body={'path': path})
     for obj in response.json().get('files', []):
         if obj['is_dir']:
             # Merge the directory's contents before adding the directory.
             _objects.update(self._ls(obj['path']))
         _objects[self.common_path(obj['path'])] = Item(
             path=obj['path'],
             kind='dbfs directory' if obj['is_dir'] else 'dbfs file',
             is_dir=obj['is_dir'],
             size=obj['file_size'])
     return _objects
Example #13
0
 def _ls(self, path=None):
     """Recursively list workspace objects below ``path``.

     ``path`` defaults to ``self._target_path``.  Every object returned by
     the workspace-list endpoint is recorded under
     ``self.common_path(object path)`` as an ``Item`` whose ``kind`` is the
     lower-cased ``object_type``; directories are descended into first, so
     their contents precede the directory's own entry.

     Returns:
         An ``OrderedDict`` of ``Item`` objects.  Sub-listings are merged in
         place with ``update`` so the accumulator is not re-copied for every
         directory and the return type stays the same whether or not
         directories are present.
     """
     if path is None:
         path = self._target_path
     _objects = OrderedDict()
     response = self._c.api.call(Endpoints.workspace_list,
                                 body={'path': path})
     for obj in response.json().get('objects', []):
         is_directory = obj['object_type'] == 'DIRECTORY'
         if is_directory:
             # Merge the directory's contents before adding the directory.
             _objects.update(self._ls(obj['path']))
         _objects[self.common_path(obj['path'])] = Item(
             path=obj['path'],
             kind=obj['object_type'].lower(),
             language=obj.get('language', ''),
             is_dir=is_directory)
     return _objects
Example #14
0
 def get_local(local_item: Item, overwrite=False):
     """Read the file at ``local_item.path`` into ``local_item.content``.

     The file is loaded through ``Local.load_binary`` only when ``overwrite``
     is truthy or no content has been loaded yet.
     """
     if not overwrite and local_item.content is not None:
         return
     local_item.content = Local.load_binary(local_item.path)
Example #15
0
 def _get_remote(self, remote_item: Item, overwrite=False):
     """Fetch the remote DBFS file into ``remote_item.content``.

     The dbfs-read endpoint is called only when ``overwrite`` is truthy or no
     content is cached yet; the base64 ``content`` field of the JSON response
     is decoded to bytes.
     """
     if not overwrite and remote_item.content is not None:
         return
     request_body = {'path': remote_item.path}
     response = self._c.api.call(Endpoints.dbfs_read, body=request_body)
     encoded = response.json()['content']
     remote_item.content = base64.b64decode(encoded)
Example #16
0
 def _update(self, local_item: Item, remote_item: Item):
     """Send the local cluster definition to the clusters-edit endpoint.

     Flags the cached remote items as stale, makes sure the local content is
     loaded, stamps it with the remote cluster's id (``remote_item.path``)
     and returns the API call's result.
     """
     self._remote_items_stale = True
     self.get_local(local_item)
     payload = local_item.content
     payload['cluster_id'] = remote_item.path
     return self._c.api.call(Endpoints.clusters_edit, body=payload)