Exemple #1
0
    def read_schema(self, raise_if_empty=True):
        """Return the schema of this dataset.

        Column definitions are an array of objects like this one:
        { 'type': 'string', 'name': 'foo', 'maxLength': 1000 }.
        There is more information for the map, array and object types.

        When the FLOW_FORCED_SCHEMAS environment variable holds an entry for
        this dataset's full name, the raw "columns" list of that entry is
        returned directly: it is neither cached in self.cols nor wrapped in
        a Schema, and the emptiness check below is not applied.

        Otherwise the columns are fetched once, cached in self.cols and
        returned wrapped in a Schema.

        :param raise_if_empty: when true, raise if the schema has no column
        """
        if self.cols is None:
            forced_raw = os.getenv("FLOW_FORCED_SCHEMAS")
            if forced_raw is not None:
                forced_schemas = json.loads(forced_raw)
                if self.full_name in forced_schemas:
                    override = forced_schemas[self.full_name]
                    logging.info("Forcing schema: %s" % override)
                    return override["columns"]

            # No override applies: fetch the schema and keep it for later calls.
            fetch_error = 'Unable to fetch schema for %s' % (self.name)
            response = intercom.jek_or_backend_json_call(
                "datasets/get-schema/",
                data={"fullDatasetName": self.full_name},
                err_msg=fetch_error)
            self.cols = response.get("columns")

        if raise_if_empty and not len(self.cols):
            raise Exception(
                "No column in schema of %s."
                " Have you set up the schema for this dataset?" % self.name)
        return Schema(self.cols, )
Exemple #2
0
    def init_write_session(self, request):
        """Open a write session for the given request.

        The request dict is modified in place: its "activityId" entry is set
        to this object's activity_id before it is JSON-encoded and sent.

        :param request: dict describing the write session to open
        :return: a (session id, message) tuple; the id falls back to
                 MISSING_ID_MARKER when the response carries no 'id'
        """
        request["activityId"] = self.activity_id
        payload = {"request": json.dumps(request)}
        session = jek_or_backend_json_call("datasets/init-write-session/", data=payload)

        # This call is NOT supposed to fail. We always get a session ID.
        # If the request is invalid, the error must be retrieved by wait_write_session()
        session_id = session.get('id', MISSING_ID_MARKER)
        session_message = session.get('message')
        return session_id, session_message
Exemple #3
0
 def get_files_info(self, partitions=()):
     """Fetch the files info of this dataset.

     :param partitions: sequence of partition identifiers, sent JSON-encoded
                        under the "partitions" key; empty by default
     :return: the result of the "datasets/get-files-info/" call
     """
     # The default is an immutable tuple instead of a shared mutable list;
     # json.dumps serializes an empty tuple as "[]", exactly like an empty
     # list, so the request payload is unchanged.
     return intercom.jek_or_backend_json_call(
         "datasets/get-files-info/",
         data={
             "projectKey": self.project_key,
             "datasetName": self.short_name,
             "partitions": json.dumps(partitions)
         },
         err_msg="Failed to get the dataset files info")
Exemple #4
0
 def get_location_info(self, sensitive_info=False):
     """Fetch the location info of this dataset.

     :param sensitive_info: forwarded as the "sensitiveInfo" request field
     :return: the result of the "datasets/get-location-info/" call
     """
     query = {
         "projectKey": self.project_key,
         "datasetName": self.short_name,
         "sensitiveInfo": sensitive_info,
     }
     return intercom.jek_or_backend_json_call(
         "datasets/get-location-info/",
         data=query,
         err_msg="Failed to get the dataset location info")
Exemple #5
0
 def list_partitions(self):
     """
     Gets the partitions in the folder

     :rtype: list
     """
     lookup_args = {
         "projectKey": self.project_key,
         "lookup": self.short_name,
     }
     return jek_or_backend_json_call("managed-folders/list-partitions", lookup_args)
Exemple #6
0
def use_plugin_libs(plugin_id):
    """Add the lib/ folder of the plugin to PYTHONPATH.

    The folder is appended to sys.path only if it is not already there.

    :param plugin_id: identifier of the plugin whose lib folders are looked up
    :raises Exception: when the plugin reports no python lib folder
    """
    lib_folders = intercom.jek_or_backend_json_call(
        "plugins/get-lib-folders", data={"pluginId": plugin_id})

    lib_path = lib_folders.get('pythonLib', '')
    if len(lib_path) == 0:
        raise Exception('No python-lib folder defined in this plugin')

    if lib_path not in sys.path:
        sys.path.append(lib_path)
Exemple #7
0
    def list_partitions(self, raise_if_empty=True):
        """List the partitions of this dataset, as an array of partition specifications.

        The list is fetched on the first call and cached in self.partitions;
        later calls return the cached value.

        :param raise_if_empty: when true, raise if the dataset has no partition
        """
        if self.partitions is None:
            listing_error = 'Unable to list partitions for %s' % (self.name)
            self.partitions = intercom.jek_or_backend_json_call(
                "datasets/list-partitions/",
                data={"fullDatasetName": self.full_name},
                err_msg=listing_error)

        if raise_if_empty and not len(self.partitions):
            raise Exception("No partition in %s." % self.name)
        return self.partitions
Exemple #8
0
    def get_path_details(self, path='/'):
        """
        Get details about a specific path (file or directory) in the folder

        :param path: path to look up; defaults to '/'
        :rtype: dict
        """
        lookup_args = {
            "projectKey": self.project_key,
            "lookup": self.short_name,
            "path": path,
        }
        return jek_or_backend_json_call("managed-folders/get-path-details", lookup_args)
Exemple #9
0
 def get_partition_info(self, partition):
     """
     Get information about the partitions of this managed folder

     The full response is stored in self.partition_infos under the given
     partition key (overwriting any previous entry); only its "info" entry
     is returned.

     :rtype: dict
     """
     lookup_args = {
         "projectKey": self.project_key,
         "lookup": self.short_name,
         "partition": partition,
     }
     fetched = jek_or_backend_json_call("managed-folders/get-partition-paths", lookup_args)
     self.partition_infos[partition] = fetched
     return self.partition_infos[partition]["info"]
Exemple #10
0
 def get_info(self, sensitive_info=False):
     """
     Get information about the location and settings of this managed folder

     The response is fetched on the first call and cached in self.info.
     NOTE(review): sensitive_info is only used by the call that populates
     the cache; later calls return the cached "info" whatever value is
     passed - confirm this is intended.

     :rtype: dict
     """
     if self.info is not None:
         return self.info["info"]

     lookup_args = {
         "projectKey": self.project_key,
         "lookup": self.short_name,
         "sensitiveInfo": sensitive_info,
     }
     self.info = jek_or_backend_json_call("managed-folders/get-info", lookup_args)
     return self.info["info"]
Exemple #11
0
def import_from_plugin(plugin_id, package_name):
    """Import a package from the lib/ folder of the plugin and return the module.

    :param plugin_id: identifier of the plugin whose lib folders are looked up
    :param package_name: name of the module or package to load from that folder
    :raises Exception: when the plugin reports no python lib folder
    """
    lib_folders = intercom.jek_or_backend_json_call(
        "plugins/get-lib-folders", data={"pluginId": plugin_id})

    lib_path = lib_folders.get('pythonLib', '')
    if len(lib_path) == 0:
        raise Exception('No python-lib folder defined in this plugin')

    # Search only the plugin's own lib folder, not the regular sys.path.
    handle, location, details = imp.find_module(package_name, [lib_path])
    try:
        return imp.load_module(package_name, handle, location, details)
    finally:
        # The handle can be empty, in which case there is nothing to close.
        if handle:
            handle.close()
Exemple #12
0
 def _ensure_and_check_direct_access(self):
     """Check that direct filesystem access to this folder is possible.

     Raises when the Python process runs remotely or when the folder type
     reported by get_info() is not 'Filesystem'. Otherwise the
     "managed-folders/ensure-direct-access" call is made once and its
     result is cached in self.access_granted.
     """
     if remoterun._is_running_remotely():
         raise Exception(
             'Python process is running remotely, direct access to folder is not possible'
         )

     folder_type = self.get_info().get("type", None)
     if folder_type != 'Filesystem':
         raise Exception(
             'Folder is not on the local filesystem (uses %s), cannot perform direct filesystem access. Use the read/write API instead. '
             % self.get_info().get('type', 'unknown'))

     if self.access_granted is None:
         lookup_args = {
             "projectKey": self.project_key,
             "lookup": self.short_name,
         }
         self.access_granted = jek_or_backend_json_call(
             "managed-folders/ensure-direct-access", lookup_args)
Exemple #13
0
def toSQL(builder, dataset=None, dialect=None):
    """Generate the SQL text for a query/expression builder.

    The builder's AST is serialized to JSON and sent to the
    "sql-generation/expr" endpoint together with either the explicit
    dialect or the connection resolved from the dataset.

    :param builder: object exposing _get_ast(); nested objects that are not
                    JSON-serializable are encoded through their _expr attribute
    :param dataset: dataset used to resolve the connection when no dialect
                    is given
    :param dialect: SQL dialect; when provided, the dataset is not used
    :return: the 'sql' entry of the response
    :raises Exception: when neither dialect nor dataset is given, or when
                       the builder yields an empty AST
    """
    connection = None
    if dialect is None:
        if dataset is None:
            raise Exception(
                "Either a dialect or a dataset must be specified")
        connection = _get_dataset_connection(dataset)

    raw_ast = builder._get_ast()
    # Emptiness has to be checked before serializing: json.dumps never
    # returns None or an empty string (None becomes "null", {} becomes
    # "{}"), so a check on the serialized text can never trigger.
    if raw_ast is None or (hasattr(raw_ast, "__len__") and len(raw_ast) == 0):
        raise Exception("Empty query")
    ast = json.dumps(raw_ast, default=lambda o: o._expr, indent=4)

    resp = intercom.jek_or_backend_json_call("sql-generation/expr",
                                             data={
                                                 "ast": ast,
                                                 "dialect": dialect,
                                                 "connection": connection
                                             })

    return resp.get('sql')
Exemple #14
0
                        files={'file': ('error.json', f, 'application/json')})
            except Exception as e:
                logging.error("Could not send error to backend: %s" % e)

    if error_code != 0 and fail_if_subprocess_failed:
        logging.error("Failing container because subprocess failed (code %s)" %
                      error_code)
        sys.exit(error_code)


if __name__ == "__main__":
    setup_log()
    os.chdir(HOME_DIR)
    logging.info("Fetching job definition")
    execution_id = sys.argv[1]
    execution = intercom.jek_or_backend_json_call(
        'containers/get-execution', data={'executionId': execution_id})
    # segregate the remote-run-env-def.json (might contain stuff we don't want logged)
    # also the R-exec-wrapper expects it
    dku_exec_env = execution.get('envResource', {
        'env': {},
        'python': {},
        'r': {}
    })
    execution['envResource'] = None
    logging.info("got exec: " + str(execution))
    with open("execution.json", 'w') as fd:
        json.dump(execution, fd)
    # add the lib folders on the path
    if execution['type'] != 'RECIPE_R':
        python_env = dku_exec_env.get('python', {})
        python_env['pythonPathChunks'] = [
Exemple #15
0
 def read_metadata(self):
     """Reads the dataset metadata object, looked up by the dataset's full name"""
     query = {"fullDatasetName": self.full_name}
     return intercom.jek_or_backend_json_call("datasets/get-metadata", data=query)
Exemple #16
0
def get_connection(connection_name):
    """Fetch the details of the named connection and wrap them in a DSSConnection.

    :param connection_name: name of the connection to look up
    """
    query = {"connectionName": connection_name}
    details = intercom.jek_or_backend_json_call("connections/get-details", data=query)
    return DSSConnection(details)