Python Create Examples

Programming Language: Python

Namespace/Package Name: optimus.functions

Class/Type: Create

Examples at hotexamples.com: 2

Python Create - 2 examples found. These are the top-rated real-world Python examples of optimus.functions.Create, extracted from open source projects. You can rate examples to help us improve their quality.

Frequently Used Methods

Show Hide

Create(2)

Frequently Used Methods

Create (2)

Example #1

Show file

File: optimus.py Project: schatzr/Optimus

    def __init__(self,
                 session=None,
                 master="local[*]",
                 app_name="optimus",
                 checkpoint=False,
                 path=None,
                 file_system="local",
                 verbose=False,
                 dl=False,
                 server=False,
                 repositories=None,
                 packages=None,
                 jars=None,
                 options=None,
                 additional_options=None,
                 enricher_host="localhost",
                 enricher_port=27017,
                 queue_url=None,
                 queue_exchange=None,
                 queue_routing_key="optimus"):
        """
        Transform and roll out

        Either start a new Spark session from the given settings or adopt the
        one passed in ``session``, then attach the helper objects
        (``create``, ``load``, ``read``, ``profiler``, ``ml``, ``enricher``).

        :param session: An existing Spark session. When given, it is stored in
            ``Spark.instance`` and none of the session-creation settings below
            are applied.
        :param master: 'Master', 'local' or ip address to a cluster
        :param app_name: Spark app name
        :param checkpoint: If True create a checkpoint folder
        :param path: path to the checkpoint folder. Defaults to the current
            working directory.
        :param file_system: 'local' or 'hadoop'
        :param verbose: Forwarded to ``self.verbose()``.
        :param dl: If True, the spark-deep-learning package is added before the
            session starts and ``self.dl`` is set to a ``DL`` instance.
        :param server: If truthy, a ``Server`` is started and kept in
            ``self.server_instance``.
        :param additional_options: Stored as ``self.additional_options``.

        :param options: Configuration options that are passed to spark-submit.
            See `the list of possible options
            <https://spark.apache.org/docs/2.1.0/configuration.html#available-properties>`_.
            Note that any options set already through PYSPARK_SUBMIT_ARGS will override
            these.
        :type options: (dict[str,str])
        :param repositories: List of additional maven repositories for package lookup.
        :type repositories: (list[str])

        :param packages: Spark packages that should be installed.
        :type packages: (list[str])

        :param jars: Full paths to jar files that we want to include to the session.
        :type jars: (list[str])

        :param enricher_host: Host passed to the ``Enricher``.
        :param enricher_port: Port passed to the ``Enricher``.
        :param queue_url: Passed to the ``Profiler``.
        :param queue_exchange: Passed to the ``Profiler``.
        :param queue_routing_key: Passed to the ``Profiler``.
        """
        if session is None:
            # If a Spark session is not passed by argument, create it.
            self.master = master
            self.app_name = app_name

            self.options = {} if options is None else options

            # Accept a single package as well as a list of packages.
            self.packages = [] if packages is None else val_to_list(packages)
            self.repositories = repositories

            # NOTE(review): the default here is a dict although the docstring
            # documents a list of paths; kept as-is because the consumers of
            # self.jars are not visible from this block -- confirm and align.
            if jars is None:
                jars = {}

            self.jars = jars
            self.additional_options = additional_options

            self.verbose(verbose)

            # Load Avro.
            # TODO: if the Spark 2.4 version is going to be used this is not necessary.
            #  Maybe we can check a priori which version of Spark is going to be used
            # self._add_spark_packages(["com.databricks:spark-avro_2.11:4.0.0"])

            # The deep-learning package has to be registered before the session
            # is started; the DL helper is only imported once it is running.
            if dl is True:
                self._add_spark_packages(
                    ["databricks:spark-deep-learning:1.5.0-spark2.4-s_2.11"])

            self._start_session()

            if dl is True:
                from optimus.dl.models import DL
                self.dl = DL()

            if path is None:
                path = os.getcwd()

            if checkpoint is True:
                self._set_check_point_folder(path, file_system)

        else:
            # If a session is passed by argument, just save the reference.
            Spark.instance = session

        # Banner. A raw string is used because the art contains backslash
        # sequences ("\_", "\/") that are invalid escapes in a normal literal;
        # the resulting text is unchanged.
        logger.print(r"""
                             ____        __  _                     
                            / __ \____  / /_(_)___ ___  __  _______
                           / / / / __ \/ __/ / __ `__ \/ / / / ___/
                          / /_/ / /_/ / /_/ / / / / / / /_/ (__  ) 
                          \____/ .___/\__/_/_/ /_/ /_/\__,_/____/  
                              /_/                                  
                              """)

        logger.print(STARTING_OPTIMUS)

        if server:
            logger.print("Starting Optimus Server...")
            s = Server()
            s.start()
            self.server_instance = s

        logger.print(SUCCESS)

        # Helper objects exposed on the Optimus instance.
        self.create = Create()
        self.load = Load()
        self.read = self.spark.read
        self.profiler = Profiler(queue_url=queue_url,
                                 queue_exchange=queue_exchange,
                                 queue_routing_key=queue_routing_key)
        self.ml = ML()
        self.enricher = Enricher(
            op=self,
            host=enricher_host,
            port=enricher_port,
        )

Example #2

Show file

    def __init__(self, session=None, master="local[*]", app_name="optimus", checkpoint=False, path=None,
                 file_system="local",
                 verbose=False,
                 server=False,
                 repositories=None,
                 packages=None,
                 jars=None,
                 driver_class_path=None,
                 options=None,
                 additional_options=None,
                 queue_url=None,
                 queue_exchange=None,
                 queue_routing_key="optimus"
                 ):

        """
        Transform and roll out

        Either start a new Spark session from the given settings or adopt the
        one passed in ``session``, then attach the helper objects
        (``create``, ``load``, ``read``, ``profiler``, ``ml``), load the CSS and
        set the output mode to html.

        :param session: An existing Spark session. When given, it is loaded
            through ``Spark().load(session)`` and none of the session-creation
            settings below are applied.
        :param master: 'Master', 'local' or ip address to a cluster
        :param app_name: Spark app name
        :param checkpoint: If True create a checkpoint folder
        :param path: path to the checkpoint folder. Defaults to the current
            working directory.
        :param file_system: 'local' or 'hadoop'
        :param verbose: Forwarded to ``self.verbose()``.
        :param server: If truthy, a ``Server`` is started and kept in
            ``self.server_instance``.
        :param driver_class_path: Forwarded to ``self._add_driver_class_path()``.
        :param additional_options: Stored as ``self.additional_options``.

        :param options: Configuration options that are passed to spark-submit.
            See `the list of possible options
            <https://spark.apache.org/docs/2.4.1/configuration.html#available-properties>`_.
            Note that any options set already through PYSPARK_SUBMIT_ARGS will override
            these.
        :type options: (dict[str,str])
        :param repositories: List of additional maven repositories for package lookup.
        :type repositories: (list[str])

        :param packages: Spark packages that should be installed.
        :type packages: (list[str])

        :param jars: Full paths to jar files that we want to include to the session.
        :type jars: (list[str])

        :param queue_url: Passed to the ``Profiler``.
        :param queue_exchange: Passed to the ``Profiler``.
        :param queue_routing_key: Passed to the ``Profiler``.
        """
        if session is None:
            # If a Spark session is not passed by argument, create it.
            self.master = master
            self.app_name = app_name

            self.options = {} if options is None else options

            # Accept a single package as well as a list of packages.
            self.packages = [] if packages is None else val_to_list(packages)
            self.repositories = repositories

            # Jars
            self.jars = jars
            self._add_jars(jars)

            # Driver class path
            self.driver_class_path = driver_class_path
            self._add_driver_class_path(driver_class_path)

            # Additional Options
            self.additional_options = additional_options

            self.verbose(verbose)

            # Load Avro.
            # TODO:
            #  if the Spark 2.4 version is going to be used this is not necessary.
            #  Maybe we can check a priori which version of Spark is going to be used
            self._add_spark_packages(["com.databricks:spark-avro_2.11:4.0.0"])

            # Directory containing this module, used to locate the bundled
            # JDBC jars. It is kept in its own variable so the caller's
            # `path` argument (the checkpoint folder) is not overwritten.
            module_dir = os.path.dirname(os.path.abspath(__file__))

            def c(files):
                # Prefix each jar with the module directory and normalise it
                # to a POSIX-style path.
                return [Path(module_dir + jar).as_posix() for jar in files]

            # Add databases jars
            # NOTE(review): these are relative paths while the driver class
            # path below is built from this module's directory -- confirm
            # that _add_jars resolves them as intended.
            self._add_jars(["../jars/RedshiftJDBC42-1.2.16.1027.jar", "../jars/mysql-connector-java-8.0.16.jar",
                            "../jars/ojdbc7.jar", "../jars/postgresql-42.2.5.jar"])

            self._add_driver_class_path(
                c(["//jars//RedshiftJDBC42-1.2.16.1027.jar", "//jars//mysql-connector-java-8.0.16.jar",
                   "//jars//ojdbc7.jar", "//jars//postgresql-42.2.5.jar"]))

            self._start_session()

            # Fall back to the working directory only when the caller did not
            # choose a checkpoint location.
            if path is None:
                path = os.getcwd()

            if checkpoint is True:
                self._set_check_point_folder(path, file_system)

        else:
            # If a session is passed by argument, just save the reference.
            Spark.instance = Spark().load(session)

        # Banner. A raw string is used because the art contains backslash
        # sequences ("\_", "\/") that are invalid escapes in a normal literal;
        # the resulting text is unchanged.
        logger.print(r"""
                             ____        __  _                     
                            / __ \____  / /_(_)___ ___  __  _______
                           / / / / __ \/ __/ / __ `__ \/ / / / ___/
                          / /_/ / /_/ / /_/ / / / / / / /_/ (__  ) 
                          \____/ .___/\__/_/_/ /_/ /_/\__,_/____/  
                              /_/                                  
                              """)

        logger.print(STARTING_OPTIMUS)

        if server:
            logger.print("Starting Optimus Server...")
            s = Server()
            s.start()
            self.server_instance = s

        logger.print(SUCCESS)

        # Helper objects exposed on the Optimus instance.
        self.create = Create()
        self.load = Load()
        self.read = self.spark.read
        self.profiler = Profiler(
            queue_url=queue_url,
            queue_exchange=queue_exchange,
            queue_routing_key=queue_routing_key
        )
        self.ml = ML()

        # Load the CSS
        self._load_css()

        # Set global output as html
        self.output("html")