Beispiel #1
0
class HadoopCommand(Command):
    """Command-line definition for Hadoop jobs.

    The first positional argument selects the Hadoop sub-command (one of
    ``subcmdlist``); the remaining positional arguments are forwarded to it.
    """

    # Sub-commands accepted as the first positional argument.
    subcmdlist = ["jar", "s3distcp", "streaming"]
    usage = "hadoopcmd <submit|run> [options] <%s> <arg1> [arg2] ..." % "|".join(
        subcmdlist)

    optparser = GentleOptionParser(usage=usage)
    optparser.add_option("--cluster-label",
                         dest="label",
                         help="the label of the cluster to run the command on")

    optparser.add_option("--notify",
                         action="store_true",
                         dest="can_notify",
                         default=False,
                         help="sends an email on command completion")

    # Stop option processing at the first positional argument so flags that
    # belong to the sub-command are left in the positional list
    # (assumes GentleOptionParser keeps optparse semantics here).
    optparser.disable_interspersed_args()

    @classmethod
    def parse(cls, args):
        """
        Parse command line arguments to construct a dictionary of command
        parameters that can be used to create a command

        Args:
            `args`: sequence of arguments

        Returns:
            Dictionary that can be used in create method, or None when the
            option parser raises OptionParsingExit

        Raises:
            ParseError: when the arguments are not correct
        """
        parsed = {}

        try:
            (options, args) = cls.optparser.parse_args(args)
        except OptionParsingError as e:
            raise ParseError(e.msg, cls.optparser.format_help())
        except OptionParsingExit:
            return None

        parsed['label'] = options.label
        parsed['can_notify'] = options.can_notify
        parsed["command_type"] = "HadoopCommand"

        # One sub-command name plus at least one argument for it.
        if len(args) < 2:
            raise ParseError("Need at least two arguments", cls.usage)

        subcmd = args.pop(0)
        if subcmd not in cls.subcmdlist:
            # Pass the usage text here as well, like the other errors above.
            raise ParseError("First argument must be one of <%s>" %
                             "|".join(cls.subcmdlist),
                             cls.usage)

        parsed["sub_command"] = subcmd
        # Every remaining argument is wrapped in single quotes.
        # NOTE(review): single quotes embedded in an argument are not escaped.
        parsed["sub_command_args"] = " ".join("'" + a + "'" for a in args)

        return parsed
Beispiel #2
0
class DbTapQueryCommand(Command):
    """Command-line definition for running a query against a DbTap."""

    usage = "dbtapquerycmd <submit|run> [options]"

    optparser = GentleOptionParser(usage=usage)
    optparser.add_option(
        "--db_tap_id", dest="db_tap_id",
        help="dbTap Id of the target database in Qubole")
    optparser.add_option(
        "-q", "--query", dest="query",
        help="query string")
    optparser.add_option(
        "--notify", action="store_true", dest="can_notify", default=False,
        help="sends an email on command completion")
    optparser.add_option(
        "--macros", dest="macros",
        help="expressions to expand macros used in query")

    optparser.add_option(
        "--name", dest="name",
        help="Assign a name to this command")

    @classmethod
    def parse(cls, args):
        """
        Turn command line arguments into the parameter dictionary used to
        create a command.

        Args:
            `args`: sequence of arguments

        Returns:
            Dictionary of the parsed options plus a "command_type" entry,
            or None when the option parser raises OptionParsingExit

        Raises:
            ParseError: when the arguments are not correct
        """

        try:
            opts, _positional = cls.optparser.parse_args(args)
            # Both options are mandatory; check them in declaration order.
            for required in ("db_tap_id", "query"):
                if getattr(opts, required) is None:
                    raise ParseError("%s is required" % required,
                                     cls.optparser.format_help())
        except OptionParsingError as err:
            raise ParseError(err.msg, cls.optparser.format_help())
        except OptionParsingExit:
            return None

        # Macros arrive as a JSON string and are decoded before returning.
        if opts.macros is not None:
            opts.macros = json.loads(opts.macros)

        params = vars(opts)
        params["command_type"] = "DbTapQueryCommand"
        return params
Beispiel #3
0
class HiveCommand(Command):
    """Command-line definition for Hive queries."""

    # The closing '>' of the sample-size placeholder was missing.
    usage = (
        "hivecmd <--query query-string | --script_location location-string>"
        " [--macros <expressions-to-expand-macros>]"
        " [--sample_size <sample-bytes-to-run-query-on>]")

    optparser = GentleOptionParser(usage=usage)
    optparser.add_option("--query", dest="query", help="query string")

    optparser.add_option("--script_location",
                         dest="script_location",
                         help="Path where hive query to run is stored")

    optparser.add_option("--macros",
                         dest="macros",
                         help="expressions to expand macros used in query")

    optparser.add_option("--sample_size",
                         dest="sample_size",
                         help="size of sample in bytes on which to run query")

    @classmethod
    def parse(cls, args):
        """
        Parse command line arguments to construct a dictionary of command
        parameters that can be used to create a command

        Args:
            `args` - sequence of arguments

        Returns:
            Dictionary that can be used in create method, or None when the
            option parser raises OptionParsingExit

        Raises:
            ParseError: when the arguments are not correct
        """

        try:
            (options, args) = cls.optparser.parse_args(args)
            # At least one source for the query text is required.
            if options.query is None and options.script_location is None:
                raise ParseError(
                    "One of query or script location"
                    " must be specified", cls.usage)
        except OptionParsingError as e:
            raise ParseError(e.msg, cls.usage)
        except OptionParsingExit:
            return None

        return vars(options)
Beispiel #4
0
class DbImportCommand(Command):
    """Command-line definition for importing data from a database.

    The help texts below were originally copied from the export command and
    described the wrong direction; they now describe an import.
    """

    usage = "dbimportcmd <submit|run> [options]"

    optparser = GentleOptionParser(usage=usage)
    optparser.add_option("-m", "--mode", dest="mode",
                         help="Can be 1 for table-based import or 2 for query-based import")
    optparser.add_option("--hive_table", dest="hive_table",
                         help="Mode 1: Name of the Hive Table into which data will be imported")
    optparser.add_option("--dbtap_id", dest="dbtap_id",
                         help="Modes 1 and 2: DbTap Id of the source database in Qubole")
    optparser.add_option("--db_table", dest="db_table",
                         help="Modes 1 and 2: Table to import from in the source database")
    optparser.add_option("--where_clause", dest="db_where",
                         help="Mode 1: where clause to be applied to the table before extracting rows to be imported")
    optparser.add_option("--parallelism", dest="db_parallelism",
                         help="Mode 1 and 2: Number of parallel threads to use for extracting data")

    optparser.add_option("--extract_query", dest="db_extract_query",
                         help="Mode 2: SQL query to be applied at the source database for extracting data. "
                              "$CONDITIONS must be part of the where clause")
    optparser.add_option("--boundary_query", dest="db_boundary_query",
                         help="Mode 2: query to be used to get the range of rowids to be extracted")
    optparser.add_option("--split_column", dest="db_split_column",
                         help="column used as rowid to split data into range")

    optparser.add_option("--notify", action="store_true", dest="can_notify",
                         default=False, help="sends an email on command completion")

    optparser.add_option("--tags", dest="tags",
                         help="comma-separated list of tags to be associated with the query ( e.g., tag1 tag1,tag2 )")

    optparser.add_option("--name", dest="name",
                         help="Assign a name to this command")

    @classmethod
    def parse(cls, args):
        """
        Parse command line arguments to construct a dictionary of command
        parameters that can be used to create a command

        Args:
            `args`: sequence of arguments

        Returns:
            Dictionary that can be used in create method, or None when the
            option parser raises OptionParsingExit

        Raises:
            ParseError: when the arguments are not correct
        """

        try:
            (options, args) = cls.optparser.parse_args(args)
            # Option values are strings, hence the comparison with "1"/"2".
            if options.mode not in ["1", "2"]:
                raise ParseError("mode must be either '1' or '2'",
                                 cls.optparser.format_help())

            if (options.dbtap_id is None) or (options.db_table is None):
                raise ParseError("dbtap_id and db_table are required",
                                 cls.optparser.format_help())

            # TODO: Semantic checks for parameters in mode 1 and 2

        except OptionParsingError as e:
            raise ParseError(e.msg, cls.optparser.format_help())
        except OptionParsingExit:
            return None

        v = vars(options)
        v["command_type"] = "DbImportCommand"
        return v
Beispiel #5
0
class DbExportCommand(Command):
    """Command-line definition for exporting data to a database."""

    usage = ("dbexportcmd <submit|run> [options]")

    optparser = GentleOptionParser(usage=usage)
    optparser.add_option("-m", "--mode", dest="mode",
                         help="Can be 1 for Hive export or 2 for HDFS/S3 export")
    optparser.add_option("--hive_table", dest="hive_table",
                         help="Mode 1: Name of the Hive Table from which data will be exported")
    optparser.add_option("--partition_spec", dest="partition_spec",
                         help="Mode 1: (optional) Partition specification for Hive table")
    optparser.add_option("--dbtap_id", dest="dbtap_id",
                         help="Modes 1 and 2: DbTap Id of the target database in Qubole")
    optparser.add_option("--db_table", dest="db_table",
                         help="Modes 1 and 2: Table to export to in the target database")
    optparser.add_option("--db_update_mode", dest="db_update_mode",
                         help="Modes 1 and 2: (optional) can be 'allowinsert' or "
                              "'updateonly'. If updateonly is "
                              "specified - only existing rows are updated. If allowinsert "
                              "is specified - then existing rows are updated and non existing "
                              "rows are inserted. If this option is not specified - then the "
                              "given the data will be appended to the table")
    optparser.add_option("--db_update_keys", dest="db_update_keys",
                         help="Modes 1 and 2: Columns used to determine the uniqueness of rows for "
                              "'updateonly' mode")
    optparser.add_option("--export_dir", dest="export_dir",
                         help="Mode 2: HDFS/S3 location from which data will be exported")
    # The backslash is escaped: a bare "\0" would embed a NUL character in
    # the help text instead of the literal "\0x20" example.
    optparser.add_option("--fields_terminated_by", dest="fields_terminated_by",
                         help="Mode 2: Hex of the char used as column separator "
                              "in the dataset, for eg. \\0x20 for space")

    optparser.add_option("--notify", action="store_true", dest="can_notify",
                         default=False, help="sends an email on command completion")

    optparser.add_option("--tags", dest="tags",
                         help="comma-separated list of tags to be associated with the query ( e.g., tag1 tag1,tag2 )")

    optparser.add_option("--name", dest="name",
                         help="Assign a name to this command")

    @classmethod
    def parse(cls, args):
        """
        Parse command line arguments to construct a dictionary of command
        parameters that can be used to create a command

        Args:
            `args`: sequence of arguments

        Returns:
            Dictionary that can be used in create method, or None when the
            option parser raises OptionParsingExit

        Raises:
            ParseError: when the arguments are not correct
        """

        try:
            (options, args) = cls.optparser.parse_args(args)
            if options.mode not in ["1", "2"]:
                raise ParseError("mode must be either '1' or '2'",
                                 cls.optparser.format_help())

            if (options.dbtap_id is None) or (options.db_table is None):
                raise ParseError("dbtap_id and db_table are required",
                                 cls.optparser.format_help())

            # Compare by value: `is` tests object identity, which is not
            # guaranteed for strings that come from the command line.
            if options.mode == "1":
                if options.hive_table is None:
                    raise ParseError("hive_table is required for mode 1",
                                     cls.optparser.format_help())
            elif options.export_dir is None:    # mode 2
                raise ParseError("export_dir is required for mode 2",
                                 cls.optparser.format_help())

            if options.db_update_mode is not None:
                if options.db_update_mode not in ["allowinsert", "updateonly"]:
                    raise ParseError("db_update_mode should either be left blank for append "
                                     "mode or be 'updateonly' or 'allowinsert'",
                                     cls.optparser.format_help())
                # With an identity test this branch could be skipped for a
                # genuine "updateonly", wrongly rejecting db_update_keys.
                if options.db_update_mode == "updateonly":
                    if options.db_update_keys is None:
                        raise ParseError("db_update_keys is required when db_update_mode "
                                         "is 'updateonly'",
                                         cls.optparser.format_help())
                elif options.db_update_keys is not None:
                    raise ParseError("db_update_keys is used only when db_update_mode "
                                     "is 'updateonly'",
                                     cls.optparser.format_help())

        except OptionParsingError as e:
            raise ParseError(e.msg, cls.optparser.format_help())
        except OptionParsingExit:
            return None

        v = vars(options)
        v["command_type"] = "DbExportCommand"
        return v
Beispiel #6
0
class PigCommand(Command):
    """Command-line definition for Pig scripts.

    The script is given inline (``--script``) or by location
    (``--script_location``). Positional ``key=value`` arguments become
    script parameters and are only accepted with an S3 script location.
    """

    usage = ("pigcmd <submit|run> [options] [key1=value1] [key2=value2] ...")

    optparser = GentleOptionParser(usage=usage)
    optparser.add_option("-s", "--script", dest="latin_statements",
                         help="latin statements that has to be executed")

    optparser.add_option("-f", "--script_location", dest="script_location",
                         help="Path where pig script to run is stored. Can be S3 URI or local file path")

    optparser.add_option("--cluster-label", dest="label",
                         help="the label of the cluster to run the command on")

    optparser.add_option("--notify", action="store_true", dest="can_notify",
                         default=False, help="sends an email on command completion")

    optparser.add_option("--tags", dest="tags",
                         help="comma-separated list of tags to be associated with the query ( e.g., tag1 tag1,tag2 )")

    optparser.add_option("--name", dest="name",
                         help="Assign a name to this command")

    @classmethod
    def parse(cls, args):
        """
        Parse command line arguments to construct a dictionary of command
        parameters that can be used to create a command

        Args:
            `args`: sequence of arguments

        Returns:
            Dictionary that can be used in create method, or None when the
            option parser raises OptionParsingExit

        Raises:
            ParseError: when the arguments are not correct
        """

        try:
            (options, args) = cls.optparser.parse_args(args)
            if options.latin_statements is None and options.script_location is None:
                raise ParseError("One of script or it's location"
                                 " must be specified",
                                 cls.optparser.format_help())
        except OptionParsingError as e:
            raise ParseError(e.msg, cls.optparser.format_help())
        except OptionParsingExit:
            return None

        if options.script_location is not None:
            if options.latin_statements is not None:
                raise ParseError(
                    "Both script and script_location cannot be specified",
                    cls.optparser.format_help())

            if not options.script_location.startswith(("s3://", "s3n://")):

                # script location is a local file: read it and submit the
                # contents as inline statements instead of a location.

                try:
                    # Context manager so the handle is closed promptly.
                    with open(options.script_location) as script_file:
                        s = script_file.read()
                except IOError as e:
                    raise ParseError("Unable to open script location: %s" %
                                     str(e),
                                     cls.optparser.format_help())
                options.script_location = None
                options.latin_statements = s

            if (args is not None) and (len(args) > 0):
                # latin_statements is only set here when a local file was
                # inlined above, i.e. the location was not in S3.
                if options.latin_statements is not None:
                    raise ParseError(
                        "Extra arguments can only be "
                        "supplied with a script_location in S3 right now",
                        cls.optparser.format_help())

                p = {}
                for a in args:
                    kv = a.split('=')
                    # Exactly one '=' is accepted per argument.
                    if len(kv) != 2:
                        raise ParseError("Arguments to pig script must be of this format k1=v1 k2=v2 k3=v3...",
                                         cls.optparser.format_help())
                    p[kv[0]] = kv[1]
                setattr(options, 'parameters', p)

        else:
            if (args is not None) and (len(args) > 0):
                raise ParseError(
                    "Extra arguments can only be supplied with a script_location",
                    cls.optparser.format_help())

        v = vars(options)
        v["command_type"] = "PigCommand"
        return v
Beispiel #7
0
class ShellCommand(Command):
    """Command-line definition for shell scripts.

    The script is given inline (``--script``) or by location
    (``--script_location``). Positional arguments are shell-quoted and
    passed as script parameters; they are only accepted with an S3 script
    location.
    """

    usage = ("shellcmd <submit|run> [options] [arg1] [arg2] ...")

    optparser = GentleOptionParser(usage=usage)
    optparser.add_option("-s", "--script", dest="inline", help="inline script that can be executed by bash")

    optparser.add_option("-f", "--script_location", dest="script_location",
                         help="Path where bash script to run is stored. Can be S3 URI or local file path")

    optparser.add_option("-i", "--files", dest="files",
                         help="List of files [optional] Format : file1,file2 (files in s3 bucket) These files will be copied to the working directory where the command is executed")

    optparser.add_option("-a", "--archives", dest="archives",
                         help="List of archives [optional] Format : archive1,archive2 (archives in s3 bucket) These are unarchived in the working directory where the command is executed")

    optparser.add_option("--cluster-label", dest="label",
                         help="the label of the cluster to run the command on")

    optparser.add_option("--notify", action="store_true", dest="can_notify",
                         default=False, help="sends an email on command completion")

    optparser.add_option("--tags", dest="tags",
                         help="comma-separated list of tags to be associated with the query ( e.g., tag1 tag1,tag2 )")

    optparser.add_option("--name", dest="name",
                         help="Assign a name to this command")

    @classmethod
    def parse(cls, args):
        """
        Parse command line arguments to construct a dictionary of command
        parameters that can be used to create a command

        Args:
            `args`: sequence of arguments

        Returns:
            Dictionary that can be used in create method, or None when the
            option parser raises OptionParsingExit

        Raises:
            ParseError: when the arguments are not correct
        """

        try:
            (options, args) = cls.optparser.parse_args(args)
            if options.inline is None and options.script_location is None:
                raise ParseError("One of script or it's location"
                                 " must be specified",
                                 cls.optparser.format_help())
        except OptionParsingError as e:
            raise ParseError(e.msg, cls.optparser.format_help())
        except OptionParsingExit:
            return None

        if options.script_location is not None:
            if options.inline is not None:
                raise ParseError(
                    "Both script and script_location cannot be specified",
                    cls.optparser.format_help())

            if not options.script_location.startswith(("s3://", "s3n://")):

                # script location is a local file: read it and submit the
                # contents as an inline script instead of a location.

                try:
                    # Context manager so the handle is closed promptly.
                    with open(options.script_location) as script_file:
                        s = script_file.read()
                except IOError as e:
                    raise ParseError("Unable to open script location: %s" %
                                     str(e),
                                     cls.optparser.format_help())
                options.script_location = None
                options.inline = s

            if (args is not None) and (len(args) > 0):
                # inline is only set here when a local file was inlined
                # above, i.e. the location was not in S3.
                if options.inline is not None:
                    raise ParseError(
                        "Extra arguments can only be "
                        "supplied with a script_location in S3 right now",
                        cls.optparser.format_help())

                # Quote each argument so the joined string is shell-safe.
                setattr(options, 'parameters',
                        " ".join([pipes.quote(a) for a in args]))

        else:
            if (args is not None) and (len(args) > 0):
                raise ParseError(
                    "Extra arguments can only be supplied with a script_location",
                    cls.optparser.format_help())

        v = vars(options)
        v["command_type"] = "ShellCommand"
        return v
Beispiel #8
0
class PrestoCommand(Command):
    """Command-line definition for Presto queries.

    The query is given inline (``--query``) or by location
    (``--script_location``); a local script file is read and sent inline.
    """

    usage = ("prestocmd <submit|run> [options]")

    optparser = GentleOptionParser(usage=usage)
    optparser.add_option("-q", "--query", dest="query", help="query string")

    optparser.add_option("-f", "--script_location", dest="script_location",
                         help="Path where presto query to run is stored. Can be S3 URI or local file path")

    optparser.add_option("--macros", dest="macros",
                         help="expressions to expand macros used in query")

    optparser.add_option("--tags", dest="tags",
                         help="comma-separated list of tags to be associated with the query ( e.g., tag1 tag1,tag2 )")

    optparser.add_option("--cluster-label", dest="label",
                         help="the label of the cluster to run the command on")

    optparser.add_option("--notify", action="store_true", dest="can_notify",
                         default=False, help="sends an email on command completion")

    optparser.add_option("--name", dest="name",
                         help="Assign a name to this query")

    @classmethod
    def parse(cls, args):
        """
        Parse command line arguments to construct a dictionary of command
        parameters that can be used to create a command

        Args:
            `args`: sequence of arguments

        Returns:
            Dictionary that can be used in create method, or None when the
            option parser raises OptionParsingExit

        Raises:
            ParseError: when the arguments are not correct
        """

        try:
            (options, args) = cls.optparser.parse_args(args)
            if options.query is None and options.script_location is None:
                raise ParseError("One of query or script location"
                                 " must be specified",
                                 cls.optparser.format_help())
        except OptionParsingError as e:
            raise ParseError(e.msg, cls.optparser.format_help())
        except OptionParsingExit:
            return None

        if options.script_location is not None:
            if options.query is not None:
                raise ParseError(
                    "Both query and script_location cannot be specified",
                    cls.optparser.format_help())

            if not options.script_location.startswith(("s3://", "s3n://")):

                # script location is a local file: read it and submit the
                # contents as the query instead of a location.
                try:
                    # Context manager so the handle is closed promptly.
                    with open(options.script_location) as script_file:
                        q = script_file.read()
                except IOError as e:
                    raise ParseError("Unable to open script location: %s" %
                                     str(e),
                                     cls.optparser.format_help())
                options.script_location = None
                options.query = q

        # Macros arrive as a JSON string and are decoded before returning.
        if options.macros is not None:
            options.macros = json.loads(options.macros)
        v = vars(options)
        v["command_type"] = "PrestoCommand"
        return v
Beispiel #9
0
class PigCommand(Command):
    """Command-line definition for Pig scripts.

    The script is given inline (``--script``) or by location
    (``--script_location``). Positional ``key=value`` arguments become
    script parameters and are only accepted with an S3 script location.
    """

    usage = ("pigcmd run [options] [key1=value1] [key2=value2] ...")

    optparser = GentleOptionParser(usage=usage)
    optparser.add_option("-s", "--script", dest="latin_statements",
                         help="latin statements that has to be executed")

    optparser.add_option("-f", "--script_location", dest="script_location",
                         help="Path where pig script to run is stored. Can be S3 URI or local file path")

    @classmethod
    def parse(cls, args):
        """
        Parse command line arguments to construct a dictionary of command
        parameters that can be used to create a command

        Args:
            `args` - sequence of arguments

        Returns:
            Dictionary that can be used in create method, or None when the
            option parser raises OptionParsingExit

        Raises:
            ParseError: when the arguments are not correct
        """

        try:
            (options, args) = cls.optparser.parse_args(args)
            if options.latin_statements is None and options.script_location is None:
                raise ParseError("One of script or it's location"
                                 " must be specified",
                                 cls.optparser.format_help())
        except OptionParsingError as e:
            raise ParseError(e.msg, cls.optparser.format_help())
        except OptionParsingExit:
            return None

        if options.script_location is not None:
            if options.latin_statements is not None:
                raise ParseError(
                    "Both script and script_location cannot be specified",
                    cls.optparser.format_help())

            if not options.script_location.startswith(("s3://", "s3n://")):

                # script location is a local file: read it and submit the
                # contents as inline statements instead of a location.

                try:
                    # Context manager so the handle is closed promptly.
                    with open(options.script_location) as script_file:
                        s = script_file.read()
                except (IOError, OSError, UnicodeDecodeError):
                    # Only file-access and decoding failures are reported as
                    # a ParseError; a bare except would also trap
                    # KeyboardInterrupt and SystemExit.
                    raise ParseError("Unable to open script location: %s" %
                                     options.script_location,
                                     cls.optparser.format_help())
                options.script_location = None
                options.latin_statements = s

            if (args is not None) and (len(args) > 0):
                # latin_statements is only set here when a local file was
                # inlined above, i.e. the location was not in S3.
                if options.latin_statements is not None:
                    raise ParseError(
                        "This sucks - but extra arguments can only be "
                        "supplied with a script_location in S3 right now",
                        cls.optparser.format_help())

                p = {}
                for a in args:
                    kv = a.split('=')
                    # Exactly one '=' is accepted per argument.
                    if len(kv) != 2:
                        raise ParseError("Arguments to pig script must be of this format k1=v1 k2=v2 k3=v3...",
                                         cls.optparser.format_help())
                    p[kv[0]] = kv[1]
                setattr(options, 'parameters', p)

        else:
            if (args is not None) and (len(args) > 0):
                raise ParseError(
                    "Extra arguments can only be supplied with a script_location",
                    cls.optparser.format_help())

        return vars(options)
Beispiel #10
0
class ShellCommand(Command):
    """Command-line definition for shell scripts.

    The script is given inline (``--script``) or by location
    (``--script_location``). Positional arguments are shell-quoted and
    passed as script parameters; they are only accepted with an S3 script
    location.
    """

    usage = ("shellcmd run [options] [arg1] [arg2] ...")

    optparser = GentleOptionParser(usage=usage)
    optparser.add_option("-s", "--script", dest="inline", help="inline script that can be executed by bash")

    optparser.add_option("-f", "--script_location", dest="script_location",
                         help="Path where bash script to run is stored. Can be S3 URI or local file path")

    optparser.add_option("-i", "--files", dest="files",
                         help="List of files [optional] Format : file1,file2 (files in s3 bucket) These files will be copied to the working directory where the command is executed")

    optparser.add_option("-a", "--archive", dest="archive",
                         help="List of archives [optional] Format : archive1,archive2 (archives in s3 bucket) These are unarchived in the working directory where the command is executed")

    @classmethod
    def parse(cls, args):
        """
        Parse command line arguments to construct a dictionary of command
        parameters that can be used to create a command

        Args:
            `args` - sequence of arguments

        Returns:
            Dictionary that can be used in create method, or None when the
            option parser raises OptionParsingExit

        Raises:
            ParseError: when the arguments are not correct
        """

        try:
            (options, args) = cls.optparser.parse_args(args)
            if options.inline is None and options.script_location is None:
                raise ParseError("One of script or it's location"
                                 " must be specified",
                                 cls.optparser.format_help())
        except OptionParsingError as e:
            raise ParseError(e.msg, cls.optparser.format_help())
        except OptionParsingExit:
            return None

        if options.script_location is not None:
            if options.inline is not None:
                raise ParseError(
                    "Both script and script_location cannot be specified",
                    cls.optparser.format_help())

            if not options.script_location.startswith(("s3://", "s3n://")):

                # script location is a local file: read it and submit the
                # contents as an inline script instead of a location.

                try:
                    # Context manager so the handle is closed promptly.
                    with open(options.script_location) as script_file:
                        s = script_file.read()
                except (IOError, OSError, UnicodeDecodeError):
                    # Only file-access and decoding failures are reported as
                    # a ParseError; a bare except would also trap
                    # KeyboardInterrupt and SystemExit.
                    raise ParseError("Unable to open script location: %s" %
                                     options.script_location,
                                     cls.optparser.format_help())
                options.script_location = None
                options.inline = s

            if (args is not None) and (len(args) > 0):
                # inline is only set here when a local file was inlined
                # above, i.e. the location was not in S3.
                if options.inline is not None:
                    raise ParseError(
                        "This sucks - but extra arguments can only be "
                        "supplied with a script_location in S3 right now",
                        cls.optparser.format_help())

                # Quote each argument so the joined string is shell-safe.
                setattr(options, 'parameters',
                        " ".join([pipes.quote(a) for a in args]))

        else:
            if (args is not None) and (len(args) > 0):
                raise ParseError(
                    "Extra arguments can only be supplied with a script_location",
                    cls.optparser.format_help())

        return vars(options)
Beispiel #11
0
class PrestoCommand(Command):
    """Command-line front end for the ``prestocmd`` sub-command.

    The class owns an option parser for ``--query``, ``--script_location``
    and ``--macros``; `parse` turns an argument list into the dictionary
    of parsed option values.
    """

    usage = ("prestocmd run [options]")

    optparser = GentleOptionParser(usage=usage)
    optparser.add_option("-q", "--query", dest="query", help="query string")

    optparser.add_option("-f", "--script_location", dest="script_location",
                         help="Path where presto query to run is stored. Can be S3 URI or local file path")

    optparser.add_option("--macros", dest="macros",
                         help="expressions to expand macros used in query")

    @classmethod
    def parse(cls, args):
        """
        Parse command line arguments to construct a dictionary of command
        parameters that can be used to create a command

        Exactly one of ``--query`` and ``--script_location`` must be given.
        A script location that does not start with ``s3://`` or ``s3n://``
        is treated as a local file: its contents become the ``query`` value
        and ``script_location`` is reset to None. An S3 location is passed
        through untouched.

        Args:
            `args` - sequence of arguments

        Returns:
            Dictionary that can be used in create method, or None when the
            option parser signals `OptionParsingExit`.

        Raises:
            ParseError: when the arguments are not correct
            ValueError: when ``--macros`` is not valid JSON (raised by
                `json.loads` and deliberately left unwrapped so existing
                callers see the same exception as before)
        """
        # Only the parser call itself can raise the option-parsing
        # exceptions, so keep the try body down to that one statement.
        try:
            (options, args) = cls.optparser.parse_args(args)
        except OptionParsingError as e:
            raise ParseError(e.msg, cls.optparser.format_help())
        except OptionParsingExit:
            return None

        if options.query is None and options.script_location is None:
            raise ParseError("One of query or script location"
                             " must be specified",
                             cls.optparser.format_help())

        if options.script_location is not None:
            if options.query is not None:
                raise ParseError(
                    "Both query and script_location cannot be specified",
                    cls.optparser.format_help())

            if not options.script_location.startswith(("s3://", "s3n://")):
                # script location is a local file: inline its contents as
                # the query text
                try:
                    # `with` closes the handle even when read() fails
                    with open(options.script_location) as script:
                        q = script.read()
                except (IOError, OSError, UnicodeError):
                    # Unreadable or undecodable file. Narrower than a bare
                    # `except:` so Ctrl-C and SystemExit are not rewritten
                    # into a ParseError.
                    raise ParseError("Unable to open script location: %s" %
                                     options.script_location,
                                     cls.optparser.format_help())
                options.script_location = None
                options.query = q

        if options.macros is not None:
            # macros arrive as a JSON string on the command line
            options.macros = json.loads(options.macros)
        return vars(options)
Beispiel #12
0
class SparkCommand(Command):
    """Command-line front end for the ``sparkcmd`` sub-command.

    A Spark command takes its work from exactly one of four sources:
    ``--program`` (with ``--language``), ``--script_location`` (a local
    file), ``--cmdline`` or ``--sql``. The ``validate_*`` class methods
    enforce that rule and `parse` builds the parameter dictionary.
    """

    usage = ("sparkcmd <submit|run> [options]")
    allowedlanglist = ["python", "scala"]

    optparser = GentleOptionParser(usage=usage)
    optparser.add_option("--program", dest="program", help=SUPPRESS_HELP)

    optparser.add_option("--cmdline",
                         dest="cmdline",
                         help="command line for Spark")

    optparser.add_option("--sql", dest="sql", help="sql for Spark")

    optparser.add_option(
        "-f",
        "--script_location",
        dest="script_location",
        help=
        "Path where spark program to run is stored. Has to be a local file path"
    )

    optparser.add_option("--macros",
                         dest="macros",
                         help="expressions to expand macros used in query")

    optparser.add_option(
        "--tags",
        dest="tags",
        help=
        "comma-separated list of tags to be associated with the query ( e.g., tag1 tag1,tag2 )"
    )

    optparser.add_option("--cluster-label",
                         dest="label",
                         help="the label of the cluster to run the command on")

    optparser.add_option("--language",
                         dest="language",
                         choices=allowedlanglist,
                         help=SUPPRESS_HELP)

    optparser.add_option("--notify",
                         action="store_true",
                         dest="can_notify",
                         default=False,
                         help="sends an email on command completion")

    optparser.add_option("--name",
                         dest="name",
                         help="Assign a name to this query")

    optparser.add_option("--arguments",
                         dest="arguments",
                         help="Spark Submit Command Line Options")

    optparser.add_option("--user_program_arguments",
                         dest="user_program_arguments",
                         help="Arguments for User Program")

    optparser.add_option("--print-logs",
                         action="store_true",
                         dest="print_logs",
                         default=False,
                         help="Fetch logs and print them to stderr.")

    @classmethod
    def _require_exactly_one_source(cls, this_given, others_given):
        """Raise ParseError unless exactly one of the two flags is true.

        Both false means no source option was specified; both true means
        at least two were specified.
        """
        if this_given == others_given:
            raise ParseError(
                "Exactly One of script location or program or cmdline or sql should be specified",
                cls.optparser.format_help())

    @classmethod
    def validate_program(cls, options):
        """Check the ``--program`` option against the other source options.

        Passes when ``--program`` is absent and one or more of the other
        sources are present; combinations among those other sources are
        rejected by their own validators.

        Args:
            `options`: parsed option values

        Raises:
            ParseError: when no source is given, when ``--program`` is
                combined with another source, or when ``--program`` is
                given without ``--language``
        """
        has_program = options.program is not None
        has_other = (options.script_location is not None
                     or options.cmdline is not None
                     or options.sql is not None)

        cls._require_exactly_one_source(has_program, has_other)
        if has_program and options.language is None:
            raise ParseError("Unspecified language for Program",
                             cls.optparser.format_help())

    @classmethod
    def validate_cmdline(cls, options):
        """Check the ``--cmdline`` option against the other source options.

        Args:
            `options`: parsed option values

        Raises:
            ParseError: when no source is given, when ``--cmdline`` is
                combined with another source, or when ``--cmdline`` is
                given together with ``--language``
        """
        has_cmdline = options.cmdline is not None
        has_other = (options.script_location is not None
                     or options.program is not None
                     or options.sql is not None)

        cls._require_exactly_one_source(has_cmdline, has_other)
        if has_cmdline and options.language is not None:
            raise ParseError(
                "Language cannot be specified with the commandline option",
                cls.optparser.format_help())

    @classmethod
    def validate_sql(cls, options):
        """Check the ``--sql`` option against the other source options.

        Args:
            `options`: parsed option values

        Raises:
            ParseError: when no source is given, when ``--sql`` is combined
                with another source, or when ``--sql`` is given together
                with ``--language``
        """
        has_sql = options.sql is not None
        has_other = (options.script_location is not None
                     or options.program is not None
                     or options.cmdline is not None)

        cls._require_exactly_one_source(has_sql, has_other)
        if has_sql and options.language is not None:
            raise ParseError(
                "Language cannot be specified with the 'sql' option",
                cls.optparser.format_help())

    @classmethod
    def validate_script_location(cls, options):
        """Check ``--script_location`` and load the script it points to.

        On success with a script location, `options` is modified in place:
        ``program`` receives the file's text, ``language`` is derived from
        the file extension (``.py`` -> python, ``.scala`` -> scala) and
        ``script_location`` is reset to None.

        Args:
            `options`: parsed option values

        Raises:
            ParseError: when no source is given, when the script location
                is combined with another source or with ``--language``,
                when it is an ``s3://`` / ``s3n://`` location, when the
                file cannot be opened, or when its extension is neither
                ``.py`` nor ``.scala``
        """
        has_script = options.script_location is not None
        has_other = (options.program is not None
                     or options.cmdline is not None
                     or options.sql is not None)

        cls._require_exactly_one_source(has_script, has_other)
        if not has_script:
            return

        if options.language is not None:
            raise ParseError(
                "Both script location and language cannot be specified together",
                cls.optparser.format_help())

        # for now, an S3 script_location is not supported and is rejected
        if options.script_location.startswith(("s3://", "s3n://")):
            raise ParseError(
                "Invalid location, Please choose a local file location",
                cls.optparser.format_help())

        # script location is a local file, so its text becomes the program
        try:
            # `with` closes the handle even when read() fails
            with open(options.script_location) as script:
                program_text = script.read()
        except IOError as e:
            raise ParseError(
                "Unable to open script location: %s" % str(e),
                cls.optparser.format_help())

        # the language of the program is taken from the file extension
        extension = os.path.splitext(options.script_location)[1]
        if extension == ".py":
            options.language = "python"
        elif extension == ".scala":
            options.language = "scala"
        else:
            raise ParseError(
                "Invalid program type, Please choose one from python or scala %s"
                % str(extension), cls.optparser.format_help())

        options.script_location = None
        options.program = program_text

    @classmethod
    def parse(cls, args):
        """
        Parse command line arguments to construct a dictionary of command
        parameters that can be used to create a command

        Args:
            `args`: sequence of arguments

        Returns:
            Dictionary that can be used in create method, or None when the
            option parser signals `OptionParsingExit`. The dictionary
            always carries ``command_type`` set to ``"SparkCommand"``.

        Raises:
            ParseError: when the arguments are not correct
            ValueError: when ``--macros`` is not valid JSON (raised by
                `json.loads`)
        """
        try:
            (options, args) = cls.optparser.parse_args(args)
        except OptionParsingError as e:
            raise ParseError(e.msg, cls.optparser.format_help())
        except OptionParsingExit:
            return None

        # Dispatch through `cls` so subclasses can override a validator and
        # so error help text comes from the same parser that parsed `args`.
        # validate_script_location rewrites a script location into
        # options.program, so the validators after it see a program.
        cls.validate_program(options)
        cls.validate_script_location(options)
        cls.validate_cmdline(options)
        cls.validate_sql(options)

        if options.macros is not None:
            # macros arrive as a JSON string on the command line
            options.macros = json.loads(options.macros)

        v = vars(options)
        v["command_type"] = "SparkCommand"
        return v