def __init__(self, *args, **kwargs):
     conn = self.get_connection(kwargs['qubole_conn_id'])
     Qubole.configure(api_token=conn.password, api_url=conn.host)
     self.task_id = kwargs['task_id']
     self.dag_id = kwargs['dag'].dag_id
     self.kwargs = kwargs
     self.cls = COMMAND_CLASSES[self.kwargs['command_type']]
     self.cmd = None
 def _configure_qubole(self):
     logging.basicConfig(level=logging.INFO)
     logger = logging.getLogger('qds_connection')
     logger.propagate = False
     qdslog = logging.getLogger('qds')
     if not self.config.API_TOKEN:
         raise Exception("You didn't specify your QUBOLE_API_TOKEN in "
                         "your environment before running commands on "
                         "Qubole!\n. It can be found at http://api.qubole"
                         ".com/users/edit")
     Qubole.configure(api_token=self.config.API_TOKEN,
                      api_url=self.config.API_URL,
                      version=self.config.API_VERSION,
                      poll_interval=self.config.POLL_INTERVAL_SEC)
     return qdslog
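A minimal sketch of the `config` object this helper expects: plain attributes populated from the environment. Only QUBOLE_API_TOKEN appears in the snippet above; the other variable names and defaults here are assumptions.

import os

class QuboleConfig(object):
    API_TOKEN = os.getenv("QUBOLE_API_TOKEN")
    API_URL = os.getenv("QUBOLE_API_URL", "https://api.qubole.com/api/")  # assumed default
    API_VERSION = os.getenv("QUBOLE_API_VERSION", "v1.2")                 # assumed default
    POLL_INTERVAL_SEC = int(os.getenv("QUBOLE_POLL_INTERVAL", "5"))       # assumed default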
Example #3
 def update(cls, cluster_id_label, cluster_info):
     """
     Update the cluster with id/label `cluster_id_label` using information provided in
     `cluster_info`.
     """
     conn = Qubole.agent(version="v2")
     return conn.put(cls.element_path(cluster_id_label), data=cluster_info)
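A hedged usage sketch for the v2 update call above, assuming it is exposed on qds-sdk's ClusterV2 class; the label and the payload shape are illustrative, not authoritative.

from qds_sdk.qubole import Qubole
from qds_sdk.clusterv2 import ClusterV2

Qubole.configure(api_token="<your-api-token>")
# Hypothetical payload: bump the autoscaling ceiling of an existing cluster.
cluster_info = {"cluster_info": {"max_nodes": 10}}
ClusterV2.update("my-cluster-label", cluster_info)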
 def index(cls):
     """
     Shows a list of all available reports by issuing a GET request to the
     /reports endpoint.
     """
     conn = Qubole.agent()
     return conn.get(cls.rest_entity_path)
    def poke(self, context):
        conn = BaseHook.get_connection(self.qubole_conn_id)
        Qubole.configure(api_token=conn.password, api_url=conn.host)

        self.log.info('Poking: %s', self.data)

        status = False
        try:
            status = self.sensor_class.check(self.data)
        except Exception as e:
            logging.exception(e)
            status = False

        self.log.info('Status of this Poke: %s', status)

        return status
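Airflow drives `poke` itself once the sensor is placed in a DAG; a sketch assuming the old airflow.contrib import path (newer releases moved it), with connection id, S3 path, and task id as placeholders.

from airflow.contrib.sensors.qubole_sensor import QuboleFileSensor

wait_for_input = QuboleFileSensor(
    task_id="wait_for_input",
    qubole_conn_id="qubole_default",
    data={"files": ["s3://my-bucket/incoming/_SUCCESS"]},  # hypothetical path
    poke_interval=60,
    dag=dag,  # assumes a DAG object named `dag` is already in scope
)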
Example #6
 def find_by_name(name):
     conn = Qubole.agent()
     if name is not None:
          schedjson = conn.get(Scheduler.rest_entity_path, params={"name": name})
         if schedjson["schedules"]:
             return Scheduler(schedjson["schedules"][0])
     return None
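Lookup is then a single call; a sketch assuming the helper above is attached to qds-sdk's Scheduler class and that a schedule with this name exists.

from qds_sdk.qubole import Qubole
from qds_sdk.scheduler import Scheduler

Qubole.configure(api_token="<your-api-token>")
sched = Scheduler.find_by_name("nightly-etl")  # hypothetical schedule name
if sched is not None:
    print(sched.attributes.get("id"))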
Example #7
 def terminate(cls, cluster_id_label):
     """
     Terminate the cluster with id/label `cluster_id_label`.
     """
     conn = Qubole.agent()
     data = {"state": "terminate"}
     return conn.put(cls.element_path(cluster_id_label) + "/state", data)
Example #8
 def clone(cls, cluster_id_label, cluster_info):
     """
     Clone the cluster with id/label `cluster_id_label` using information provided in
     `cluster_info`.
     """
     conn = Qubole.agent()
     return conn.post(cls.element_path(cluster_id_label) + '/clone', data=cluster_info)
Example #9
 def add_node(cls, cluster_id_label, parameters=None):
   """
   Add a node to an existing cluster
   """
   conn = Qubole.agent()
    parameters = parameters or {}
    return conn.post(cls.element_path(cluster_id_label) + "/nodes", data={"parameters": parameters})
Example #10
 def start(cls, cluster_id_label):
     """
     Start the cluster with id/label `cluster_id_label`.
     """
     conn = Qubole.agent()
     data = {"state": "start"}
     return conn.put(cls.element_path(cluster_id_label) + "/state", data)
Example #11
    def get_results(self, fp=sys.stdout, inline=True, delim=None):
        """
        Fetches the result for the command represented by this object

        Args:
            `fp`: a file object to write the results to directly
        """
        result_path = self.meta_data['results_resource']

        conn = Qubole.agent()

        r = conn.get(result_path, {'inline': inline})
        if r.get('inline'):
            if sys.version_info < (3, 0, 0):
                fp.write(r['results'].encode('utf8'))
            else:
                import io
                if isinstance(fp, io.TextIOBase):
                    fp.buffer.write(r['results'].encode('utf8'))
                elif isinstance(fp, io.BufferedIOBase) or isinstance(fp, io.RawIOBase):
                    fp.write(r['results'].encode('utf8'))
                else:
                    # Can this happen? Don't know what's the right thing to do in this case.
                    pass
        else:
            acc = Account.find()
            boto_conn = boto.connect_s3(aws_access_key_id=acc.storage_access_key,
                                        aws_secret_access_key=acc.storage_secret_key)

            log.info("Starting download from result locations: [%s]" % ",".join(r['result_location']))
            #fetch latest value of num_result_dir
            num_result_dir = Command.find(self.id).num_result_dir
            for s3_path in r['result_location']:
                # In Python 3, in this case, `fp` should always be binary mode.
                _download_to_local(boto_conn, s3_path, fp, num_result_dir, delim=delim)
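A hedged sketch of calling `get_results` with a file sink: a binary-mode file object satisfies both branches above, since the inline branch writes bytes to it directly and the S3 branch expects binary mode. The query and output filename are placeholders.

from qds_sdk.qubole import Qubole
from qds_sdk.commands import HiveCommand

Qubole.configure(api_token="<your-api-token>")
cmd = HiveCommand.run(query="SELECT COUNT(*) FROM default.demo")  # blocks until done
with open("results.tsv", "wb") as fp:
    cmd.get_results(fp, delim="\t")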
Example #12
    def list(cls, label=None, cluster_id=None, state=None):
        """
        List existing clusters present in your account.

        Kwargs:
            `state`: list only those clusters which are in this state

        Returns:
            List of clusters satisfying the given criteria
        """
        if cluster_id is not None:
            return cls.show(cluster_id)
        if label is not None:
            return cls.show(label)
        conn = Qubole.agent(version="v2")
        cluster_list = conn.get(cls.rest_entity_path)
        if state is None:
            # return the complete list since state is None
            return cluster_list
        # filter clusters based on state
        result = []
        if 'clusters' in cluster_list:
            for cluster in cluster_list['clusters']:
                if state.lower() == cluster['state'].lower():
                    result.append(cluster)
        return result
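A hedged usage sketch, assuming the classmethod above is bound to a cluster class named `Cluster`; the filtered entries carry the `state` field the filter matched on.

from qds_sdk.qubole import Qubole

Qubole.configure(api_token="<your-api-token>")
for cluster in Cluster.list(state="up"):  # `Cluster` is the class defining list() above
    print(cluster["state"])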
 def find(cls, name="default", **kwargs):
     if (name is None) or (name == "default"):
         conn = Qubole.agent()
         return cls(conn.get(cls.rest_entity_path))
     else:
         raise ParseError("Bad name %s" % name,
                          "Hadoop Clusters can only be named 'default' currently")
 def __init__(self, access=None, secret=None, testmode=False, db_parallelism=None, mode=None, db_table=None, db_where=None, db_columns=None, db_boundary_query=None, db_extract_query=None, db_split_column=None, hive_table=None, part_spec=None, db_user=None, db_passwd=None, db_host=None, db_port=None, db_type=None, db_name=None, api_token=None, api_url=None, fetch_size=None):
   self.temp_location = "/tmp/sqoop/" + uuid.uuid1().hex
   self.tmp_dir = tempfile.mkdtemp(prefix="/media/ephemeral0/logs" + "/sqoop")
   logger.info("Temp directory is: " + self.tmp_dir)
   self.access = access
   self.secret = secret
   self.api_token = api_token
   self.api_url = api_url
   self.fetch_size = fetch_size
   self.redshift_sink = False
   self.__loadImportParamsFromCid(testmode, db_parallelism, mode, db_table, db_where, db_columns, db_boundary_query, db_extract_query, db_split_column, hive_table, part_spec, db_user, db_passwd, db_host, db_port, db_type, db_name)
   self.sqoop_cmd = ["/usr/lib/sqoop-h2/bin/sqoop"]
   self.sqoop_cmd.extend(["import"])
   self.__addBasicOptions()
   self.__extendCmdSpecificOptions()
   Qubole.configure(api_token=api_token, api_url=api_url)
   # Read the cluster id off the local node and resolve its label.
   node_info = os.popen("cat /usr/lib/hustler/bin/nodeinfo_src.sh | grep cluster_id").read()
   cluster_id = node_info.split("=")[1].strip().replace('"', '')
   self.cluster_label = Cluster.show(cluster_id)['cluster']['label'][0]
Example #15
 def update_node(cls, cluster_id_label, command, private_dns, parameters=None):
     """
     Update a node on an existing cluster.
     """
     conn = Qubole.agent()
     parameters = parameters or {}
     data = {"command": command, "private_dns": private_dns, "parameters": parameters}
     return conn.put(cls.element_path(cluster_id_label) + "/nodes", data)
Example #16
 def check(cls, data):
     """
     Method to call the sensors api with json payload
     :param data: valid json object
     :return: True or False
     """
     conn = Qubole.agent()
     return conn.post(cls.rest_entity_path, data=data)['status']
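The sensors API can also be hit directly; a sketch assuming qds-sdk's FileSensor subclass in qds_sdk.sensors, with a payload naming the S3 paths to probe (bucket and key are placeholders).

from qds_sdk.qubole import Qubole
from qds_sdk.sensors import FileSensor

Qubole.configure(api_token="<your-api-token>")
# Returns True or False, per the docstring above.
print(FileSensor.check({"files": ["s3://my-bucket/data/part-00000"]}))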
Example #17
    def create_update_clone_parser(subparser, action=None):
        # cloud config parser
        cloud = Qubole.get_cloud()
        cloud.create_parser(subparser)

        # cluster info parser
        ClusterInfoV2.cluster_info_parser(subparser, action)

        # engine config parser
        Engine.engine_parser(subparser)
Example #18
 def snapshot(cls, cluster_id_label, s3_location, backup_type):
     """
     Create hbase snapshot full/incremental
     """
     conn = Qubole.agent()
     parameters = {}
     parameters['s3_location'] = s3_location
     if backup_type:
         parameters['backup_type'] = backup_type
     return conn.post(cls.element_path(cluster_id_label) + "/snapshots", data=parameters)
Example #19
    def get_log_id(cls, id):
        """
        Fetches log for the command represented by this id

        Args:
            `id`: command id
        """
        conn = Qubole.agent()
        r = conn.get_raw(cls.element_path(id) + "/logs")
        return r.text
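With the classmethod above, no command object is needed to pull logs; a sketch assuming it is exposed on qds-sdk's Command class (the command id is hypothetical).

from qds_sdk.qubole import Qubole
from qds_sdk.commands import Command

Qubole.configure(api_token="<your-api-token>")
print(Command.get_log_id(123456))  # raw log text for command 123456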
Example #20
    def cancel_id(cls, id):
        """
        Cancels command denoted by this id

        Args:
            `id`: command id
        """
        conn = Qubole.agent()
        data = {"status": "kill"}
        return conn.put(cls.element_path(id), data)
 def execute(self):
   logger.info("Running DbImportCommand " + str(self.sqoop_cmd))
   if self.api_url is None:
     Qubole.configure(api_token=self.api_token)
   else:
     Qubole.configure(api_token=self.api_token, api_url = self.api_url)
    p = Popen(self.sqoop_cmd, cwd=self.tmp_dir)
    retCode = p.wait()
    a = os.popen("grep s3_default_db_location /usr/lib/hustler/bin/nodeinfo_src.sh").read()
    print(self.temp_location)
    print(self.get_s3_loc())
    p = Popen(["hadoop", "dfs", "-cp", self.temp_location, self.get_s3_loc() + self.temp_location])
    retCode1 = p.wait()
    if retCode != 0 or retCode1 != 0:
      logger.warning("sqoop retCode = " + str(retCode))
      self.__runCleanupScript()
      self.__runDfsCleanup()
      return retCode or retCode1
    else:
      logger.debug("sqoop retCode = " + str(retCode))
   retCode = 1
   if self.cmd_row['test_mode']:
     logger.debug("Not running hive in test mode.")
     retCode = 0
   else:
     logger.info("Running hive script.")
     self.fixHiveQuery()
     q = open(self.tmp_dir+"/hive_query.q").read()
     logger.info("Query is: " + q)
      cmd = HiveCommand.create(query=q, label=self.cluster_label)
      while not Command.is_done(cmd.status):
        time.sleep(5)
        cmd = Command.find(cmd.id)
        logger.info("Hive command id: " + str(cmd.id) + " status: " + str(cmd.status))
      logger.info(cmd.status)
      if cmd.status == "done":
        retCode = 0
    if retCode != 0:
      self.__runCleanupScript()

    self.__runDfsCleanup()
    return retCode
Example #22
    def createTemplate(data):
        """
        Create a new template.

        Args:
            `data`: json data required for creating a template
        Returns:
            Dictionary containing the details of the template with its ID.
        """
        conn = Qubole.agent()
        return conn.post(Template.rest_entity_path, data)
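A usage sketch, assuming qds-sdk's Template resource; the payload fields here are illustrative and should be checked against the templates API docs.

from qds_sdk.qubole import Qubole
from qds_sdk.template import Template

Qubole.configure(api_token="<your-api-token>")
data = {
    "name": "show-tables",                # hypothetical template name
    "command_type": "HiveCommand",
    "command": {"query": "SHOW TABLES"},
}
print(Template.createTemplate(data))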
Example #23
    def get_log(self):
        """
        Fetches log for the command represented by this object

        Returns:
            The log as a string
        """
        log_path = self.meta_data['logs_resource']
        conn = Qubole.agent()
        r = conn.get_raw(log_path)
        return r.text
Example #24
def main():

    logging.basicConfig(level=logging.INFO)

    if (len(sys.argv) >= 2 and sys.argv[1] == "-h"):
        usage(0)

    if (len(sys.argv) < 3):
        usage()

    api_token = sys.argv[1]
    output_path = sys.argv[2]
        
    Qubole.configure(api_token=api_token)

    args = HadoopCommand.parse(("streaming -files s3n://paid-qubole/HadoopAPIExamples/WordCountPython/mapper.py,s3n://paid-qubole/HadoopAPIExamples/WordCountPython/reducer.py -mapper mapper.py -reducer reducer.py -numReduceTasks 1 -input s3n://paid-qubole/default-datasets/gutenberg -output %s" % output_path).split())

    cmd = HadoopCommand.run(**args)
    
    print("Streaming Job run via command id: %s, finished with status %s" 
          % (cmd.id, cmd.status))
Example #25
    def editTemplate(id, data):
        """
        Edit an existing template.

        Args:
            `id`:   ID of the template to edit
            `data`: json data to be updated
        Returns:
            Dictionary containing the updated details of the template.
        """
        conn = Qubole.agent()
        return conn.put(Template.element_path(id), data)
Example #26
    def viewTemplate(id):
        """
        View an existing Template details.

        Args:
            `id`: ID of the template to fetch
        
        Returns:
            Dictionary containing the details of the template.
        """
        conn = Qubole.agent()
        return conn.get(Template.element_path(id))
Example #27
 def restore_point(cls, cluster_id_label, s3_location, backup_id, table_names, overwrite=True, automatic=True):
     """
     Restoring cluster from a given hbase snapshot id
     """
     conn = Qubole.agent()
     parameters = {}
     parameters['s3_location'] = s3_location
     parameters['backup_id'] = backup_id
     parameters['table_names'] = table_names
     parameters['overwrite'] = overwrite
     parameters['automatic'] = automatic
     return conn.post(cls.element_path(cluster_id_label) + "/restore_point", data=parameters)
Example #28
    def get_results(self, fp=sys.stdout, inline=True, delim=None, fetch=True):
        """
        Fetches the result for the command represented by this object

        get_results will retrieve results of the command and write to stdout by default.
        Optionally one can write to a filestream specified in `fp`. The `inline` argument
        decides whether the result can be returned as a CRLF separated string. In cases where
        the results are greater than 20MB, get_results will attempt to read from s3 and write
        to fp. The retrieval of results from s3 can be turned off by the `fetch` argument

        Args:
            `fp`: a file object to write the results to directly
            `inline`: whether or not results are returned inline as CRLF separated string
            `fetch`: True to fetch the result even if it is greater than 20MB, False to
                     only get the result location on s3
        """
        result_path = self.meta_data["results_resource"]

        conn = Qubole.agent()

        r = conn.get(result_path, {"inline": inline})
        if r.get("inline"):
            if sys.version_info < (3, 0, 0):
                fp.write(r["results"].encode("utf8"))
            else:
                import io

                if isinstance(fp, io.TextIOBase):
                    fp.buffer.write(r["results"].encode("utf8"))
                elif isinstance(fp, io.BufferedIOBase) or isinstance(fp, io.RawIOBase):
                    fp.write(r["results"].encode("utf8"))
                else:
                    # Can this happen? Don't know what's the right thing to do in this case.
                    pass
        else:
            if fetch:
                acc = Account.find()
                boto_conn = boto.connect_s3(
                    aws_access_key_id=acc.storage_access_key, aws_secret_access_key=acc.storage_secret_key
                )

                log.info("Starting download from result locations: [%s]" % ",".join(r["result_location"]))
                # fetch latest value of num_result_dir
                num_result_dir = Command.find(self.id).num_result_dir
                for s3_path in r["result_location"]:
                    # In Python 3,
                    # If the delim is None, fp should be in binary mode because
                    # boto expects it to be.
                    # If the delim is not None, then both text and binary modes
                    # work.
                    _download_to_local(boto_conn, s3_path, fp, num_result_dir, delim=delim)
            else:
                fp.write(",".join(r["result_location"]))
    def show(cls, report_name, data):
        """
        Shows a report by issuing a GET request to the /reports/report_name
        endpoint.

        Args:
            `report_name`: the name of the report to show

            `data`: the parameters for the report
        """
        conn = Qubole.agent()
        return conn.get(cls.element_path(report_name), data)
Example #30
def main():
    root = logging.getLogger()
    root.setLevel(logging.INFO)

    ch = logging.StreamHandler(sys.stdout)
    ch.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(module)s - %(levelname)s - %(message)s')
    ch.setFormatter(formatter)
    root.addHandler(ch)

    # I am using this slightly complicated trick to pass config in the
    # constructor of other packages. Is there a better way to do this?

    config_parser, argparser = setup_parsers()

    config_args, remaining_argv = config_parser.parse_known_args()
    config = load_config(config_args)

    args = argparser.parse_args(remaining_argv)

    if args.debug:
        ch.setLevel(logging.DEBUG)
        root.setLevel(logging.DEBUG)
        logging.debug("Debug is ON!")
    if args.log_file is not None:
        fh = logging.FileHandler(args.log_file, mode='w')
        fh.setLevel(logging.DEBUG)
        fh.setFormatter(formatter)

        root.setLevel(logging.DEBUG)
        root.addHandler(fh)
    try:
        Qubole.configure(
            api_token=config.get("default", "auth_token"),
            api_url=config.get("default", "api_url"),
            skip_ssl_cert_check=True
        )
        args.func(config, args)
    finally:
        logging.debug("Cleaning up")
Example #31
    def find(cls, **kwargs):
        if cls.cached_resource is None:
            conn = Qubole.agent()
            cls.cached_resource = cls(conn.get(cls.rest_entity_path))

        return cls.cached_resource
Example #32
def main():

    optparser = OptionParser(usage=usage_str)
    optparser.add_option(
        "--token",
        dest="api_token",
        default=os.getenv('QDS_API_TOKEN'),
        help=
        "api token for accessing Qubole. must be specified via command line or passed in via environment variable QDS_API_TOKEN"
    )

    optparser.add_option(
        "--url",
        dest="api_url",
        default=os.getenv('QDS_API_URL'),
        help=
        "base url for QDS REST API. defaults to https://api.qubole.com/api ")

    optparser.add_option(
        "--version",
        dest="api_version",
        default=os.getenv('QDS_API_VERSION'),
        help="version of REST API to access. defaults to v1.2")

    optparser.add_option(
        "--poll_interval",
        dest="poll_interval",
        default=os.getenv('QDS_POLL_INTERVAL'),
        help=
        "interval for polling API for completion and other events. defaults to 5s"
    )

    optparser.add_option("-v",
                         dest="verbose",
                         action="store_true",
                         default=False,
                         help="verbose mode - info level logging")

    optparser.add_option("--vv",
                         dest="chatty",
                         action="store_true",
                         default=False,
                         help="very verbose mode - debug level logging")

    optparser.disable_interspersed_args()
    (options, args) = optparser.parse_args()

    if options.chatty:
        logging.basicConfig(level=logging.DEBUG)
    elif options.verbose:
        logging.basicConfig(level=logging.INFO)
    else:
        # whatever is dictated by logging config
        pass

    if options.api_token is None:
        raise Exception("No API Token provided")

    if options.api_url is None:
        options.api_url = "https://api.qubole.com/api/"

    if options.api_version is None:
        options.api_version = "v1.2"

    if options.poll_interval is None:
        options.poll_interval = 5

    Qubole.configure(api_token=options.api_token,
                     api_url=options.api_url,
                     version=options.api_version,
                     poll_interval=options.poll_interval)

    if len(args) < 1:
        sys.stderr.write("Missing first argument containing command type\n")
        usage()

    cmdset = set(["hive", "pig", "hadoop"])
    cmdsuffix = "cmd"

    cmd = args.pop(0)

    if (not cmd.endswith(cmdsuffix)
            or (cmd[:-len(cmdsuffix)] not in cmdset)):
        sys.stderr.write("First command must be one of <%s>\n" %
                         "|".join(cmdset))
        usage()

    return cmdmain(cmd[:-len(cmdsuffix)], args)
Example #33
 def find(cls, id, **kwargs):
     conn = Qubole.agent()
     if id is not None:
         return cls(conn.get(cls.element_path(id)))
Example #34
 def update(cls, id, **kwargs):
     conn = Qubole.agent()
     return conn.put(cls.element_path(id), data=kwargs)
Example #35
 def status(cls, cluster_id_label):
     """
     Show the status of the cluster with id/label `cluster_id_label`.
     """
     conn = Qubole.agent()
     return conn.get(cls.element_path(cluster_id_label) + "/state")
Example #36
 def __init__(self, name, context, **kwargs):
     super(QuboleCluster, self).__init__(name, context, kwargs=kwargs)
     self._filesystem = S3Filesystem(self.logger, context, **kwargs)
     Qubole.configure(api_token=context.settings['qds_api_token'])
Example #37
 def rerun(args):
     conn = Qubole.agent()
     ret_val = conn.post(Action.element_path(args.id) + "/rerun", data=None)
     return json.dumps(ret_val, sort_keys=True, indent=4)
Example #38
 def create(cls, cluster_info):
     """
     Create a new cluster using information provided in `cluster_info`.
     """
     conn = Qubole.agent()
     return conn.post(cls.rest_entity_path, data=cluster_info)
Example #39
 def create(cls, **kwargs):
     conn = Qubole.agent()
     return cls(conn.post(cls.rest_entity_path, data=kwargs))
Example #40
 def rerun(self, instance_id):
     conn = Qubole.agent()
      url_path = self.element_path(
          self.id) + "/instances/" + instance_id + "/rerun"
     return conn.post(url_path)['status']
Example #41
 def show(cls, cluster_id_label):
     """
     Show information about the cluster with id/label `cluster_id_label`.
     """
     conn = Qubole.agent()
     return conn.get(cls.element_path(cluster_id_label))
Example #42
 def kill(self):
     conn = Qubole.agent()
     data = {"status": "kill"}
     return conn.put(self.element_path(self.id), data)
Example #43
 def resume(self):
     conn = Qubole.agent()
     data = {"status": "resume"}
     return conn.put(self.element_path(self.id), data)
Example #44
 def suspend(self):
     conn = Qubole.agent()
     data = {"status": "suspend"}
     return conn.put(self.element_path(self.id), data)
Example #45
 def delete(cls, id):
     conn = Qubole.agent()
     return conn.delete(cls.element_path(id))
Example #46
 def list_roles(group_id):
     conn = Qubole.agent()
     url_path = "groups/%s/roles" % group_id
     return conn.get(url_path)
Example #47
 def rerun(self):
     conn = Qubole.agent()
     return conn.post(self.element_path(self.id) + "/rerun", data=None)
Example #48
 def __init__(self, api_token=None):
     self._check_qubole_api_token_is_assigned(api_token=api_token)
     Qubole.configure(api_token=api_token)
     print('Connected to Qubole')
     self.old_std_out = []
     self.status = None
Example #49
 def delete(cls, cluster_id_label):
     """
     Delete the cluster with id/label `cluster_id_label`.
     """
     conn = Qubole.agent()
     return conn.delete(cls.element_path(cluster_id_label))
Example #50
def main():

    optparser = OptionParser(usage=usage_str)
    optparser.add_option(
        "--token",
        dest="api_token",
        default=os.getenv('QDS_API_TOKEN'),
        help=
        "api token for accessing Qubole. must be specified via command line or passed in via environment variable QDS_API_TOKEN"
    )

    optparser.add_option(
        "--url",
        dest="api_url",
        default=os.getenv('QDS_API_URL'),
        help=
        "base url for QDS REST API. defaults to https://api.qubole.com/api ")

    optparser.add_option(
        "--version",
        dest="api_version",
        default=os.getenv('QDS_API_VERSION'),
        help="version of REST API to access. defaults to v1.2")

    optparser.add_option(
        "--poll_interval",
        dest="poll_interval",
        default=os.getenv('QDS_POLL_INTERVAL'),
        help=
        "interval for polling API for completion and other events. defaults to 5s"
    )

    optparser.add_option(
        "--skip_ssl_cert_check",
        dest="skip_ssl_cert_check",
        action="store_true",
        default=False,
        help=
        "skip verification of server SSL certificate. Insecure: use with caution."
    )

    optparser.add_option("-v",
                         dest="verbose",
                         action="store_true",
                         default=False,
                         help="verbose mode - info level logging")

    optparser.add_option("--vv",
                         dest="chatty",
                         action="store_true",
                         default=False,
                         help="very verbose mode - debug level logging")

    optparser.disable_interspersed_args()
    (options, args) = optparser.parse_args()

    if options.chatty:
        logging.basicConfig(level=logging.DEBUG)
    elif options.verbose:
        logging.basicConfig(level=logging.INFO)
    else:
        logging.basicConfig(level=logging.WARN)

    if options.api_token is None:
        sys.stderr.write("No API Token provided\n")
        usage(optparser)

    if options.api_url is None:
        options.api_url = "https://api.qubole.com/api/"

    if options.api_version is None:
        options.api_version = "v1.2"

    if options.poll_interval is None:
        options.poll_interval = 5

    if options.skip_ssl_cert_check is None:
        options.skip_ssl_cert_check = False
    elif options.skip_ssl_cert_check:
        sys.stderr.write(
            "[WARN] Insecure mode enabled: skipping SSL cert verification\n")

    Qubole.configure(api_token=options.api_token,
                     api_url=options.api_url,
                     version=options.api_version,
                     poll_interval=options.poll_interval,
                     skip_ssl_cert_check=options.skip_ssl_cert_check)

    if len(args) < 1:
        sys.stderr.write("Missing first argument containing command type\n")
        usage(optparser)

    cmdsuffix = "cmd"
    cmdset = set([
        x + cmdsuffix
        for x in ["hive", "pig", "hadoop", "shell", "dbexport", "presto"]
    ])

    a0 = args.pop(0)

    if (a0 in cmdset):
        return cmdmain(a0[:a0.find(cmdsuffix)], args)

    if (a0 == "hadoop_cluster"):
        return clustermain(a0, args)

    sys.stderr.write("First command must be one of <%s>\n" %
                     "|".join(cmdset.union(["hadoop_cluster"])))
    usage(optparser)
Example #51
 def kill(self):
     conn = Qubole.agent()
     return conn.put(self.element_path(self.id) + "/kill", data=None)
Example #52
    def save_code(cls,
                  pipeline_id,
                  code=None,
                  file_path=None,
                  language=None,
                  jar_path=None,
                  main_class_name=None,
                  user_arguments=None):
        """
        :param file_path:
        :param code:
        :param language:
        :param user_arguments:
        :param pipeline_id:
        :param jar_path:
        :param main_class_name:
        :return:
        """
        data = None
        if cls.create_type == 2:
            if jar_path is None or main_class_name is None:
                raise ParseError("Provide Jar path for BYOJ mode.")
            else:
                cls.jar_path = jar_path
                data = {
                    "data": {
                        "attributes": {
                            "create_type": cls.create_type,
                            "user_arguments": str(user_arguments),
                            "jar_path": str(jar_path),
                            "main_class_name": str(main_class_name)
                        }
                    }
                }

        elif cls.create_type == 3:
            if code or file_path:
                try:
                    if file_path:
                        with open(file_path, 'r') as f:
                            code = f.read()
                except IOError as e:
                    raise ParseError(
                        "Unable to open script location or script "
                        "location and code both are empty. ", str(e))
                cls.pipeline_code = code
                data = {
                    "data": {
                        "attributes": {
                            "create_type": cls.create_type,
                            "user_arguments": str(user_arguments),
                            "code": str(code),
                            "language": str(language)
                        }
                    }
                }

            else:
                raise ParseError(
                    "Provide code or file location for BYOC mode.")

        conn = Qubole.agent()
        url = cls.rest_entity_path + "/" + str(pipeline_id) + "/save_code"
        response = conn.put(url, data)
        log.debug(response)
        return response
Example #53
 def list_users(group_id):
     conn = Qubole.agent()
     url_path = "groups/%s/qbol_users" % group_id
     return conn.get(url_path)
Example #54
 def set_composition_for_cluster(self, **kwargs):
     cloud = Qubole.get_cloud()
     composition = cloud.get_composition(**kwargs)
     if composition is not None:
         self.cluster_info["composition"] = composition
Example #55
 def duplicate(group_id, **kwargs):
     conn = Qubole.agent()
     url_path = "groups/%s/duplicate" % group_id
     return conn.post(url_path, data=kwargs)
Example #56
 def set_cluster_info_from_arguments(self, arguments):
     customer_ssh_key = util._read_file(arguments.customer_ssh_key_file)
     self.set_cluster_info(
         disallow_cluster_termination=arguments.disallow_cluster_termination,
         enable_ganglia_monitoring=arguments.enable_ganglia_monitoring,
         datadog_api_token=arguments.datadog_api_token,
         datadog_app_token=arguments.datadog_app_token,
         node_bootstrap=arguments.node_bootstrap_file,
         master_instance_type=arguments.master_instance_type,
         slave_instance_type=arguments.slave_instance_type,
         min_nodes=arguments.initial_nodes,
         max_nodes=arguments.max_nodes,
         node_base_cooldown_period=arguments.node_base_cooldown_period,
         node_spot_cooldown_period=arguments.node_spot_cooldown_period,
         custom_tags=arguments.custom_tags,
         heterogeneous_config=arguments.heterogeneous_config,
         idle_cluster_timeout=arguments.idle_cluster_timeout,
         disk_count=arguments.count,
         disk_type=arguments.disk_type,
         disk_size=arguments.size,
         root_disk_size=arguments.root_disk_size,
         upscaling_config=arguments.upscaling_config,
         enable_encryption=arguments.encrypted_ephemerals,
         customer_ssh_key=customer_ssh_key,
         image_uri_overrides=arguments.image_uri_overrides,
         env_name=arguments.env_name,
         python_version=arguments.python_version,
         r_version=arguments.r_version,
         disable_cluster_pause=arguments.disable_cluster_pause,
         paused_cluster_timeout_mins=arguments.paused_cluster_timeout_mins,
         disable_autoscale_node_pause=arguments.disable_autoscale_node_pause,
         paused_autoscale_node_timeout_mins=arguments.paused_autoscale_node_timeout_mins,
         parent_cluster_id=arguments.parent_cluster_id,
         image_version=arguments.image_version)
     if Qubole.get_cloud_name() == "aws":
         # Need to move to aws cloud.
         self.set_composition(
             master_type=arguments.master_type,
             master_spot_block_duration=arguments.master_spot_block_duration,
             master_maximum_bid_price_percentage=arguments.master_maximum_bid_price_percentage,
             master_timeout_for_request=arguments.master_timeout_for_request,
             master_spot_fallback=arguments.master_spot_fallback,
             min_ondemand_percentage=arguments.min_ondemand_percentage,
             min_spot_block_percentage=arguments.min_spot_block_percentage,
             min_spot_block_duration=arguments.min_spot_block_duration,
             min_spot_percentage=arguments.min_spot_percentage,
             min_maximum_bid_price_percentage=arguments.min_maximum_bid_price_percentage,
             min_timeout_for_request=arguments.min_timeout_for_request,
             min_spot_allocation_strategy=arguments.min_spot_allocation_strategy,
             min_spot_fallback=arguments.min_spot_fallback,
             autoscaling_ondemand_percentage=arguments.autoscaling_ondemand_percentage,
             autoscaling_spot_block_percentage=arguments.autoscaling_spot_block_percentage,
             autoscaling_spot_percentage=arguments.autoscaling_spot_percentage,
             autoscaling_spot_block_duration=arguments.autoscaling_spot_block_duration,
             autoscaling_maximum_bid_price_percentage=arguments.autoscaling_maximum_bid_price_percentage,
             autoscaling_timeout_for_request=arguments.autoscaling_timeout_for_request,
             autoscaling_spot_allocation_strategy=arguments.autoscaling_spot_allocation_strategy,
             autoscaling_spot_fallback=arguments.autoscaling_spot_fallback,
             autoscaling_spot_block_fallback=arguments.autoscaling_spot_block_fallback)
     else:
         self.set_composition_from_cloud_using_parser(arguments)
Example #57
    def cluster_info_parser(argparser, action):
        create_required = False
        label_required = False
        if action == "create":
            create_required = True
        elif action == "update":
            argparser.add_argument("cluster_id_label",
                                   help="id/label of the cluster to update")
        elif action == "clone":
            argparser.add_argument("cluster_id_label",
                                   help="id/label of the cluster to update")
            label_required = True

        argparser.add_argument("--label",
                               dest="label",
                               nargs="+",
                               required=(create_required or label_required),
                               help="list of labels for the cluster" +
                               " (atleast one label is required)")
        cluster_info = argparser.add_argument_group("cluster_info")
        cluster_info.add_argument("--master-instance-type",
                                  dest="master_instance_type",
                                  help="instance type to use for the hadoop" +
                                  " master node")
        cluster_info.add_argument("--slave-instance-type",
                                  dest="slave_instance_type",
                                  help="instance type to use for the hadoop" +
                                  " slave nodes")
        cluster_info.add_argument(
            "--min-nodes",
            dest="initial_nodes",
            type=int,
            help="number of nodes to start the" + " cluster with",
        )
        cluster_info.add_argument("--max-nodes",
                                  dest="max_nodes",
                                  type=int,
                                  help="maximum number of nodes the cluster" +
                                  " may be auto-scaled up to")
        cluster_info.add_argument(
            "--idle-cluster-timeout",
            dest="idle_cluster_timeout",
            help="cluster termination timeout for idle cluster")
        cluster_info.add_argument(
            "--node-bootstrap-file",
            dest="node_bootstrap_file",
            help="""name of the node bootstrap file for this cluster. It
                                   should be stored in S3 at
                                   <account-default-location>/scripts/hadoop/NODE_BOOTSTRAP_FILE
                                   """,
        )
        cluster_info.add_argument("--root-disk-size",
                                  dest="root_disk_size",
                                  type=int,
                                  help="size of the root volume in GB")
        cluster_info.add_argument(
            "--parent-cluster-id",
            dest="parent_cluster_id",
            type=int,
            help="Id of the parent cluster this hs2 cluster is attached to")
        cluster_info.add_argument("--image-version",
                                  dest="image_version",
                                  help="cluster image version")
        termination = cluster_info.add_mutually_exclusive_group()
        termination.add_argument(
            "--disallow-cluster-termination",
            dest="disallow_cluster_termination",
            action="store_true",
            default=None,
            help="don't auto-terminate idle clusters," +
            " use this with extreme caution",
        )
        termination.add_argument("--allow-cluster-termination",
                                 dest="disallow_cluster_termination",
                                 action="store_false",
                                 default=None,
                                 help="auto-terminate idle clusters,")

        node_cooldown_period_group = argparser.add_argument_group(
            "node cooldown period settings")
        node_cooldown_period_group.add_argument(
            "--node-base-cooldown-period",
            dest="node_base_cooldown_period",
            type=int,
            help="Cooldown period for on-demand nodes" + " unit: minutes")
        node_cooldown_period_group.add_argument(
            "--node-spot-cooldown-period",
            dest="node_spot_cooldown_period",
            type=int,
            help="Cooldown period for spot nodes" + " unit: minutes")
        cluster_info.add_argument("--customer-ssh-key",
                                  dest="customer_ssh_key_file",
                                  help="location for ssh key to use to" +
                                  " login to the instance")
        cluster_info.add_argument(
            "--custom-tags",
            dest="custom_tags",
            help="""Custom tags to be set on all instances
                                                 of the cluster. Specified as JSON object (key-value pairs)
                                                 e.g. --custom-tags '{"key1":"value1", "key2":"value2"}'
                                                 """,
        )

        # datadisk settings
        datadisk_group = argparser.add_argument_group("data disk settings")
        datadisk_group.add_argument(
            "--count",
            dest="count",
            type=int,
            help="Number of EBS volumes to attach to" +
            " each instance of the cluster",
        )
        datadisk_group.add_argument(
            "--disk-type",
            dest="disk_type",
            choices=["standard", "gp2"],
            help=
            "Type of the  volume attached to the instances. Valid values are "
            + "'standard' (magnetic) and 'gp2' (ssd).")
        datadisk_group.add_argument(
            "--size",
            dest="size",
            type=int,
            help="Size of each EBS volume, in GB",
        )
        datadisk_group.add_argument(
            "--upscaling-config",
            dest="upscaling_config",
            help="Upscaling config to be attached with the instances.",
        )
        ephemerals = datadisk_group.add_mutually_exclusive_group()
        ephemerals.add_argument(
            "--encrypted-ephemerals",
            dest="encrypted_ephemerals",
            action="store_true",
            default=None,
            help="encrypt the ephemeral drives on" + " the instance",
        )
        ephemerals.add_argument(
            "--no-encrypted-ephemerals",
            dest="encrypted_ephemerals",
            action="store_false",
            default=None,
            help="don't encrypt the ephemeral drives on" + " the instance",
        )

        cluster_info.add_argument("--heterogeneous-config",
                                  dest="heterogeneous_config",
                                  help="heterogeneous config for the cluster")

        composition_group = argparser.add_argument_group(
            "Cluster composition settings")
        Qubole.get_cloud().set_composition_arguments(composition_group)

        # monitoring settings
        monitoring_group = argparser.add_argument_group("monitoring settings")
        ganglia = monitoring_group.add_mutually_exclusive_group()
        ganglia.add_argument(
            "--enable-ganglia-monitoring",
            dest="enable_ganglia_monitoring",
            action="store_true",
            default=None,
            help="enable ganglia monitoring for the" + " cluster",
        )
        ganglia.add_argument(
            "--disable-ganglia-monitoring",
            dest="enable_ganglia_monitoring",
            action="store_false",
            default=None,
            help="disable ganglia monitoring for the" + " cluster",
        )

        datadog_group = argparser.add_argument_group("datadog settings")
        datadog_group.add_argument(
            "--datadog-api-token",
            dest="datadog_api_token",
            default=None,
            help="datadog api token for cluster monitoring",
        )
        datadog_group.add_argument(
            "--datadog-app-token",
            dest="datadog_app_token",
            default=None,
            help="datadog app token for cluster monitoring",
        )

        internal_group = argparser.add_argument_group("internal settings")
        internal_group.add_argument(
            "--image-overrides",
            dest="image_uri_overrides",
            default=None,
            help="overrides for image",
        )

        env_group = argparser.add_argument_group("environment settings")
        env_group.add_argument("--env-name",
                               dest="env_name",
                               default=None,
                               help="name of Python and R environment")
        env_group.add_argument("--python-version",
                               dest="python_version",
                               default=None,
                               help="version of Python in environment")
        env_group.add_argument("--r-version",
                               dest="r_version",
                               default=None,
                               help="version of R in environment")

        start_stop_group = argparser.add_argument_group("start stop settings")
        start_stop_group.add_argument("--disable-cluster-pause",
                                      dest="disable_cluster_pause",
                                      action='store_true',
                                      default=None,
                                      help="disable cluster pause")
        start_stop_group.add_argument("--no-disable-cluster-pause",
                                      dest="disable_cluster_pause",
                                      action='store_false',
                                      default=None,
                                      help="disable cluster pause")
        start_stop_group.add_argument("--paused-cluster-timeout",
                                      dest="paused_cluster_timeout_mins",
                                      default=None,
                                      type=int,
                                      help="paused cluster timeout in min")
        start_stop_group.add_argument("--disable-autoscale-node-pause",
                                      dest="disable_autoscale_node_pause",
                                      action='store_true',
                                      default=None,
                                      help="disable autoscale node pause")
        start_stop_group.add_argument("--no-disable-autoscale-node-pause",
                                      dest="disable_autoscale_node_pause",
                                      action='store_false',
                                      default=None,
                                      help="disable autoscale node pause")
        start_stop_group.add_argument(
            "--paused-autoscale-node-timeout",
            dest="paused_autoscale_node_timeout_mins",
            default=None,
            type=int,
            help="paused autoscale node timeout in min")
def qb_configure(api_token, api_url):
    return Qubole.configure(api_token=api_token, api_url=api_url)
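The wrapper above is then a one-call setup at startup; token and URL here are placeholders.

qb_configure("<your-api-token>", "https://api.qubole.com/api/")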
Example #59
def main():
    optparser = OptionParser(usage=usage_str)
    optparser.add_option("--token", dest="api_token",
                         default=os.getenv('QDS_API_TOKEN'),
                         help="api token for accessing Qubole. must be specified via command line or passed in via environment variable QDS_API_TOKEN")

    optparser.add_option("--url", dest="api_url",
                         default=os.getenv('QDS_API_URL'),
                         help="base url for QDS REST API. defaults to https://api.qubole.com/api ")

    optparser.add_option("--version", dest="api_version",
                         default=os.getenv('QDS_API_VERSION'),
                         help="version of REST API to access. defaults to v1.2")

    optparser.add_option("--poll_interval", dest="poll_interval",
                         type=int,
                         default=os.getenv('QDS_POLL_INTERVAL'),
                         help="interval for polling API for completion and other events. defaults to 5s")

    optparser.add_option("--skip_ssl_cert_check", dest="skip_ssl_cert_check", action="store_true",
                         default=False,
                         help="skip verification of server SSL certificate. Insecure: use with caution.")

    optparser.add_option("-v", dest="verbose", action="store_true",
                         default=False,
                         help="verbose mode - info level logging")

    optparser.add_option("--vv", dest="chatty", action="store_true",
                         default=False,
                         help="very verbose mode - debug level logging")

    optparser.disable_interspersed_args()
    (options, args) = optparser.parse_args()
    
    if options.chatty:
        logging.basicConfig(level=logging.DEBUG)
    elif options.verbose:
        logging.basicConfig(level=logging.INFO)
    else:
        logging.basicConfig(level=logging.WARN)

    if options.api_token is None:
        sys.stderr.write("No API Token provided\n")
        usage(optparser)

    if options.api_url is None:
        options.api_url = "https://api.qubole.com/api/"

    if options.api_version is None:
        options.api_version = "v1.2"

    if options.poll_interval is None:
        options.poll_interval = 5

    if options.skip_ssl_cert_check is None:
        options.skip_ssl_cert_check = False
    elif options.skip_ssl_cert_check:
        log.warn("Insecure mode enabled: skipping SSL cert verification\n")

    Qubole.configure(api_token=options.api_token,
                     api_url=options.api_url,
                     version=options.api_version,
                     poll_interval=options.poll_interval,
                     skip_ssl_cert_check=options.skip_ssl_cert_check)

    if len(args) < 1:
        sys.stderr.write("Missing first argument containing subcommand\n")
        usage(optparser)

    a0 = args.pop(0)
    if a0 in CommandClasses:
        return cmdmain(a0, args)

    if a0 in SensorClasses:
        return sensormain(a0, args)

    if a0 == "account":
        return accountmain(args)

    if a0 == "cluster":
        api_version_number = float(options.api_version[1:])
        return clustermain(args, api_version_number)

    if a0 == "action":
        return actionmain(args)

    if a0 == "scheduler":
        return schedulermain(args)

    if a0 == "report":
        return reportmain(args)

    if a0 == "dbtap":
        return dbtapmain(args)

    if a0 == "group":
        return groupmain(args)

    if a0 == "role":
        return rolemain(args)

    if a0 == "app":
        return appmain(args)

    if a0 == "nezha":
        return nezhamain(args)

    if a0 == "user":
        return usermain(args)
    if a0 == "template":
        return templatemain(args)

    cmdset = set(CommandClasses.keys())
    sys.stderr.write("First command must be one of <%s>\n" %
                     "|".join(cmdset.union(["cluster", "action", "scheduler", "report",
                       "dbtap", "role", "group", "app", "account", "nezha", "user", "template"])))
    usage(optparser)
Example #60
# Downloading the result
def get_results(command):
    if command is None:
        return None
    # Poll until the command finishes before fetching its results,
    # refreshing the command object so the status actually updates.
    print("Waiting on command id: " + str(command.id))
    while not SparkCommand.is_done(command.status):
        time.sleep(5)
        command = SparkCommand.find(command.id)
    filename = get_random_filename(10)
    print(filename)
    with open(filename, 'w') as fp:
        command.get_results(fp, delim="\n")
    if SparkCommand.is_success(command.status):
        print("Command executed: result fetch completed successfully")
    else:
        print("Command executed: result fetch for command " + str(command.id) +
              " failed. The status returned is: " + str(command.status))
    content = get_content(filename)
    return content


if __name__ == '__main__':
    # Setting the API token and environment-specific API URL
    Qubole.configure(api_token='<qubole-api-token>',
                     api_url='https://<env>.qubole.com/api')
    get_results(execute_query('select * from  default.customer limit 100'))