def _start_server(cluster):
  """Start a HiveServer2 process for the test cluster.

  The child inherits the cluster's MR2 environment plus the Hive settings
  needed to boot HS2 against the mini cluster.

  Args:
    cluster: pseudo-HDFS test cluster (provides _mr2_env and _tmpdir).
  Returns:
    The subprocess.Popen handle of the HiveServer2 process.
  """
  args = [beeswax.conf.HIVE_SERVER_BIN.get()]

  env = cluster._mr2_env.copy()

  # Hoisted: this root is used by every path below.
  hadoop_root = get_run_root('ext/hadoop/hadoop')

  # Links created in jenkins script.
  # If missing classes when booting HS2, check here.
  aux_jars = [
    hadoop_root + '/share/hadoop/hdfs/hadoop-hdfs.jar',
    hadoop_root + '/share/hadoop/common/lib/hadoop-auth.jar',
    hadoop_root + '/share/hadoop/common/hadoop-common.jar',
    hadoop_root + '/share/hadoop/mapreduce/hadoop-mapreduce-client-core.jar',
  ]

  env.update({
    'HIVE_CONF_DIR': beeswax.conf.HIVE_CONF_DIR.get(),
    'HIVE_SERVER2_THRIFT_PORT': str(HIVE_SERVER_TEST_PORT),
    'HADOOP_MAPRED_HOME': hadoop_root + '/share/hadoop/mapreduce',
    'AUX_CLASSPATH': ':'.join(aux_jars),
    'HADOOP_CLASSPATH': '',
  })

  if os.getenv("JAVA_HOME"):
    env["JAVA_HOME"] = os.getenv("JAVA_HOME")

  LOG.info("Executing %s, env %s, cwd %s" % (repr(args), repr(env), cluster._tmpdir))
  return subprocess.Popen(args=args, env=env, cwd=cluster._tmpdir, stdin=subprocess.PIPE)
def _start_mini_hs2(cluster):
  """Configure and (once per process) start a shared mini HiveServer2.

  Writes a hive-site.xml pointing at a Derby metastore under the cluster's
  temp dir, starts HS2 if not already running, and registers an atexit
  handler to kill it.

  Returns:
    A zero-arg cleanup callable that restores the patched conf and stops
    the cluster.
  """
  HIVE_CONF = cluster.hadoop_conf_dir
  finish = (
    beeswax.conf.HIVE_SERVER_HOST.set_for_testing(get_localhost_name()),
    beeswax.conf.HIVE_SERVER_PORT.set_for_testing(HIVE_SERVER_TEST_PORT),
    beeswax.conf.HIVE_SERVER_BIN.set_for_testing(get_run_root("ext/hive/hive") + "/bin/hiveserver2"),
    beeswax.conf.HIVE_CONF_DIR.set_for_testing(HIVE_CONF),
  )

  default_xml = """<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
  <name>javax.jdo.option.ConnectionURL</name>
  <value>jdbc:derby:;databaseName=%(root)s/metastore_db;create=true</value>
  <description>JDBC connect string for a JDBC metastore</description>
</property>
<property>
  <name>hive.server2.enable.impersonation</name>
  <value>false</value>
</property>
<property>
  <name>hive.querylog.location</name>
  <value>%(querylog)s</value>
</property>
</configuration>
""" % {
    "root": cluster._tmpdir,
    "querylog": cluster.log_dir + "/hive",
  }

  # was: file(...).write(...) -- Python-2-only builtin and a leaked handle.
  with open(HIVE_CONF + "/hive-site.xml", "w") as fh:
    fh.write(default_xml)

  global _SHARED_HIVE_SERVER_PROCESS
  if _SHARED_HIVE_SERVER_PROCESS is None:
    p = _start_server(cluster)
    LOG.info("started")
    # 0o1777: sticky + world-writable (old-style 01777 is a Py3 syntax error).
    cluster.fs.do_as_superuser(cluster.fs.chmod, "/tmp", 0o1777)

    _SHARED_HIVE_SERVER_PROCESS = p

    def kill():
      LOG.info("Killing server (pid %d)." % p.pid)
      os.kill(p.pid, 9)
      p.wait()
    atexit.register(kill)

  def s():
    for f in finish:
      f()
    cluster.stop()

  return s
def _start_mini_hs2(cluster):
  """Configure and (once per process) start a shared mini HiveServer2.

  Writes a hive-site.xml pointing at a Derby metastore under the cluster's
  temp dir, starts HS2 if not already running, and registers an atexit
  handler to kill it.

  Returns:
    A zero-arg cleanup callable that restores the patched conf and stops
    the cluster.
  """
  HIVE_CONF = cluster.hadoop_conf_dir
  finish = (
    beeswax.conf.HIVE_SERVER_HOST.set_for_testing(get_localhost_name()),
    beeswax.conf.HIVE_SERVER_PORT.set_for_testing(HIVE_SERVER_TEST_PORT),
    beeswax.conf.HIVE_SERVER_BIN.set_for_testing(
      get_run_root('ext/hive/hive') + '/bin/hiveserver2'),
    beeswax.conf.HIVE_CONF_DIR.set_for_testing(HIVE_CONF))

  default_xml = """<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
  <name>javax.jdo.option.ConnectionURL</name>
  <value>jdbc:derby:;databaseName=%(root)s/metastore_db;create=true</value>
  <description>JDBC connect string for a JDBC metastore</description>
</property>
<property>
  <name>hive.server2.enable.impersonation</name>
  <value>false</value>
</property>
<property>
  <name>hive.querylog.location</name>
  <value>%(querylog)s</value>
</property>
</configuration>
""" % {
    'root': cluster._tmpdir,
    'querylog': cluster.log_dir + '/hive'
  }

  # was: file(...).write(...) -- Python-2-only builtin and a leaked handle.
  with open(HIVE_CONF + '/hive-site.xml', 'w') as fh:
    fh.write(default_xml)

  global _SHARED_HIVE_SERVER_PROCESS
  if _SHARED_HIVE_SERVER_PROCESS is None:
    p = _start_server(cluster)
    LOG.info("started")
    # 0o1777: sticky + world-writable so all test users can write to /tmp.
    cluster.fs.do_as_superuser(cluster.fs.chmod, '/tmp', 0o1777)

    _SHARED_HIVE_SERVER_PROCESS = p

    def kill():
      LOG.info("Killing server (pid %d)." % p.pid)
      os.kill(p.pid, 9)
      p.wait()
    atexit.register(kill)

  def s():
    for f in finish:
      f()
    cluster.stop()

  return s
def _start_server(cluster):
  """Start a HiveServer2 process for the test cluster.

  HADOOP_CLASSPATH is computed by shelling out to `hadoop classpath` so HS2
  sees the same jars as the mini cluster.

  Args:
    cluster: pseudo-HDFS test cluster (provides _mr2_env and _tmpdir).
  Returns:
    The subprocess.Popen handle of the HiveServer2 process.
  """
  args = [beeswax.conf.HIVE_SERVER_BIN.get()]

  env = cluster._mr2_env.copy()

  # Hoisted: this root is used by every path below.
  hadoop_root = get_run_root('ext/hadoop/hadoop')

  # communicate() drains the pipes while waiting; the previous
  # wait() + stdout.read() order can deadlock if the child fills
  # the stdout pipe buffer before exiting.
  hadoop_cp_proc = subprocess.Popen(
    args=[hadoop_root + '/bin/hadoop', 'classpath'],
    env=env, cwd=cluster._tmpdir,
    stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  stdout_data, _ = hadoop_cp_proc.communicate()
  hadoop_cp = stdout_data.strip()

  # Links created in jenkins script.
  # If missing classes when booting HS2, check here.
  aux_jars = [
    hadoop_root + '/share/hadoop/hdfs/hadoop-hdfs.jar',
    hadoop_root + '/share/hadoop/common/lib/hadoop-auth.jar',
    hadoop_root + '/share/hadoop/common/hadoop-common.jar',
    hadoop_root + '/share/hadoop/mapreduce/hadoop-mapreduce-client-core.jar',
  ]

  env.update({
    'HADOOP_HOME': hadoop_root,  # Used only by Hive for some reason
    'HIVE_CONF_DIR': beeswax.conf.HIVE_CONF_DIR.get(),
    'HIVE_SERVER2_THRIFT_PORT': str(HIVE_SERVER_TEST_PORT),
    'HADOOP_MAPRED_HOME': hadoop_root + '/share/hadoop/mapreduce',
    'AUX_CLASSPATH': ':'.join(aux_jars),
    'HADOOP_CLASSPATH': hadoop_cp,
  })

  if os.getenv("JAVA_HOME"):
    env["JAVA_HOME"] = os.getenv("JAVA_HOME")

  LOG.info("Executing %s, env %s, cwd %s" % (repr(args), repr(env), cluster._tmpdir))
  return subprocess.Popen(args=args, env=env, cwd=cluster._tmpdir, stdin=subprocess.PIPE)
def _get_components():
  """Return ([(component_name, version), ...], HUE_VERSION).

  Versions are read from the VERSIONS file under the run root, plus an
  optional EXTRA_VERSIONS file under the var root. If parsing raises
  ValueError, a hard-coded HDP component list is used instead.
  """
  components = []
  try:
    components += _read_versions(os.path.join(get_run_root(), "VERSIONS"))
    extra_versions_path = os.path.join(get_var_root(), "EXTRA_VERSIONS")
    if os.path.exists(extra_versions_path):
      components += _read_versions(extra_versions_path)
  # NOTE(review): only ValueError is caught; a missing VERSIONS file
  # (IOError/OSError) would propagate -- confirm that is intended.
  except ValueError:
    # Fallback: hard-coded HDP 1.3 component versions.
    components = [
      ('HDP', "2.0.6"),
      ('Hadoop', "1.2.0.1.3.0.0-107"),
      ('HCatalog', "0.11.0.1.3.0.0-107"),
      ('Pig', "0.11.1.1.3.0.0-107"),
      ('Hive', "0.11.0.1.3.0.0-107"),
      ('Oozie', "3.3.2.1.3.0.0-107")
    ]

  # Tutorials are prepended only when installed.
  if conf.TUTORIALS_INSTALLED.get():
    components.insert(0, ('Tutorials', _get_tutorials_version()))
  # components.insert(0, ("Sandbox", conf.SANDBOX_VERSION.get()))

  return components, HUE_VERSION
def _start_server(cluster):
    """Start a HiveServer2 process for the test cluster.

    HADOOP_CLASSPATH is computed by shelling out to ``hadoop classpath`` so
    HS2 sees the same jars as the mini cluster.

    Args:
        cluster: pseudo-HDFS test cluster (provides _mr2_env and _tmpdir).
    Returns:
        The subprocess.Popen handle of the HiveServer2 process.
    """
    args = [beeswax.conf.HIVE_SERVER_BIN.get()]

    env = cluster._mr2_env.copy()

    # Hoisted: this root is used by every path below.
    hadoop_root = get_run_root("ext/hadoop/hadoop")

    # communicate() drains the pipes while waiting; the previous
    # wait() + stdout.read() order can deadlock if the child fills
    # the stdout pipe buffer before exiting.
    hadoop_cp_proc = subprocess.Popen(
        args=[hadoop_root + "/bin/hadoop", "classpath"],
        env=env,
        cwd=cluster._tmpdir,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    stdout_data, _ = hadoop_cp_proc.communicate()
    hadoop_cp = stdout_data.strip()

    # Links created in jenkins script.
    # If missing classes when booting HS2, check here.
    aux_classpath = ":".join(
        [
            hadoop_root + "/share/hadoop/hdfs/hadoop-hdfs.jar",
            hadoop_root + "/share/hadoop/common/lib/hadoop-auth.jar",
            hadoop_root + "/share/hadoop/common/hadoop-common.jar",
            hadoop_root + "/share/hadoop/mapreduce/hadoop-mapreduce-client-core.jar",
        ]
    )

    env.update(
        {
            "HADOOP_HOME": hadoop_root,  # Used only by Hive for some reason
            "HIVE_CONF_DIR": beeswax.conf.HIVE_CONF_DIR.get(),
            "HIVE_SERVER2_THRIFT_PORT": str(HIVE_SERVER_TEST_PORT),
            "HADOOP_MAPRED_HOME": hadoop_root + "/share/hadoop/mapreduce",
            "AUX_CLASSPATH": aux_classpath,
            "HADOOP_CLASSPATH": hadoop_cp,
        }
    )

    if os.getenv("JAVA_HOME"):
        env["JAVA_HOME"] = os.getenv("JAVA_HOME")

    LOG.info("Executing %s, env %s, cwd %s" % (repr(args), repr(env), cluster._tmpdir))
    return subprocess.Popen(args=args, env=env, cwd=cluster._tmpdir, stdin=subprocess.PIPE)
def _get_hdfs_bin(self, env): try: return env['HDFS_BIN'] except KeyError: return os.path.join(get_run_root('ext/hadoop/hadoop'), 'bin', 'hdfs')
def _get_yarn_bin(self, env): try: return env['YARN_BIN'] except KeyError: return os.path.join(get_run_root('ext/hadoop/hadoop'), 'bin', 'yarn')
def _get_mapred_bin(self, env): try: return env['MAPRED_BIN'] except KeyError: return os.path.join(get_run_root('ext/hadoop/hadoop'), 'bin', 'mapred')
def start(self):
  """Format HDFS, start NameNode/DataNode and MR2, and create standard HDFS dirs.

  Raises:
    RuntimeError: if HDFS does not become ready within STARTUP_DEADLINE.
  """
  LOG.info("Using temporary directory: %s" % (self._tmpdir, ))

  if not os.path.exists(self.hadoop_conf_dir):
    os.mkdir(self.hadoop_conf_dir)

  self._log_dir = self._tmppath('logs')
  if not os.path.exists(self._log_dir):
    os.mkdir(self._log_dir)

  self._local_dir = self._tmppath('local')
  if not os.path.exists(self._local_dir):
    os.mkdir(self._local_dir)

  self._write_hadoop_metrics_conf(self.hadoop_conf_dir)
  self._write_core_site()
  self._write_hdfs_site()
  self._write_yarn_site()
  self._write_mapred_site()

  # More stuff to setup in the environment
  env = {
    'YARN_HOME': get_run_root('ext/hadoop/hadoop'),
    'HADOOP_COMMON_HOME': get_run_root('ext/hadoop/hadoop'),
    'HADOOP_MAPRED_HOME': get_run_root('ext/hadoop/hadoop'),
    'HADOOP_HDFS_HOME': get_run_root('ext/hadoop/hadoop'),
    'HADOOP_CONF_DIR': self.hadoop_conf_dir,
    'YARN_CONF_DIR': self.hadoop_conf_dir,
    'HADOOP_HEAPSIZE': '128',
    'HADOOP_LOG_DIR': self._log_dir,
    'USER': self.superuser,
    'LANG': "en_US.UTF-8",
    'PATH': os.environ['PATH'],
  }
  if "JAVA_HOME" in os.environ:
    env['JAVA_HOME'] = os.environ['JAVA_HOME']

  LOG.debug("Hadoop Environment:\n" + "\n".join([str(x) for x in sorted(env.items())]))

  # Format HDFS
  self._format(self.hadoop_conf_dir, env)

  # Run them
  self._nn_proc = self._start_daemon('namenode', self.hadoop_conf_dir, env)
  self._dn_proc = self._start_daemon('datanode', self.hadoop_conf_dir, env)

  # Make sure they're running
  deadline = time.time() + STARTUP_DEADLINE
  while not self._is_hdfs_ready(env):
    if time.time() > deadline:
      self.stop()
      raise RuntimeError('%s is taking too long to start' % (self, ))
    time.sleep(5)

  # Start MR2
  self._start_mr2(env)

  # Create HDFS directories. 0o1777 = sticky + world-writable; the old-style
  # 01777 octal literal is a syntax error on Python 3.
  if not self.fs.exists('/tmp'):
    # NOTE(review): self.mkdir looks like it should be self.fs.mkdir, as on
    # the lines below -- confirm before changing.
    self.fs.do_as_superuser(self.mkdir, '/tmp', 0o1777)
  self.fs.do_as_superuser(self.fs.chmod, '/tmp', 0o1777)

  self.fs.do_as_superuser(self.fs.mkdir, '/tmp/hadoop-yarn', 0o1777)
  self.fs.do_as_superuser(self.fs.chmod, '/tmp/hadoop-yarn', 0o1777)

  self.fs.do_as_superuser(self.fs.mkdir, '/tmp/hadoop-yarn/staging', 0o1777)
  self.fs.do_as_superuser(self.fs.chmod, '/tmp/hadoop-yarn/staging', 0o1777)

  self.fs.do_as_superuser(self.fs.mkdir, '/tmp/hadoop-yarn/staging/history', 0o1777)
  self.fs.do_as_superuser(self.fs.chmod, '/tmp/hadoop-yarn/staging/history', 0o1777)

  self.fs.do_as_superuser(self.fs.mkdir, '/var/log/hadoop-yarn/apps', 0o1777)
  self.fs.do_as_superuser(self.fs.chmod, '/var/log/hadoop-yarn/apps', 0o1777)

  self.fs.create_home_dir('/user/test')
  self.fs.create_home_dir('/user/hue')
def get_shared_beeswax_server():
  """Start (once per process) a shared beeswax/HiveServer2 test server.

  Populates the module globals _SHARED_HIVE_SERVER and
  _SHARED_HIVE_SERVER_CLOSER (cluster handle and cleanup callable).

  Raises:
    Exception: if the server does not accept a session within ~20s.
  """
  global _SHARED_HIVE_SERVER
  global _SHARED_HIVE_SERVER_CLOSER

  if _SHARED_HIVE_SERVER is None:
    cluster = pseudo_hdfs4.shared_cluster()

    HIVE_CONF = cluster.hadoop_conf_dir
    finish = (
      beeswax.conf.HIVE_SERVER_HOST.set_for_testing(get_localhost_name()),
      beeswax.conf.HIVE_SERVER_PORT.set_for_testing(HIVE_SERVER_TEST_PORT),
      beeswax.conf.HIVE_SERVER_BIN.set_for_testing(get_run_root('ext/hive/hive') + '/bin/hiveserver2'),
      beeswax.conf.HIVE_CONF_DIR.set_for_testing(HIVE_CONF)
    )

    default_xml = """<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
  <name>javax.jdo.option.ConnectionURL</name>
  <value>jdbc:derby:;databaseName=%(root)s/metastore_db;create=true</value>
  <description>JDBC connect string for a JDBC metastore</description>
</property>
<property>
  <name>hive.server2.enable.impersonation</name>
  <value>false</value>
</property>
<property>
  <name>hive.querylog.location</name>
  <value>%(querylog)s</value>
</property>
</configuration>
""" % {'root': cluster._tmpdir, 'querylog': cluster.log_dir + '/hive'}

    # was: file(...).write(...) -- Python-2-only builtin and a leaked handle.
    with open(HIVE_CONF + '/hive-site.xml', 'w') as fh:
      fh.write(default_xml)

    global _SHARED_HIVE_SERVER_PROCESS
    if _SHARED_HIVE_SERVER_PROCESS is None:
      p = _start_server(cluster)
      LOG.info("started")

      _SHARED_HIVE_SERVER_PROCESS = p

      def kill():
        LOG.info("Killing server (pid %d)." % p.pid)
        os.kill(p.pid, 9)
        p.wait()
      atexit.register(kill)

    # Poll with exponential backoff until HS2 accepts a session (~20s cap).
    start = time.time()
    started = False
    sleep = 0.001

    make_logged_in_client()
    user = User.objects.get(username='******')
    query_server = get_query_server_config()
    db = dbms.get(user, query_server)

    while not started and time.time() - start < 20.0:
      try:
        db.open_session(user)
        started = True
        break
      except Exception as e:  # was 'except Exception, e' -- Py3 syntax error
        LOG.info('HiveServer2 server status not started yet after: %s' % e)
        time.sleep(sleep)
        sleep *= 2

    if not started:
      raise Exception("Server took too long to come up.")

    def s():
      for f in finish:
        f()
      cluster.stop()

    _SHARED_HIVE_SERVER, _SHARED_HIVE_SERVER_CLOSER = cluster, s
def _get_mapred_bin(self, env): try: return env["MAPRED_BIN"] except KeyError: return os.path.join(get_run_root("ext/hadoop/hadoop"), "bin", "mapred")
def start(self):
    """Format HDFS, start NameNode/DataNode and MR2, and create standard HDFS dirs.

    Raises:
        RuntimeError: if HDFS does not become ready within STARTUP_DEADLINE.
    """
    LOG.info("Using temporary directory: %s" % (self._tmpdir,))

    if not os.path.exists(self.hadoop_conf_dir):
        os.mkdir(self.hadoop_conf_dir)

    self._log_dir = self._tmppath("logs")
    if not os.path.exists(self._log_dir):
        os.mkdir(self._log_dir)

    self._local_dir = self._tmppath("local")
    if not os.path.exists(self._local_dir):
        os.mkdir(self._local_dir)

    self._write_hadoop_metrics_conf(self.hadoop_conf_dir)
    self._write_core_site()
    self._write_hdfs_site()
    self._write_yarn_site()
    self._write_mapred_site()

    # More stuff to setup in the environment
    env = {
        "YARN_HOME": get_run_root("ext/hadoop/hadoop"),
        "HADOOP_COMMON_HOME": get_run_root("ext/hadoop/hadoop"),
        "HADOOP_MAPRED_HOME": get_run_root("ext/hadoop/hadoop"),
        "HADOOP_HDFS_HOME": get_run_root("ext/hadoop/hadoop"),
        "HADOOP_CONF_DIR": self.hadoop_conf_dir,
        "YARN_CONF_DIR": self.hadoop_conf_dir,
        "HADOOP_HEAPSIZE": "128",
        "HADOOP_LOG_DIR": self._log_dir,
        "USER": self.superuser,
        "LANG": "en_US.UTF-8",
        "PATH": os.environ["PATH"],
    }
    if "JAVA_HOME" in os.environ:
        env["JAVA_HOME"] = os.environ["JAVA_HOME"]

    LOG.debug("Hadoop Environment:\n" + "\n".join([str(x) for x in sorted(env.items())]))

    # Format HDFS
    self._format(self.hadoop_conf_dir, env)

    # Run them
    self._nn_proc = self._start_daemon("namenode", self.hadoop_conf_dir, env)
    self._dn_proc = self._start_daemon("datanode", self.hadoop_conf_dir, env)

    # Make sure they're running
    deadline = time.time() + STARTUP_DEADLINE
    while not self._is_hdfs_ready(env):
        if time.time() > deadline:
            self.stop()
            raise RuntimeError("%s is taking too long to start" % (self,))
        time.sleep(5)

    # Start MR2
    self._start_mr2(env)

    # Create HDFS directories. 0o1777 = sticky + world-writable; the
    # old-style 01777 octal literal is a syntax error on Python 3.
    if not self.fs.exists("/tmp"):
        # NOTE(review): self.mkdir looks like it should be self.fs.mkdir, as
        # on the lines below -- confirm before changing.
        self.fs.do_as_superuser(self.mkdir, "/tmp", 0o1777)
    self.fs.do_as_superuser(self.fs.chmod, "/tmp", 0o1777)

    self.fs.do_as_superuser(self.fs.mkdir, "/tmp/hadoop-yarn", 0o1777)
    self.fs.do_as_superuser(self.fs.chmod, "/tmp/hadoop-yarn", 0o1777)

    self.fs.do_as_superuser(self.fs.mkdir, "/tmp/hadoop-yarn/staging", 0o1777)
    self.fs.do_as_superuser(self.fs.chmod, "/tmp/hadoop-yarn/staging", 0o1777)

    self.fs.do_as_superuser(self.fs.mkdir, "/tmp/hadoop-yarn/staging/history", 0o1777)
    self.fs.do_as_superuser(self.fs.chmod, "/tmp/hadoop-yarn/staging/history", 0o1777)

    self.fs.do_as_superuser(self.fs.mkdir, "/var/log/hadoop-yarn/apps", 0o1777)
    self.fs.do_as_superuser(self.fs.chmod, "/var/log/hadoop-yarn/apps", 0o1777)

    self.fs.create_home_dir("/user/test")
    self.fs.create_home_dir("/user/hue")
def get_shared_beeswax_server():
  """Start (once per process) a shared beeswax/HiveServer2 test server.

  Populates the module globals _SHARED_HIVE_SERVER and
  _SHARED_HIVE_SERVER_CLOSER (cluster handle and cleanup callable).

  Raises:
    Exception: if the server does not accept a session within 30s.
  """
  global _SHARED_HIVE_SERVER
  global _SHARED_HIVE_SERVER_CLOSER

  if _SHARED_HIVE_SERVER is None:
    cluster = pseudo_hdfs4.shared_cluster()

    HIVE_CONF = cluster.hadoop_conf_dir
    finish = (
      beeswax.conf.HIVE_SERVER_HOST.set_for_testing(get_localhost_name()),
      beeswax.conf.HIVE_SERVER_PORT.set_for_testing(HIVE_SERVER_TEST_PORT),
      beeswax.conf.HIVE_SERVER_BIN.set_for_testing(
        get_run_root('ext/hive/hive') + '/bin/hiveserver2'),
      beeswax.conf.HIVE_CONF_DIR.set_for_testing(HIVE_CONF))

    default_xml = """<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
  <name>javax.jdo.option.ConnectionURL</name>
  <value>jdbc:derby:;databaseName=%(root)s/metastore_db;create=true</value>
  <description>JDBC connect string for a JDBC metastore</description>
</property>
<property>
  <name>hive.server2.enable.impersonation</name>
  <value>false</value>
</property>
<property>
  <name>hive.querylog.location</name>
  <value>%(querylog)s</value>
</property>
</configuration>
""" % {
      'root': cluster._tmpdir,
      'querylog': cluster.log_dir + '/hive'
    }

    # was: file(...).write(...) -- Python-2-only builtin and a leaked handle.
    with open(HIVE_CONF + '/hive-site.xml', 'w') as fh:
      fh.write(default_xml)

    global _SHARED_HIVE_SERVER_PROCESS
    if _SHARED_HIVE_SERVER_PROCESS is None:
      p = _start_server(cluster)
      LOG.info("started")
      # 0o1777: sticky + world-writable (old-style 01777 is a Py3 syntax error).
      cluster.fs.do_as_superuser(cluster.fs.chmod, '/tmp', 0o1777)

      _SHARED_HIVE_SERVER_PROCESS = p

      def kill():
        LOG.info("Killing server (pid %d)." % p.pid)
        os.kill(p.pid, 9)
        p.wait()
      atexit.register(kill)

    # Poll once a second until HS2 accepts a session (30s cap).
    start = time.time()
    started = False
    sleep = 1

    make_logged_in_client()
    user = User.objects.get(username='******')
    query_server = get_query_server_config()
    db = dbms.get(user, query_server)

    while not started and time.time() - start <= 30:
      try:
        db.open_session(user)
        started = True
        break
      except Exception as e:  # was 'except Exception, e' -- Py3 syntax error
        LOG.info('HiveServer2 server status not started yet after: %s' % e)
        time.sleep(sleep)

    if not started:
      raise Exception("Server took too long to come up.")

    def s():
      for f in finish:
        f()
      cluster.stop()

    _SHARED_HIVE_SERVER, _SHARED_HIVE_SERVER_CLOSER = cluster, s
def start(self):
  """Format HDFS, start NameNode/DataNode and MR2, and create standard HDFS dirs.

  Raises:
    RuntimeError: if HDFS does not become ready within STARTUP_DEADLINE.
  """
  LOG.info("Using temporary directory: %s" % (self._tmpdir,))

  if not os.path.exists(self.hadoop_conf_dir):
    os.mkdir(self.hadoop_conf_dir)

  self._log_dir = self._tmppath('logs')
  if not os.path.exists(self._log_dir):
    os.mkdir(self._log_dir)

  self._local_dir = self._tmppath('local')
  if not os.path.exists(self._local_dir):
    os.mkdir(self._local_dir)

  self._write_hadoop_metrics_conf(self.hadoop_conf_dir)
  self._write_core_site()
  self._write_hdfs_site()
  self._write_yarn_site()
  self._write_mapred_site()

  # More stuff to setup in the environment
  env = {
    'YARN_HOME': get_run_root('ext/hadoop/hadoop'),
    'HADOOP_COMMON_HOME': get_run_root('ext/hadoop/hadoop'),
    'HADOOP_MAPRED_HOME': get_run_root('ext/hadoop/hadoop'),
    'HADOOP_HDFS_HOME': get_run_root('ext/hadoop/hadoop'),
    'HADOOP_CONF_DIR': self.hadoop_conf_dir,
    'YARN_CONF_DIR': self.hadoop_conf_dir,
    'HADOOP_HEAPSIZE': '128',
    'HADOOP_LOG_DIR': self._log_dir,
    'USER': self.superuser,
    'LANG': "en_US.UTF-8",
    'PATH': os.environ['PATH'],
  }
  if "JAVA_HOME" in os.environ:
    env['JAVA_HOME'] = os.environ['JAVA_HOME']

  LOG.debug("Hadoop Environment:\n" + "\n".join([str(x) for x in sorted(env.items())]))

  # Format HDFS
  self._format(self.hadoop_conf_dir, env)

  # Run them
  self._nn_proc = self._start_daemon('namenode', self.hadoop_conf_dir, env)
  self._dn_proc = self._start_daemon('datanode', self.hadoop_conf_dir, env)

  # Make sure they're running
  deadline = time.time() + STARTUP_DEADLINE
  while not self._is_hdfs_ready(env):
    if time.time() > deadline:
      self.stop()
      raise RuntimeError('%s is taking too long to start' % (self,))
    time.sleep(5)

  # Start MR2
  self._start_mr2(env)

  # Create HDFS directories. 0o1777 = sticky + world-writable; the old-style
  # 01777 octal literal is a syntax error on Python 3.
  if not self.fs.exists('/tmp'):
    # NOTE(review): self.mkdir looks like it should be self.fs.mkdir, as on
    # the lines below -- confirm before changing.
    self.fs.do_as_superuser(self.mkdir, '/tmp', 0o1777)
  self.fs.do_as_superuser(self.fs.chmod, '/tmp', 0o1777)

  self.fs.do_as_superuser(self.fs.mkdir, '/tmp/hadoop-yarn', 0o1777)
  self.fs.do_as_superuser(self.fs.chmod, '/tmp/hadoop-yarn', 0o1777)

  self.fs.do_as_superuser(self.fs.mkdir, '/tmp/hadoop-yarn/staging', 0o1777)
  self.fs.do_as_superuser(self.fs.chmod, '/tmp/hadoop-yarn/staging', 0o1777)

  self.fs.do_as_superuser(self.fs.mkdir, '/tmp/hadoop-yarn/staging/history', 0o1777)
  self.fs.do_as_superuser(self.fs.chmod, '/tmp/hadoop-yarn/staging/history', 0o1777)

  self.fs.do_as_superuser(self.fs.mkdir, '/var/log/hadoop-yarn/apps', 0o1777)
  self.fs.do_as_superuser(self.fs.chmod, '/var/log/hadoop-yarn/apps', 0o1777)

  # Home dirs are created as the owning users (unlike the other variants,
  # which create them as the default user).
  self.fs.do_as_user('test', self.fs.create_home_dir, '/user/test')
  self.fs.do_as_user('hue', self.fs.create_home_dir, '/user/hue')
def _get_yarn_bin(self, env): try: return env["YARN_BIN"] except KeyError: return os.path.join(get_run_root("ext/hadoop/hadoop"), "bin", "yarn")
class SqoopServerProvider(object):
  """
  Setup a Sqoop server.
  """
  # Port the Sqoop HTTP endpoint listens on, and the Catalina shutdown port.
  TEST_PORT = '19080'
  TEST_SHUTDOWN_PORT = '19081'
  # Root of the bundled Sqoop distribution.
  HOME = get_run_root('ext/sqoop/sqoop')

  requires_hadoop = True
  integration = True
  # Process-wide flag: the server is started at most once per test run.
  is_running = False

  @classmethod
  def setup_class(cls):
    # Sqoop tests only run against a live cluster.
    if not is_live_cluster():
      raise SkipTest()
    cls.cluster = pseudo_hdfs4.shared_cluster()
    cls.client, callback = cls.get_shared_server()
    cls.shutdown = [callback]

  @classmethod
  def initialize(cls, tmpdir):
    """Create the Sqoop working dirs under tmpdir and rewrite its conf templates."""
    hadoop_conf_dir = os.path.join(tmpdir, 'conf')
    base_dir = os.path.join(tmpdir, 'sqoop')
    log_dir = os.path.join(base_dir, 'logs')
    conf_dir = os.path.join(base_dir, 'conf')
    old_conf_dir = os.path.join(SqoopServerProvider.HOME, 'server/conf')

    if not os.path.exists(hadoop_conf_dir):
      os.mkdir(hadoop_conf_dir)
    if not os.path.exists(base_dir):
      os.mkdir(base_dir)
    if not os.path.exists(log_dir):
      os.mkdir(log_dir)
    if not os.path.exists(conf_dir):
      os.mkdir(conf_dir)

    # Copy the shipped properties files, substituting the template
    # placeholders with the per-test directories created above.
    for _file in ('sqoop.properties', 'sqoop_bootstrap.properties'):
      with open(os.path.join(old_conf_dir, _file), 'r') as _original:
        with open(os.path.join(conf_dir, _file), 'w') as _new:
          for _line in _original:
            line = _line.replace('${test.log.dir}', log_dir)
            line = line.replace('${test.hadoop.conf.dir}', hadoop_conf_dir)
            line = line.replace('${test.base.dir}', base_dir)
            _new.write(line)

    # This sets JAVA_OPTS with a sqoop conf... we need to use our own.
    # (chmod 0 makes setenv.sh unreadable/unexecutable so Catalina skips it.)
    os.chmod(os.path.join(SqoopServerProvider.HOME, 'server/bin/setenv.sh'), 0)

  @classmethod
  def start(cls, cluster):
    """
    Start the Sqoop server process and return its Popen handle.
    """
    SqoopServerProvider.initialize(cluster._tmpdir)

    # NOTE(review): this mutates the process-wide environment (no .copy())
    # -- confirm that is intended.
    env = os.environ
    env['CATALINA_HOME'] = os.path.join(SqoopServerProvider.HOME, 'server')
    env['CATALINA_PID'] = os.path.join(cluster._tmpdir, 'sqoop/sqoop.pid')
    env['CATALINA_OPTS'] = """
      -Dtest.log.dir=%(log_dir)s
      -Dtest.host.local=%(host)s
      -Dsqoop.http.port=%(http_port)s
      -Dsqoop.admin.port=%(admin_port)s
    """ % {
      'log_dir': os.path.join(cluster._tmpdir, 'sqoop/logs'),
      'host': socket.getfqdn(),
      'http_port': SqoopServerProvider.TEST_PORT,
      'admin_port': SqoopServerProvider.TEST_SHUTDOWN_PORT
    }
    env['SQOOP_HTTP_PORT'] = SqoopServerProvider.TEST_PORT
    env['SQOOP_ADMIN_PORT'] = SqoopServerProvider.TEST_SHUTDOWN_PORT
    env['JAVA_OPTS'] = '-Dsqoop.config.dir=%s' % os.path.join(cluster._tmpdir, 'sqoop/conf')

    args = [os.path.join(SqoopServerProvider.HOME, 'bin/sqoop.sh'), 'server', 'start']

    LOG.info("Executing %s, env %s, cwd %s" % (repr(args), repr(env), cluster._tmpdir))
    process = subprocess.Popen(args=args, env=env, cwd=cluster._tmpdir, stdin=subprocess.PIPE)
    return process

  @classmethod
  def get_shared_server(cls, username='******', language=settings.LANGUAGE_CODE):
    """Return (SqoopClient, shutdown_callback), starting the server on first call.

    Raises:
      Exception: if the server does not report a version within ~60s.
    """
    callback = lambda: None

    with service_lock:
      if not SqoopServerProvider.is_running:
        # Setup
        cluster = pseudo_hdfs4.shared_cluster()

        if is_live_cluster():
          finish = ()
        else:
          LOG.info(
              '\nStarting a Mini Sqoop. Requires "tools/jenkins/jenkins.sh" to be previously ran.\n'
          )
          finish = (SERVER_URL.set_for_testing(
              "http://%s:%s/sqoop" % (socket.getfqdn(), SqoopServerProvider.TEST_PORT)), )

          p = cls.start(cluster)

          def kill():
            # The pid is read back from the Catalina pid file written at start.
            with open(os.path.join(cluster._tmpdir, 'sqoop/sqoop.pid'), 'r') as pidfile:
              pid = pidfile.read()
              LOG.info("Killing Sqoop server (pid %s)." % pid)
              os.kill(int(pid), 9)
              p.wait()
          atexit.register(kill)

        # Poll with exponential backoff until the server answers get_version().
        start = time.time()
        started = False
        sleep = 0.01

        client = SqoopClient(SERVER_URL.get(), username, language)

        while not started and time.time() - start < 60.0:
          LOG.info('Check Sqoop status...')
          try:
            version = client.get_version()
          except RestException as e:
            LOG.exception('Exception fetching the Sqoop server version')
            # Don't loop if we had an authentication error.
            if e.code == 401:
              raise
          except Exception as e:
            LOG.info('Sqoop server not started yet: %s' % e)
          else:
            if version:
              started = True
              break
          time.sleep(sleep)
          sleep *= 2

        if not started:
          raise Exception("Sqoop server took too long to come up.")

        def shutdown():
          for f in finish:
            f()
          cluster.stop()
        callback = shutdown

        SqoopServerProvider.is_running = True
      else:
        client = SqoopClient(SERVER_URL.get(), username, language)

    return client, callback
'APP_DIRS': True, }, ] # Desktop doesn't use an auth profile module, because # because it doesn't mesh very well with the notion # of having multiple apps. If your app needs # to store data related to users, it should # manage its own table with an appropriate foreign key. AUTH_PROFILE_MODULE = None LOGIN_REDIRECT_URL = "/" LOGOUT_REDIRECT_URL = "/" # For djangosaml2 bug. PYLINTRC = get_run_root('.pylintrc') # Custom CSRF Failure View CSRF_FAILURE_VIEW = 'desktop.views.csrf_failure' ############################################################ # Part 4: Installation of apps ############################################################ _config_dir = os.getenv("HUE_CONF_DIR", get_desktop_root("conf")) # Libraries are loaded and configured before the apps appmanager.load_libs() _lib_conf_modules = [ dict(module=app.conf, config_key=None) for app in appmanager.DESKTOP_LIBS if app.conf is not None
class OozieServerProvider(object):
  """
  Setup a Oozie server.
  """
  OOZIE_TEST_PORT = '18001'
  OOZIE_HOME = get_run_root('ext/oozie/oozie')

  requires_hadoop = True
  # Process-wide flag: the server is started at most once per test run.
  is_oozie_running = False

  @classmethod
  def setup_class(cls):
    cls.cluster = pseudo_hdfs4.shared_cluster()
    cls.oozie, callback = cls._get_shared_oozie_server()
    cls.shutdown = [callback]

  @classmethod
  def wait_until_completion(cls, oozie_jobid, timeout=300.0, step=5):
    """Poll the job every `step` seconds until it finishes or `timeout` elapses.

    Returns the final job object; raises Exception on timeout.
    """
    job = cls.oozie.get_job(oozie_jobid)
    start = time.time()

    while job.is_running() and (time.time() - start) < timeout:
      time.sleep(step)
      LOG.info('Checking status of %s...' % oozie_jobid)
      job = cls.oozie.get_job(oozie_jobid)
      LOG.info('[%d] Status after %d: %s' % (time.time(), time.time() - start, job))

    logs = cls.oozie.get_job_log(oozie_jobid)

    if job.is_running():
      msg = "[%d] %s took more than %d to complete: %s" % (time.time(), oozie_jobid, timeout, logs)
      LOG.info(msg)
      raise Exception(msg)
    else:
      LOG.info('[%d] Job %s took %d: %s' % (time.time(), job.id, time.time() - start, logs))

    return job

  @classmethod
  def _setup_conf_dir(cls, cluster):
    # Copy the shipped conf dir, then overwrite oozie-site.xml with our own.
    original_oozie_conf_dir = '%s/conf' % OozieServerProvider.OOZIE_HOME
    shutil.copytree(original_oozie_conf_dir, cluster._tmppath('conf/oozie'))
    cls._write_oozie_site(cluster)

  @classmethod
  def _write_oozie_site(cls, cluster):
    """Write an oozie-site.xml with an embedded Derby DB under the cluster tmp dir."""
    oozie_configs = {
      'oozie.service.ProxyUserService.proxyuser.hue.hosts': '*',
      'oozie.service.ProxyUserService.proxyuser.hue.groups': '*',
      'oozie.service.HadoopAccessorService.hadoop.configurations': '*=%s' % cluster._tmppath('conf'),
      'oozie.db.schema.name': 'oozie',
      'oozie.data.dir': cluster._tmppath('oozie_tmp_dir'),
      'oozie.service.JPAService.create.db.schema': 'false',
      'oozie.service.JPAService.jdbc.driver': 'org.apache.derby.jdbc.EmbeddedDriver',
      'oozie.service.JPAService.jdbc.url': 'jdbc:derby:${oozie.data.dir}/${oozie.db.schema.name}-db;create=true',
      'oozie.service.JPAService.jdbc.username': '******',
      'oozie.service.JPAService.jdbc.password': '',
      'oozie.service.SchemaService.wf.ext.schemas': '''shell-action-0.1.xsd,shell-action-0.2.xsd,shell-action-0.3.xsd,email-action-0.1.xsd,hive-action-0.2.xsd,
hive-action-0.3.xsd,hive-action-0.4.xsd,hive-action-0.5.xsd,sqoop-action-0.2.xsd,sqoop-action-0.3.xsd,
sqoop-action-0.4.xsd,ssh-action-0.1.xsd,ssh-action-0.2.xsd,distcp-action-0.1.xsd,distcp-action-0.2.xsd,
oozie-sla-0.1.xsd,oozie-sla-0.2.xsd, hive2-action-0.1.xsd, spark-action-0.1.xsd''',
      'oozie.service.ActionService.executor.ext.classes': '''org.apache.oozie.action.email.EmailActionExecutor,
org.apache.oozie.action.hadoop.HiveActionExecutor,
org.apache.oozie.action.hadoop.ShellActionExecutor,
org.apache.oozie.action.hadoop.SqoopActionExecutor,
org.apache.oozie.action.hadoop.DistcpActionExecutor,
org.apache.oozie.action.hadoop.Hive2ActionExecutor,
org.apache.oozie.action.ssh.SshActionExecutor,
org.apache.oozie.action.oozie.SubWorkflowActionExecutor,
org.apache.oozie.action.hadoop.SparkActionExecutor''',
      'oozie.service.coord.normal.default.timeout': 120
    }
    write_config(oozie_configs, cluster._tmppath('conf/oozie/oozie-site.xml'))

  @classmethod
  def _start_oozie(cls, cluster):
    """
    Start oozie process.
    """
    OozieServerProvider._setup_conf_dir(cluster)

    args = [OozieServerProvider.OOZIE_HOME + '/bin/oozied.sh', 'run']
    # NOTE(review): mutates the process-wide environment (no .copy()) --
    # confirm that is intended.
    env = os.environ
    env['OOZIE_DATA'] = cluster._tmppath('oozie_tmp_dir')
    env['OOZIE_HTTP_PORT'] = OozieServerProvider.OOZIE_TEST_PORT
    conf_dir = os.path.join(cluster.log_dir, 'oozie')
    os.mkdir(conf_dir)
    env['OOZIE_LOG'] = conf_dir
    env['OOZIE_CONFIG'] = cluster._tmppath('conf/oozie')

    LOG.info("Executing %s, env %s, cwd %s" % (repr(args), repr(env), cluster._tmpdir))
    process = subprocess.Popen(args=args, env=env, cwd=cluster._tmpdir, stdin=subprocess.PIPE)
    return process

  @classmethod
  def _reset_oozie(cls, cluster):
    """Wipe the Oozie Derby DB and recreate its schema via ooziedb.sh."""
    env = os.environ
    env['OOZIE_DATA'] = cluster._tmppath('oozie_tmp_dir')

    args = ['rm', '-r', '%s/data/oozie-db' % cluster._tmppath('oozie_tmp_dir')]
    LOG.info("Executing %s, env %s" % (args, env))
    subprocess.call(args, env=env)

    args = [OozieServerProvider.OOZIE_HOME + '/bin/ooziedb.sh', 'create', '-sqlfile', 'oozie.sql', '-run']
    LOG.info("Executing %s, env %s" % (args, env))
    subprocess.call(args, env=env)

  @classmethod
  def _setup_sharelib(cls):
    """Install the Oozie sharelib into the current user's HDFS home."""
    LOG.info("Copying Oozie sharelib")
    user_home = cls.cluster.fs.do_as_user(getpass.getuser(), cls.cluster.fs.get_home_dir)
    oozie_share_lib = user_home + '/share'
    cls.cluster.fs.do_as_user(getpass.getuser(), cls.cluster.fs.create_home_dir)

    env = os.environ
    args = [
      OozieServerProvider.OOZIE_HOME + '/bin/oozie-setup.sh',
      'sharelib',
      'create',
      '-fs',
      cls.cluster.fs.fs_defaultfs,
      '-locallib',
      OozieServerProvider.OOZIE_HOME + '/oozie-sharelib.tar.gz'
    ]
    LOG.info("Executing %s, env %s" % (args, env))
    subprocess.call(args, env=env)
    LOG.info("Oozie sharelib copied to %s" % oozie_share_lib)

  @classmethod
  def _get_shared_oozie_server(cls):
    """Return (oozie_api, shutdown_callback), starting a mini Oozie on first call.

    Raises:
      Exception: if the server does not reach NORMAL mode within ~30s.
    """
    callback = lambda: None

    _oozie_lock.acquire()
    try:
      if not OozieServerProvider.is_oozie_running:
        cluster = pseudo_hdfs4.shared_cluster()

        if is_live_cluster():
          def shutdown():
            pass
        else:
          LOG.info('\nStarting a Mini Oozie. Requires "tools/jenkins/jenkins.sh" to be previously ran.\n')
          LOG.info('See https://issues.cloudera.org/browse/HUE-861\n')

          finish = (OOZIE_URL.set_for_testing(
              "http://%s:%s/oozie" % (socket.getfqdn(), OozieServerProvider.OOZIE_TEST_PORT)), )

          # Setup
          cls._setup_sharelib()
          cls._reset_oozie(cluster)

          p = cls._start_oozie(cluster)

          def kill():
            LOG.info("Killing Oozie server (pid %d)." % p.pid)
            os.kill(p.pid, 9)
            p.wait()
          atexit.register(kill)

          def shutdown():
            for f in finish:
              f()
            cluster.stop()

        # Poll with exponential backoff until Oozie reports NORMAL mode.
        start = time.time()
        started = False
        sleep = 0.01

        while not started and time.time() - start < 30.0:
          status = None
          try:
            LOG.info('Check Oozie status...')
            status = get_oozie(cluster.superuser).get_oozie_status()

            if status['systemMode'] == 'NORMAL':
              started = True
              break
            time.sleep(sleep)
            sleep *= 2
          except Exception as e:  # was 'except Exception, e' -- Py3 syntax error
            LOG.info('Oozie server status not NORMAL yet: %s - %s' % (status, e))
            time.sleep(sleep)
            sleep *= 2

        if not started:
          raise Exception("Oozie server took too long to come up.")

        OozieServerProvider.is_oozie_running = True
        callback = shutdown
    finally:
      _oozie_lock.release()

    cluster = pseudo_hdfs4.shared_cluster()
    return get_oozie(cluster.superuser), callback
def _get_hdfs_bin(self, env): try: return env["HDFS_BIN"] except KeyError: return os.path.join(get_run_root("ext/hadoop/hadoop"), "bin", "hdfs")
class OozieServerProvider(object):
  """
  Setup a Oozie server.
  """
  OOZIE_TEST_PORT = '18080'
  OOZIE_HOME = get_run_root('ext/oozie/oozie')

  requires_hadoop = True
  # Process-wide flag: the server is started at most once per test run.
  is_oozie_running = False

  @classmethod
  def setup_class(cls):
    cls.cluster = pseudo_hdfs4.shared_cluster()
    cls.oozie, callback = cls._get_shared_oozie_server()
    cls.shutdown = [callback]

  @classmethod
  def wait_until_completion(cls, oozie_jobid, timeout=300.0, step=5):
    """Poll the job every `step` seconds until it finishes or `timeout` elapses.

    Returns the final job object; raises Exception on timeout.
    """
    job = cls.oozie.get_job(oozie_jobid)
    start = time.time()

    while job.is_running() and time.time() - start < timeout:
      time.sleep(step)
      LOG.info('Checking status of %s...' % oozie_jobid)
      job = cls.oozie.get_job(oozie_jobid)
      LOG.info('[%d] Status after %d: %s' % (time.time(), time.time() - start, job))

    logs = cls.oozie.get_job_log(oozie_jobid)

    if job.is_running():
      msg = "[%d] %s took more than %d to complete: %s" % (time.time(), oozie_jobid, timeout, logs)
      LOG.info(msg)
      raise Exception(msg)
    else:
      # Fixed log-message typo: 'tooke' -> 'took'.
      LOG.info('[%d] Job %s took %d: %s' % (time.time(), job.id, time.time() - start, logs))

    return job

  @classmethod
  def _start_oozie(cls, cluster):
    """
    Start oozie process.
    """
    args = [OozieServerProvider.OOZIE_HOME + '/bin/oozied.sh', 'run']
    # NOTE(review): mutates the process-wide environment (no .copy()) --
    # confirm that is intended.
    env = os.environ
    env['OOZIE_HTTP_PORT'] = OozieServerProvider.OOZIE_TEST_PORT
    conf_dir = os.path.join(cluster.log_dir, 'oozie')
    os.mkdir(conf_dir)
    env['OOZIE_LOG'] = conf_dir

    LOG.info("Executing %s, env %s, cwd %s" % (repr(args), repr(env), cluster._tmpdir))
    process = subprocess.Popen(args=args, env=env, cwd=cluster._tmpdir, stdin=subprocess.PIPE)
    return process

  @classmethod
  def _reset_oozie(cls):
    """Wipe the Oozie Derby DB and recreate its schema via ooziedb.sh."""
    env = os.environ

    args = ['rm', '-r', OozieServerProvider.OOZIE_HOME + '/data/oozie-db']
    LOG.info("Executing %s, env %s" % (args, env))
    subprocess.call(args, env=env)

    args = [OozieServerProvider.OOZIE_HOME + '/bin/ooziedb.sh', 'create', '-sqlfile', 'oozie.sql', '-run']
    LOG.info("Executing %s, env %s" % (args, env))
    subprocess.call(args, env=env)

  @classmethod
  def _setup_sharelib(cls):
    # Copy the sharelib into the oozie user's HDFS home.
    cls.cluster.fs.do_as_user('oozie', cls.cluster.fs.create_home_dir, '/user/oozie')
    cls.cluster.fs.do_as_user('oozie', cls.cluster.fs.copyFromLocal,
                              OozieServerProvider.OOZIE_HOME + '/share', '/user/oozie/')

  @classmethod
  def _get_shared_oozie_server(cls):
    """Return (oozie_api, shutdown_callback), starting a mini Oozie on first call.

    Raises:
      Exception: if the server does not reach NORMAL mode within ~30s.
    """
    callback = lambda: None

    # NOTE(review): the lock is released without try/finally, so an exception
    # during startup leaves it held -- confirm before changing.
    _oozie_lock.acquire()

    if not OozieServerProvider.is_oozie_running:
      LOG.info('\nStarting a Mini Oozie. Requires "tools/jenkins/jenkins.sh" to be previously ran.\n')
      LOG.info('See https://issues.cloudera.org/browse/HUE-861\n')

      finish = (OOZIE_URL.set_for_testing(
          "http://localhost:%s/oozie" % OozieServerProvider.OOZIE_TEST_PORT), )

      # Setup
      cluster = pseudo_hdfs4.shared_cluster()
      cls._setup_sharelib()
      cls._reset_oozie()

      p = cls._start_oozie(cluster)

      def kill():
        LOG.info("Killing Oozie server (pid %d)." % p.pid)
        os.kill(p.pid, 9)
        p.wait()
      atexit.register(kill)

      # Poll with exponential backoff until Oozie reports NORMAL mode.
      start = time.time()
      started = False
      sleep = 0.01

      while not started and time.time() - start < 30.0:
        status = None
        try:
          LOG.info('Check Oozie status...')
          status = get_oozie().get_oozie_status()

          if status['systemMode'] == 'NORMAL':
            started = True
            break
          time.sleep(sleep)
          sleep *= 2
        except Exception as e:  # was 'except Exception, e' -- Py3 syntax error
          LOG.info('Oozie server status not NORMAL yet: %s - %s' % (status, e))
          time.sleep(sleep)
          sleep *= 2

      if not started:
        raise Exception("Oozie server took too long to come up.")

      OozieServerProvider.is_oozie_running = True

      def shutdown():
        for f in finish:
          f()
        cluster.stop()
      callback = shutdown

    _oozie_lock.release()

    return get_oozie(), callback