Example #1
 def integrate(self):
     """
     Integrates Hadoop and GPDB by performing the following:
     
     1. Set up the Kerberos server
     2. Set up the Hadoop cluster
     3. Set up GPDB configurations
     4. Create SQL-specific test data
     """
     # check for GPHOME and MASTER_DATA_DIRECTORY;
     # raise an exception if either is not set
     gphome = os.getenv("GPHOME")
     if not gphome:
         raise HadoopIntegrationException("GPHOME not set!!")
     mdd = os.getenv("MASTER_DATA_DIRECTORY")
     if not mdd:
         raise HadoopIntegrationException("MASTER_DATA_DIRECTORY not set!!")
     self.fqdn = self.hostname + '.' + self.domain
     # check if hostname is present in /etc/hosts
     # if not, append the hostname to the file
     self._validate_hostname()
     # set up the kerberos server if security is enabled
     if self.secure_hadoop:
         self.kerberos_template_conf = local_path(os.path.join(self.template_conf_dir, "kerberos"))
         self.kerberos_util = KerberosUtil(self.fqdn, self.domain, self.kerberos_template_conf, self.node_list)
         self.kerberos_util.configure_server()
         self.kerberos_util.get_kerberos_ticket("hdfs")
         self.kerberos_util.get_kerberos_ticket("gpadmin")
     # set up the hadoop cluster for the configured distribution
     hadoop_conf_dir = local_path(os.path.join(self.template_conf_dir, "hdfs/rpm"))
     if self.hadoop_type == "phd":
         self.hadoop_util = PHDRpmUtil(self.hadoop_artifact_url, self.hadoop_install_dir, self.hadoop_data_dir, hadoop_conf_dir, self.fqdn, self.secure_hadoop)
         hadoop_guc = "gphd-2.0"
     elif self.hadoop_type == "cdh":
         self.hadoop_util = CDHRpmUtil(self.hadoop_artifact_url, self.hadoop_install_dir, self.hadoop_data_dir, hadoop_conf_dir, self.fqdn, self.secure_hadoop)
         hadoop_guc = "cdh4.1"
     elif self.hadoop_type == "apache":
         self.hadoop_util = ApacheTarUtil(self.hadoop_artifact_url, self.hadoop_install_dir, self.hadoop_data_dir, hadoop_conf_dir, self.fqdn, self.secure_hadoop)
         hadoop_guc = "gphd-2.0"
     # initialize the hadoop cluster
     self.hadoop_util.init_cluster()
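     # capture the hadoop environment paths needed to configure GPDB's gphdfs access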
     hadoop_home = self.hadoop_util.get_hadoop_env()['HADOOP_HOME']
     hadoop_common_home = self.hadoop_util.get_hadoop_env()['HADOOP_COMMON_HOME']
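     # the apache tarball layout keeps the common libraries one level deeper, under 'common'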
     if self.hadoop_type == "apache":
         hadoop_common_home = hadoop_common_home + "common"
     # set up GPDB configurations and test data
     gpdb_template_conf = local_path(os.path.join(self.template_conf_dir, "gpdb"))
     self._setup_gpdb_configurations(gphome, mdd, gpdb_template_conf, hadoop_home, hadoop_common_home, hadoop_guc)
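     # build the shell environment and java classpath used to compile and run the gphdfs test jars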
     export_env = "export HADOOP_HOME=%s; source %s/lib/hadoop/hadoop_env.sh;" %(hadoop_home, gphome)
     java_classpath = ".:$CLASSPATH:%s/lib/hadoop/%s" %(gphome, self.gphdfs_connector)
     self._create_test_jars(export_env, java_classpath)
     self.java_cmd = self._create_java_cmd_string(export_env, java_classpath)
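     # data types (plus 'regression' and 'all') for which test data files will be generated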
     test_data_types = [
                         'regression', 'time', 'timestamp', 'date',
                         'bigint', 'int', 'smallint', 'real', 'float',
                         'boolean', 'varchar', 'bpchar', 'numeric', 'text', 'all'
                       ]
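     # base test data size; the large dataset is scaled 2000x (5000 * 2000 = 10,000,000)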
     datasize = 5000
     largedatasize = str(int(datasize) * 2000)
     self._create_test_data(datasize, largedatasize, test_data_types)