Example #1
0
def _configure_hive(cluster):
    """Create the Hive database and prepare Hive's HDFS directory.

    The database is created through a remote session opened on the
    instance returned by ``pu.get_manager``. Afterwards, through a remote
    session on the namenode, ``/tmp/hive-hive`` is created in HDFS and its
    owner is changed to ``hive``.

    :param cluster: cluster object handed to the plugin utilities
    """
    with pu.get_manager(cluster).remote() as manager_remote:
        db_helper.create_hive_database(cluster, manager_remote)

    # Hive requires /tmp/hive-hive directory
    hdfs_commands = (
        'sudo su - -c "hadoop fs -mkdir -p /tmp/hive-hive" hdfs',
        'sudo su - -c "hadoop fs -chown hive /tmp/hive-hive" hdfs',
    )
    with pu.get_namenode(cluster).remote() as namenode_remote:
        # Order matters: the directory has to exist before it is chown-ed.
        for command in hdfs_commands:
            namenode_remote.execute_command(command)
Example #2
0
 def test_get_namenode(self):
     """The namenode found on the fake cluster has instance id 'id2'."""
     fake_cluster = tu.get_fake_cluster()
     namenode = u.get_namenode(fake_cluster)
     self.assertEqual('id2', namenode.instance_id)
Example #3
0
 def get_name_node_uri(self, cluster):
     """Return the ``hdfs://`` URI of the cluster's namenode, port 8020.

     The host part is the value returned by ``fqdn()`` on the namenode
     instance looked up with ``cu.get_namenode``.
     """
     namenode = cu.get_namenode(cluster)
     return 'hdfs://%s:8020' % namenode.fqdn()
Example #4
0
def _get_configs(service, cluster=None, node_group=None):
    def get_hadoop_dirs(mount_points, suffix):
        return ','.join([x + suffix for x in mount_points])

    all_confs = {}
    if cluster:
        zk_count = v._get_inst_count(cluster, 'ZOOKEEPER_SERVER')
        hbm_count = v._get_inst_count(cluster, 'HBASE_MASTER')
        snt_count = v._get_inst_count(cluster, 'SENTRY_SERVER')
        ks_count = v._get_inst_count(cluster, 'KEY_VALUE_STORE_INDEXER')
        imp_count = v._get_inst_count(cluster, 'IMPALA_CATALOGSERVER')
        core_site_safety_valve = ''
        if c_helper.is_swift_enabled(cluster):
            configs = swift_helper.get_swift_configs()
            confs = dict((c['name'], c['value']) for c in configs)
            core_site_safety_valve = xmlutils.create_elements_xml(confs)
        all_confs = {
            'HDFS': {
                'zookeeper_service':
                cu.ZOOKEEPER_SERVICE_NAME if zk_count else '',
                'dfs_block_local_path_access_user':
                '******' if imp_count else '',
                'core_site_safety_valve': core_site_safety_valve
            },
            'HIVE': {
                'mapreduce_yarn_service': cu.YARN_SERVICE_NAME,
                'zookeeper_service':
                cu.ZOOKEEPER_SERVICE_NAME if zk_count else ''
            },
            'OOZIE': {
                'mapreduce_yarn_service': cu.YARN_SERVICE_NAME,
                'zookeeper_service':
                cu.ZOOKEEPER_SERVICE_NAME if zk_count else ''
            },
            'YARN': {
                'hdfs_service': cu.HDFS_SERVICE_NAME,
                'zookeeper_service':
                cu.ZOOKEEPER_SERVICE_NAME if zk_count else ''
            },
            'HUE': {
                'hive_service': cu.HIVE_SERVICE_NAME,
                'oozie_service': cu.OOZIE_SERVICE_NAME,
                'sentry_service': cu.SENTRY_SERVICE_NAME if snt_count else '',
                'zookeeper_service':
                cu.ZOOKEEPER_SERVICE_NAME if zk_count else ''
            },
            'SPARK_ON_YARN': {
                'yarn_service': cu.YARN_SERVICE_NAME
            },
            'HBASE': {
                'hdfs_service': cu.HDFS_SERVICE_NAME,
                'zookeeper_service': cu.ZOOKEEPER_SERVICE_NAME,
                'hbase_enable_indexing': 'true' if ks_count else 'false',
                'hbase_enable_replication': 'true' if ks_count else 'false'
            },
            'FLUME': {
                'hdfs_service': cu.HDFS_SERVICE_NAME,
                'hbase_service': cu.HBASE_SERVICE_NAME if hbm_count else ''
            },
            'SENTRY': {
                'hdfs_service': cu.HDFS_SERVICE_NAME
            },
            'SOLR': {
                'hdfs_service': cu.HDFS_SERVICE_NAME,
                'zookeeper_service': cu.ZOOKEEPER_SERVICE_NAME
            },
            'SQOOP': {
                'mapreduce_yarn_service': cu.YARN_SERVICE_NAME
            },
            'KS_INDEXER': {
                'hbase_service': cu.HBASE_SERVICE_NAME,
                'solr_service': cu.SOLR_SERVICE_NAME
            },
            'IMPALA': {
                'hdfs_service': cu.HDFS_SERVICE_NAME,
                'hbase_service': cu.HBASE_SERVICE_NAME if hbm_count else '',
                'hive_service': cu.HIVE_SERVICE_NAME
            }
        }
        hive_confs = {
            'HIVE': {
                'hive_metastore_database_type':
                'postgresql',
                'hive_metastore_database_host':
                pu.get_manager(cluster).internal_ip,
                'hive_metastore_database_port':
                '7432',
                'hive_metastore_database_password':
                db_helper.get_hive_db_password(cluster)
            }
        }
        hue_confs = {
            'HUE': {
                'hue_webhdfs':
                cu.get_role_name(pu.get_namenode(cluster), 'NAMENODE')
            }
        }
        sentry_confs = {
            'SENTRY': {
                'sentry_server_database_type':
                'postgresql',
                'sentry_server_database_host':
                pu.get_manager(cluster).internal_ip,
                'sentry_server_database_port':
                '7432',
                'sentry_server_database_password':
                db_helper.get_sentry_db_password(cluster)
            }
        }

        all_confs = _merge_dicts(all_confs, hue_confs)
        all_confs = _merge_dicts(all_confs, hive_confs)
        all_confs = _merge_dicts(all_confs, sentry_confs)
        all_confs = _merge_dicts(all_confs, cluster.cluster_configs)

    if node_group:
        paths = node_group.storage_paths()

        ng_default_confs = {
            'NAMENODE': {
                'dfs_name_dir_list': get_hadoop_dirs(paths, '/fs/nn')
            },
            'SECONDARYNAMENODE': {
                'fs_checkpoint_dir_list': get_hadoop_dirs(paths, '/fs/snn')
            },
            'DATANODE': {
                'dfs_data_dir_list': get_hadoop_dirs(paths, '/fs/dn'),
                'dfs_datanode_data_dir_perm': 755,
                'dfs_datanode_handler_count': 30
            },
            'NODEMANAGER': {
                'yarn_nodemanager_local_dirs':
                get_hadoop_dirs(paths, '/yarn/local')
            },
            'SERVER': {
                'maxSessionTimeout': 60000
            }
        }

        ng_user_confs = pu.convert_process_configs(node_group.node_configs)
        all_confs = _merge_dicts(all_confs, ng_user_confs)
        all_confs = _merge_dicts(all_confs, ng_default_confs)

    return all_confs.get(service, {})
Example #5
0
 def get_name_node_uri(self, cluster):
     """Build the HDFS URI (port 8020) for the cluster's namenode.

     The host is taken from ``fqdn()`` of the instance that
     ``cu.get_namenode`` returns for ``cluster``.
     """
     host = cu.get_namenode(cluster).fqdn()
     uri = 'hdfs://%s:8020' % host
     return uri
Example #6
0
def _get_configs(service, cluster=None, node_group=None):
    """Return the configuration dict registered for ``service``.

    Starts from a fixed set of service references, then layers on (via
    ``_merge_dicts``, in this order) ``node_group.node_configs`` and the
    storage-path based node group defaults when ``node_group`` is truthy,
    followed by the HUE and HIVE extras and ``cluster.cluster_configs``
    when ``cluster`` is truthy.

    :param service: key to look up in the combined configs
    :param cluster: optional cluster object
    :param node_group: optional node group object
    :returns: the dict stored under ``service``, or ``{}`` if there is none
    """
    def get_hadoop_dirs(mount_points, suffix):
        # One "<mount point><suffix>" entry per mount point, comma-joined.
        return ','.join(point + suffix for point in mount_points)

    merged = {
        'OOZIE': {'mapreduce_yarn_service': cu.YARN_SERVICE_NAME},
        'YARN': {'hdfs_service': cu.HDFS_SERVICE_NAME},
        'HUE': {
            'hive_service': cu.HIVE_SERVICE_NAME,
            'oozie_service': cu.OOZIE_SERVICE_NAME,
        },
        'SPARK_ON_YARN': {'yarn_service': cu.YARN_SERVICE_NAME},
    }

    if node_group:
        storage = node_group.storage_paths()

        ng_defaults = {
            'NAMENODE': {
                'dfs_name_dir_list': get_hadoop_dirs(storage, '/fs/nn'),
            },
            'SECONDARYNAMENODE': {
                'fs_checkpoint_dir_list':
                    get_hadoop_dirs(storage, '/fs/snn'),
            },
            'DATANODE': {
                'dfs_data_dir_list': get_hadoop_dirs(storage, '/fs/dn'),
            },
            'NODEMANAGER': {
                'yarn_nodemanager_local_dirs':
                    get_hadoop_dirs(storage, '/yarn/local'),
            },
        }

        # NOTE(review): the defaults are merged after the user-supplied
        # node configs; which side wins depends on _merge_dicts -- confirm.
        merged = _merge_dicts(merged, node_group.node_configs)
        merged = _merge_dicts(merged, ng_defaults)

    if cluster:
        # The Hive metastore database host is the manager's internal IP.
        manager_ip = pu.get_manager(cluster).internal_ip
        hive_password = db_helper.get_hive_db_password(cluster)
        hive_extra = {
            'HIVE': {
                'hive_metastore_database_type': 'postgresql',
                'hive_metastore_database_host': manager_ip,
                'hive_metastore_database_port': '7432',
                'hive_metastore_database_password': hive_password,
                'mapreduce_yarn_service': cu.YARN_SERVICE_NAME,
            },
        }
        namenode_role = cu.get_role_name(pu.get_namenode(cluster),
                                         'NAMENODE')
        hue_extra = {'HUE': {'hue_webhdfs': namenode_role}}

        for extra in (hue_extra, hive_extra):
            merged = _merge_dicts(merged, extra)
        merged = _merge_dicts(merged, cluster.cluster_configs)

    return merged.get(service, {})
Example #7
0
def _get_configs(service, cluster=None, node_group=None):
    def get_hadoop_dirs(mount_points, suffix):
        return ','.join([x + suffix for x in mount_points])

    all_confs = {}
    if cluster:
        zk_count = v._get_inst_count(cluster, 'ZOOKEEPER_SERVER')
        hbm_count = v._get_inst_count(cluster, 'HBASE_MASTER')
        snt_count = v._get_inst_count(cluster, 'SENTRY_SERVER')
        ks_count = v._get_inst_count(cluster, 'KEY_VALUE_STORE_INDEXER')
        imp_count = v._get_inst_count(cluster, 'IMPALA_CATALOGSERVER')
        core_site_safety_valve = ''
        if c_helper.is_swift_enabled(cluster):
            configs = swift_helper.get_swift_configs()
            confs = dict((c['name'], c['value']) for c in configs)
            core_site_safety_valve = xmlutils.create_elements_xml(confs)
        all_confs = {
            'HDFS': {
                'zookeeper_service':
                    cu.ZOOKEEPER_SERVICE_NAME if zk_count else '',
                'dfs_block_local_path_access_user':
                    '******' if imp_count else '',
                'core_site_safety_valve': core_site_safety_valve
            },
            'HIVE': {
                'mapreduce_yarn_service': cu.YARN_SERVICE_NAME,
                'zookeeper_service':
                    cu.ZOOKEEPER_SERVICE_NAME if zk_count else ''
            },
            'OOZIE': {
                'mapreduce_yarn_service': cu.YARN_SERVICE_NAME,
                'zookeeper_service':
                    cu.ZOOKEEPER_SERVICE_NAME if zk_count else ''
            },
            'YARN': {
                'hdfs_service': cu.HDFS_SERVICE_NAME,
                'zookeeper_service':
                    cu.ZOOKEEPER_SERVICE_NAME if zk_count else ''
            },
            'HUE': {
                'hive_service': cu.HIVE_SERVICE_NAME,
                'oozie_service': cu.OOZIE_SERVICE_NAME,
                'sentry_service': cu.SENTRY_SERVICE_NAME if snt_count else '',
                'zookeeper_service':
                    cu.ZOOKEEPER_SERVICE_NAME if zk_count else ''
            },
            'SPARK_ON_YARN': {
                'yarn_service': cu.YARN_SERVICE_NAME
            },
            'HBASE': {
                'hdfs_service': cu.HDFS_SERVICE_NAME,
                'zookeeper_service': cu.ZOOKEEPER_SERVICE_NAME,
                'hbase_enable_indexing': 'true' if ks_count else 'false',
                'hbase_enable_replication': 'true' if ks_count else 'false'
            },
            'FLUME': {
                'hdfs_service': cu.HDFS_SERVICE_NAME,
                'hbase_service': cu.HBASE_SERVICE_NAME if hbm_count else ''
            },
            'SENTRY': {
                'hdfs_service': cu.HDFS_SERVICE_NAME
            },
            'SOLR': {
                'hdfs_service': cu.HDFS_SERVICE_NAME,
                'zookeeper_service': cu.ZOOKEEPER_SERVICE_NAME
            },
            'SQOOP': {
                'mapreduce_yarn_service': cu.YARN_SERVICE_NAME
            },
            'KS_INDEXER': {
                'hbase_service': cu.HBASE_SERVICE_NAME,
                'solr_service': cu.SOLR_SERVICE_NAME
            },
            'IMPALA': {
                'hdfs_service': cu.HDFS_SERVICE_NAME,
                'hbase_service': cu.HBASE_SERVICE_NAME if hbm_count else '',
                'hive_service': cu.HIVE_SERVICE_NAME
            }
        }
        hive_confs = {
            'HIVE': {
                'hive_metastore_database_type': 'postgresql',
                'hive_metastore_database_host':
                pu.get_manager(cluster).internal_ip,
                'hive_metastore_database_port': '7432',
                'hive_metastore_database_password':
                db_helper.get_hive_db_password(cluster)
            }
        }
        hue_confs = {
            'HUE': {
                'hue_webhdfs': cu.get_role_name(pu.get_namenode(cluster),
                                                'NAMENODE')
            }
        }
        sentry_confs = {
            'SENTRY': {
                'sentry_server_database_type': 'postgresql',
                'sentry_server_database_host':
                pu.get_manager(cluster).internal_ip,
                'sentry_server_database_port': '7432',
                'sentry_server_database_password':
                db_helper.get_sentry_db_password(cluster)
            }
        }

        all_confs = _merge_dicts(all_confs, hue_confs)
        all_confs = _merge_dicts(all_confs, hive_confs)
        all_confs = _merge_dicts(all_confs, sentry_confs)
        all_confs = _merge_dicts(all_confs, cluster.cluster_configs)

    if node_group:
        paths = node_group.storage_paths()

        ng_default_confs = {
            'NAMENODE': {
                'dfs_name_dir_list': get_hadoop_dirs(paths, '/fs/nn')
            },
            'SECONDARYNAMENODE': {
                'fs_checkpoint_dir_list': get_hadoop_dirs(paths, '/fs/snn')
            },
            'DATANODE': {
                'dfs_data_dir_list': get_hadoop_dirs(paths, '/fs/dn'),
                'dfs_datanode_data_dir_perm': 755,
                'dfs_datanode_handler_count': 30
            },
            'NODEMANAGER': {
                'yarn_nodemanager_local_dirs': get_hadoop_dirs(paths,
                                                               '/yarn/local')
            },
            'SERVER': {
                'maxSessionTimeout': 60000
            }
        }

        ng_user_confs = pu.convert_process_configs(node_group.node_configs)
        all_confs = _merge_dicts(all_confs, ng_user_confs)
        all_confs = _merge_dicts(all_confs, ng_default_confs)

    return all_confs.get(service, {})
Example #8
0
def _get_configs(service, cluster=None, node_group=None):
    """Look up the combined configuration for ``service``.

    A fixed set of service references forms the base. When ``node_group``
    is truthy, the converted ``node_group.node_configs`` and then the
    storage-path based defaults are merged in; when ``cluster`` is truthy,
    the HUE and HIVE extras and ``cluster.cluster_configs`` follow. All
    merging is delegated to ``_merge_dicts``.

    :param service: key to look up in the combined configs
    :param cluster: optional cluster object
    :param node_group: optional node group object
    :returns: the dict stored under ``service``, or ``{}`` if there is none
    """
    def get_hadoop_dirs(mount_points, suffix):
        # Suffix every mount point and join the results with commas.
        return ','.join(path + suffix for path in mount_points)

    result = {
        'OOZIE': {'mapreduce_yarn_service': cu.YARN_SERVICE_NAME},
        'YARN': {'hdfs_service': cu.HDFS_SERVICE_NAME},
        'HUE': {
            'hive_service': cu.HIVE_SERVICE_NAME,
            'oozie_service': cu.OOZIE_SERVICE_NAME,
        },
        'SPARK_ON_YARN': {'yarn_service': cu.YARN_SERVICE_NAME},
        'HBASE': {
            'hdfs_service': cu.HDFS_SERVICE_NAME,
            'zookeeper_service': cu.ZOOKEEPER_SERVICE_NAME,
        },
    }

    if node_group:
        mounts = node_group.storage_paths()

        defaults = {
            'NAMENODE': {
                'dfs_name_dir_list': get_hadoop_dirs(mounts, '/fs/nn'),
            },
            'SECONDARYNAMENODE': {
                'fs_checkpoint_dir_list':
                    get_hadoop_dirs(mounts, '/fs/snn'),
            },
            'DATANODE': {
                'dfs_data_dir_list': get_hadoop_dirs(mounts, '/fs/dn'),
            },
            'NODEMANAGER': {
                'yarn_nodemanager_local_dirs':
                    get_hadoop_dirs(mounts, '/yarn/local'),
            },
        }

        # NOTE(review): the defaults are merged after the user-supplied
        # node configs; which side wins depends on _merge_dicts -- confirm.
        user_confs = pu.convert_process_configs(node_group.node_configs)
        result = _merge_dicts(_merge_dicts(result, user_confs), defaults)

    if cluster:
        # The Hive metastore database host is the manager's internal IP.
        hive_db = {
            'hive_metastore_database_type': 'postgresql',
            'hive_metastore_database_host':
                pu.get_manager(cluster).internal_ip,
            'hive_metastore_database_port': '7432',
            'hive_metastore_database_password':
                db_helper.get_hive_db_password(cluster),
            'mapreduce_yarn_service': cu.YARN_SERVICE_NAME,
        }
        hue_webhdfs = cu.get_role_name(pu.get_namenode(cluster),
                                       'NAMENODE')

        result = _merge_dicts(result, {'HUE': {'hue_webhdfs': hue_webhdfs}})
        result = _merge_dicts(result, {'HIVE': hive_db})
        result = _merge_dicts(result, cluster.cluster_configs)

    return result.get(service, {})