def reconfigureQueue(capacityProperties, yarnSiteProperty, mod_conf_path):
    '''
    Reconfigures the queue. If a key is present in both properties dicts,
    the capacityProperties value takes precedence.
    '''
    # Merge yarn-site properties into capacity properties without
    # overwriting any keys that capacityProperties already defines.
    for key1 in yarnSiteProperty:
        if key1 not in capacityProperties:
            capacityProperties[key1] = yarnSiteProperty[key1]
        else:
            for key2 in yarnSiteProperty[key1]:
                if key2 not in capacityProperties[key1]:
                    capacityProperties[key1][key2] = yarnSiteProperty[key1][key2]
    # capacityProperties.update(yarnSiteProperty)
    Hadoop.modifyConfig(capacityProperties, {'services': ['all']},
                        makeCurrConfBackupInWindows=False)
    MAPRED.restartJobtracker(mod_conf_path)
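# A minimal sketch of the merge semantics above, assuming a configured
# Hadoop/MAPRED environment; the property names, file key, and mod_conf
# path are hypothetical. Values already present in capacityProperties win,
# and yarn-site values only fill in the gaps.
capacity = {'capacity-scheduler.xml': {
    'yarn.scheduler.capacity.root.default.capacity': '60'}}
yarn_site = {'capacity-scheduler.xml': {
    'yarn.scheduler.capacity.root.default.capacity': '100',
    'yarn.scheduler.capacity.maximum-applications': '10000'}}
reconfigureQueue(capacity, yarn_site, '/tmp/mod_conf')
# capacity keeps '60' for root.default.capacity and gains the
# maximum-applications entry before Hadoop.modifyConfig is applied and the
# jobtracker is restarted.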
class ConfigEngine(object):
    def __init__(self):
        self.zookeeper = Zookeeper()
        self.hadoop = Hadoop()
        self.hbase = HBase()
        self.hive = Hive()
        self.ignite = Ignite()

    def waiting(self):
        while True:
            print('Waiting...')
            time.sleep(5)

    def configure(self):
        self.zookeeper.configure()
        self.hadoop.configure()
        # self.hbase.configure()
        self.hive.configure()
        self.ignite.configure()
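# A minimal usage sketch, assuming the Zookeeper/Hadoop/HBase/Hive/Ignite
# helpers used by ConfigEngine are importable from this project: configure
# every service once, then keep the process alive.
if __name__ == '__main__':
    engine = ConfigEngine()
    engine.configure()
    engine.waiting()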
def getApp(appID, logoutput=True):
    '''
    Gets the JSON element of a specific application ID.
    '''
    # Both the encrypted and unencrypted cases currently use the same
    # cluster apps endpoint.
    if Hadoop.isEncrypted():
        rmAppsUrl = YARN.getRMUrl() + "/ws/v1/cluster/apps"
    else:
        rmAppsUrl = YARN.getRMUrl() + "/ws/v1/cluster/apps"
    res = util.getHTTPResponse("JSON", rmAppsUrl)
    apps = util.getElement("JSON", res, "apps", False)
    appList = util.getElement("JSON", apps, "app", False)
    for app in appList:
        if util.getValue("JSON", util.getElement("JSON", app, "id")) == appID:
            if logoutput:
                logger.info("getApp Found: app = %s" % app)
            return app
    if logoutput:
        logger.info("getApp returns None")
    return None
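# A minimal sketch of looking up one application and reading a field from
# the returned JSON element; the application id is hypothetical, and the
# "state" lookup simply reuses the util helpers already used in getApp.
app = getApp("application_1500000000000_0001")
if app is not None:
    state = util.getValue("JSON", util.getElement("JSON", app, "state"))
    logger.info("application state = %s" % state)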
def main():
    # engine = Hadoop('bin/hadoop', '/usr/local/hadoop-2.7.0/share/hadoop/tools/lib/hadoop-streaming-2.7.0.jar')
    engine = Hadoop(config.HADOOP_PATH, config.HADOOP_STREAMING_PATH)

    # Put files into Hadoop
    file_path = 'h1b_kaggle_1.csv'
    file_name = os.path.basename(file_path)
    engine.put_file(local_src=file_path, hadoop_dest=file_name, override=False)

    # Map-Reduce tasks: the default output_dir is 'output'.
    cache = {}
    result = engine.map_reduce(data_src=file_path,
                               mapper='group_by_mapper.py',
                               mapper_arguments=[3, 6],
                               reducer='value_summation_reducer.py')
    print('output is', result)
    cache[(3, 6)] = result

    # For group_by_mapper, the arguments below mean:
    # group by the 5th column, aggregate the 6th column.
    result = engine.map_reduce(data_src=file_path,
                               mapper='group_by_mapper.py',
                               mapper_arguments=[5, 6],
                               reducer='value_summation_reducer.py')
    print('output is', result)
    cache[(5, 6)] = result

    with open('sample_output.txt', 'w') as file:
        file.write(str(cache))
"| reboot : ") options, args = parser.parse_args() if len(sys.argv) == 1: print "Type python %s -h or --help for options help." % sys.argv[0] else: if options.command == "": print "Must given -c option\"s value" else: if options.command == "requireInstall": Infra.install(ClusterOptions(True)) if options.command == "deployAll": cluster_options = ClusterOptions(True) Zookeeper.install(cluster_options) Hadoop.install(cluster_options) Storm.install(cluster_options) Hive.install(cluster_options) HBase.install(cluster_options) if options.command == "startZookeeper": Zookeeper.start(ClusterOptions()) if options.command == "stopZookeeper": Zookeeper.stop(ClusterOptions()) if options.command == "startStorm": Storm.start(ClusterOptions()) if options.command == "initCluster": Hadoop.init(ClusterOptions())