def reconfigureQueue(capacityProperties, yarnSiteProperty, mod_conf_path):
    '''
    Reconfigures queue.
    If a key is present in two properties dicts, capacityProperties value takes precedence.
    '''
    # Two-level merge of yarnSiteProperty into capacityProperties.  An entry
    # already present in capacityProperties always wins; only missing
    # sections/keys are copied over from yarnSiteProperty.
    for section, yarnValues in yarnSiteProperty.items():
        if section not in capacityProperties:
            capacityProperties[section] = yarnValues
        else:
            capacitySection = capacityProperties[section]
            for name, value in yarnValues.items():
                # setdefault inserts only when `name` is absent, which is
                # exactly the "capacity takes precedence" contract.
                capacitySection.setdefault(name, value)

    # Push the merged configuration to every service, then bounce the
    # jobtracker so the new queue configuration takes effect.
    Hadoop.modifyConfig(capacityProperties, {'services': ['all']},
                        makeCurrConfBackupInWindows=False)
    MAPRED.restartJobtracker(mod_conf_path)
# Esempio n. 2 (Example no. 2), score: 0 — scraped-snippet separator
class ConfigEngine(object):
    """Drives configuration of the managed cluster services in a fixed order."""

    def __init__(self):
        # One controller object per managed service.
        self.zookeeper = Zookeeper()
        self.hadoop = Hadoop()
        self.hbase = HBase()
        self.hive = Hive()
        self.ignite = Ignite()

    def waiting(self):
        """Block forever, emitting a heartbeat message every five seconds."""
        while True:
            print('Waiting...')
            time.sleep(5)

    def configure(self):
        """Configure each service in dependency order.

        HBase is deliberately not configured here (it was commented out in
        the original source).
        """
        for service in (self.zookeeper, self.hadoop, self.hive, self.ignite):
            service.configure()
# Esempio n. 3 (Example no. 3), score: 0 — scraped-snippet separator
class ConfigEngine(object):
    """Configures the cluster services, one helper object per service."""

    def __init__(self):
        # Instantiate a controller for every supported service.
        self.zookeeper = Zookeeper()
        self.hadoop = Hadoop()
        self.hbase = HBase()
        self.hive = Hive()
        self.ignite = Ignite()

    def waiting(self):
        """Idle loop: print a progress message every 5 s, forever."""
        while True:
            print('Waiting...')
            time.sleep(5)

    def configure(self):
        """Run configure() on each service; HBase is intentionally skipped."""
        self.zookeeper.configure()
        self.hadoop.configure()
        # HBase configuration disabled on purpose (commented out upstream).
        self.hive.configure()
        self.ignite.configure()
def getApp(appID, logoutput=True):
    '''
    Gets JSON element of specific application ID.

    Queries the ResourceManager REST endpoint /ws/v1/cluster/apps and
    returns the matching "app" JSON element, or None when no application
    with the given ID exists.

    :param appID: application ID string to look for.
    :param logoutput: when True, log the found app (or the miss).
    '''
    # BUG FIX: the original branched on Hadoop.isEncrypted() but built the
    # identical URL in both branches, so the check was dead code.  If the
    # encrypted cluster is supposed to use a different scheme/port, that
    # belongs here — TODO confirm against YARN.getRMUrl()'s behavior.
    rmAppsUrl = YARN.getRMUrl() + "/ws/v1/cluster/apps"
    res = util.getHTTPResponse("JSON", rmAppsUrl)
    apps = util.getElement("JSON", res, "apps", False)
    appList = util.getElement("JSON", apps, "app", False)
    # Linear scan: the RM response is a list of app elements keyed by "id".
    for app in appList:
        if util.getValue("JSON", util.getElement("JSON", app, "id")) == appID:
            if logoutput:
                logger.info("getApp Found: app = %s" % app)
            return app
    if logoutput:
        logger.info("getApp returns None")
    return None
# Esempio n. 5 (Example no. 5), score: 0 — scraped-snippet separator
def main():
    """Run two group-by/summation streaming map-reduce jobs over the H1B
    dataset and persist the cached results to sample_output.txt.

    The mapper arguments are [group_by_column, aggregate_column].
    """
    engine = Hadoop(config.HADOOP_PATH, config.HADOOP_STREAMING_PATH)

    # Put the input file into Hadoop (no overwrite if it already exists).
    file_path = 'h1b_kaggle_1.csv'
    file_name = os.path.basename(file_path)
    engine.put_file(local_src=file_path, hadoop_dest=file_name, override=False)

    # Cache of results keyed by (group_by_column, aggregate_column).
    # BUG FIX: `cache` was used without ever being defined (NameError at the
    # old `cache[(3, 6)] = result` line), and the first job's result was
    # overwritten by the second before it could be cached under (3, 6).
    cache = {}

    # First job: group by column 3, aggregate (sum) column 6.
    # Default output_dir for map_reduce is 'output'.
    result = engine.map_reduce(data_src=file_path,
                               mapper='group_by_mapper.py',
                               mapper_arguments=[3, 6],
                               reducer='value_summation_reducer.py')
    print('output is', result)
    cache[(3, 6)] = result

    # Second job: group by column 5, aggregate (sum) column 6.
    result = engine.map_reduce(data_src=file_path,
                               mapper='group_by_mapper.py',
                               mapper_arguments=[5, 6],
                               reducer='value_summation_reducer.py')
    print('output is', result)
    cache[(5, 6)] = result

    with open('sample_output.txt', 'w') as file:
        file.write(str(cache))
# Esempio n. 6 (Example no. 6), score: 0 — scraped-snippet separator
 def __init__(self):
     """Instantiate a controller object for each managed cluster service."""
     # NOTE(review): the enclosing class is outside this chunk; the unusual
     # one-space indentation is preserved from the scraped source.
     self.zookeeper = Zookeeper()
     self.hadoop = Hadoop()
     self.hbase = HBase()
     self.hive = Hive()
     self.ignite = Ignite()
# Esempio n. 7 (Example no. 7), score: 0 — scraped-snippet separator
                           "| reboot           : ")
    options, args = parser.parse_args()

    if len(sys.argv) == 1:
        print "Type python %s -h or --help for options help." % sys.argv[0]
    else:
        if options.command == "":
            print "Must given -c option\"s value"
        else:
            if options.command == "requireInstall":
                Infra.install(ClusterOptions(True))

            if options.command == "deployAll":
                cluster_options = ClusterOptions(True)
                Zookeeper.install(cluster_options)
                Hadoop.install(cluster_options)
                Storm.install(cluster_options)
                Hive.install(cluster_options)
                HBase.install(cluster_options)

            if options.command == "startZookeeper":
                Zookeeper.start(ClusterOptions())

            if options.command == "stopZookeeper":
                Zookeeper.stop(ClusterOptions())

            if options.command == "startStorm":
                Storm.start(ClusterOptions())

            if options.command == "initCluster":
                Hadoop.init(ClusterOptions())
# Esempio n. 8 (Example no. 8), score: 0 — scraped-snippet separator
 def __init__(self):
     """Instantiate a controller object for each managed cluster service."""
     # NOTE(review): duplicate of the fragment above; the enclosing class is
     # outside this chunk and the one-space indentation is preserved as-is.
     self.zookeeper = Zookeeper()
     self.hadoop = Hadoop()
     self.hbase = HBase()
     self.hive = Hive()
     self.ignite = Ignite()