def generate_morphline_config(self, destination):
    # TODO manage generic config, cf. MorphlineIndexer
    """Build the morphline config for a Flume -> Solr access-log pipeline.

    Reads the bundled `hue_accesslogs_no_geo.morphline.conf` template and
    substitutes the Solr collection name and ZooKeeper ensemble.

    Args:
      destination: dict with at least a 'name' key (the Solr collection).

    Returns:
      Tuple of (config property name, rendered morphline config text).
    """
    # Use a context manager so the template file handle is always closed
    # (the previous open(...).read() leaked the descriptor).
    template_path = os.path.join(config_morphline_path(), 'hue_accesslogs_no_geo.morphline.conf')
    with open(template_path) as template_file:
        morphline_config = template_file.read()

    morphline_config = morphline_config.replace(
        '${SOLR_COLLECTION}', destination['name']
    ).replace(
        '${ZOOKEEPER_ENSEMBLE}', '%s/solr' % zkensemble()
    )

    return ('agent_morphlines_conf_file', morphline_config)
def update_flume_config(request):
    """Push a demo Flume agent config + morphline to Cloudera Manager, then refresh Flume.

    Tails the Hue httpd access log via an exec source and ships events to Solr
    through a MorphlineSolrSink. The morphline template is rendered for the
    hard-coded 'log_analytics_demo' collection.

    Returns:
      JsonResponse with the ManagerApi response for each updated config and
      for the Flume refresh/restart.
    """
    api = ManagerApi(request.user)

    flume_agent_config = '''tier1.sources = source1
tier1.channels = channel1
tier1.sinks = sink1

tier1.sources.source1.type = exec
tier1.sources.source1.command = tail -F /var/log/hue-httpd/access_log
tier1.sources.source1.channels = channel1

tier1.channels.channel1.type = memory
tier1.channels.channel1.capacity = 10000
tier1.channels.channel1.transactionCapacity = 1000

# Solr Sink configuration
tier1.sinks.sink1.type = org.apache.flume.sink.solr.morphline.MorphlineSolrSink
tier1.sinks.sink1.morphlineFile = morphlines.conf
tier1.sinks.sink1.morphlineId = hue_accesslogs_no_geo
tier1.sinks.sink1.channel = channel1'''

    # Use a context manager so the template file handle is always closed
    # (the previous open(...).read() leaked the descriptor).
    template_path = os.path.join(config_morphline_path(), 'hue_accesslogs_no_geo.morphline.conf')
    with open(template_path) as template_file:
        morphline_config = template_file.read()
    morphline_config = morphline_config.replace(
        '${SOLR_COLLECTION}', 'log_analytics_demo'
    ).replace(
        '${ZOOKEEPER_ENSEMBLE}', '%s/solr' % zkensemble()
    )

    responses = {}
    responses['agent_config_file'] = api.update_flume_config(
        cluster_name=None, config_name='agent_config_file', config_value=flume_agent_config)
    responses['agent_morphlines_conf_file'] = api.update_flume_config(
        cluster_name=None, config_name='agent_morphlines_conf_file', config_value=morphline_config)
    responses['refresh_flume'] = api.refresh_flume(cluster_name=None, restart=True)

    return JsonResponse(responses)
def generate_config(self, properties):
    """Generate the Envelope (Spark) job configuration files for a pipeline.

    Builds the input step from properties['inputFormat'] ('stream' over Kafka,
    'connector' for SFDC, or 'file'), the output step from the — misspelled
    but caller-facing, so kept as-is — properties['ouputFormat'] key ('file',
    'table', 'index' or 'stream'), and assembles the final 'envelope.conf'.

    Returns:
      dict mapping config file names to contents. Always contains
      'envelope.conf'; also 'navigator_topic.morphline.conf' when a
      morphline translator is required.

    Raises:
      PopupException: when an input or output format is not recognized.
    """
    configs = {}

    # ---------------------------------------------------------------- input
    if properties['inputFormat'] == 'stream':
        if properties['streamSelection'] == 'kafka':
            if properties['topics'] == 'NavigatorAuditEvents':
                # Navigator audit events arrive as JSON and need a morphline
                # translator; ship the rendered morphline alongside the job.
                # Context manager closes the template file (previously leaked).
                morphline_path = os.path.join(config_morphline_path(), 'navigator_topic.morphline.conf')
                with open(morphline_path) as morphline_file:
                    morphline_config = morphline_file.read()
                configs['navigator_topic.morphline.conf'] = morphline_config.replace(
                    '${SOLR_COLLECTION}', 'empty'
                ).replace(
                    '${ZOOKEEPER_ENSEMBLE}', '%s/solr' % zkensemble()
                )
                input = """type = kafka
                brokers = "%(brokers)s"
                topics = [%(topics)s]
                //group.id = nav-envelope
                encoding = bytearray
                parameter.auto.offset.reset = earliest
                translator {
                  type = morphline
                  encoding.key = UTF8
                  encoding.message = UTF8
                  morphline.file = "navigator_topic.morphline.conf"
                  morphline.id = "nav-json-input"
                  field.names = [%(kafkaFieldNames)s]
                  field.types = [%(kafkaFieldTypes)s]
                }
                %(window)s
                """ % properties
            else:
                input = """type = kafka
                brokers = "%(brokers)s"
                topics = [%(topics)s]
                encoding = string
                translator {
                  type = %(kafkaFieldType)s
                  delimiter = "%(kafkaFieldDelimiter)s"
                  field.names = [%(kafkaFieldNames)s]
                  field.types = [%(kafkaFieldTypes)s]
                }
                %(window)s
                """ % properties
        else:
            raise PopupException(
                _('Stream format of %(inputFormat)s not recognized: %(streamSelection)s') % properties)
    elif properties['inputFormat'] == 'connector':
        # sfdc
        input = """type = sfdc
        mode = fetch-all
        sobject = %(streamObject)s
        sfdc: {
          partner: {
            username = "******"
            password = "******"
            token = "%(streamToken)s"
            auth-endpoint = "%(streamEndpointUrl)s"
          }
        }
        """ % properties
    elif properties['inputFormat'] == 'file':
        input = """type = filesystem
        path = %(input_path)s
        format = %(format)s
        """ % properties
    else:
        raise PopupException(_('Input format not recognized: %(inputFormat)s') % properties)

    # ---------------------------------------------------------------- output
    extra_step = ''
    properties['output_deriver'] = """
        deriver {
          type = sql
          query.literal = \"\"\"SELECT * from inputdata\"\"\"
        }"""

    if properties['inputFormat'] == 'stream' and properties['topics'] == 'NavigatorAuditEvents':
        # Kudu does not support upper case names: alias every mixed/upper-case
        # Navigator column to a lower-case name.
        properties['output_deriver'] = """
        deriver {
          type = sql
          query.literal = \"\"\"
            SELECT concat_ws('-', time, service, user) as id, -- timeDate todo
            additionalInfo as additionalinfo, allowed,
            collectionName as collectionname,
            databaseName as databasename, db,
            DELEGATION_TOKEN_ID as delegation_token_id, dst,
            entityId as entityid, time, family, impersonator, ip, name,
            objectType as objecttype,
            objType as objtype,
            objUsageType as objusagetype, op,
            operationParams as operationparams,
            operationText as operationtext,
            opText as optext, path, perms, privilege, qualifier,
            QUERY_ID as query_id,
            resourcePath as resourcepath, service,
            SESSION_ID as session_id,
            solrVersion as solrversion, src, status,
            subOperation as suboperation,
            tableName as tablename,
            `table` as `table`, type, url, user
            FROM inputdata
          \"\"\"
        }"""

    if properties['ouputFormat'] == 'file':
        output = """
        %(output_deriver)s

        planner = {
          type = overwrite
        }
        output = {
          type = filesystem
          path = %(path)s
          format = %(format)s
          header = true
        }""" % properties
    elif properties['ouputFormat'] == 'table':
        # TODO: look at table output type instead and merge
        if properties['inputFormat'] == 'stream' and properties['streamSelection'] == 'kafka':
            output = """
        %(output_deriver)s

        planner {
          type = upsert
        }
        output {
          type = kudu
          connection = "%(kudu_master)s"
          table.name = "%(output_table)s"
        }""" % properties
        else:
            output = """
        %(output_deriver)s

        planner {
          type = append
        }
        output {
          type = hive
          table.name = "%(output_table)s"
        }""" % properties
    elif properties['ouputFormat'] == 'index':
        if True:  # Workaround until envelope Solr output is official
            # Context manager closes the template file (previously leaked).
            morphline_path = os.path.join(config_morphline_path(), 'navigator_topic.morphline.conf')
            with open(morphline_path) as morphline_file:
                morphline_config = morphline_file.read()
            configs['navigator_topic.morphline.conf'] = morphline_config.replace(
                '${SOLR_COLLECTION}', properties['collectionName']
            ).replace(
                '${ZOOKEEPER_ENSEMBLE}', '%s/solr' % zkensemble()
            )
            output = """
        // Load events to a Solr index
        // TODO: Move this to a SolrOutput step, when this is available
        deriver {
          type = morphline
          step.name = kafkaInput
          morphline.file = ${vars.morphline.file}
          morphline.id = ${vars.morphline.solr.indexer}
          field.names = ${vars.json.field.names}
          field.types = ${vars.json.field.types}
        }
        """ % properties
            extra_step = """
        solrOutput {
          dependencies = [outputdata]

          deriver {
            type = sql
            query.literal = \"\"\"
              SELECT *
              FROM outputdata
              LIMIT 0
            \"\"\"
          }

          planner = {
            type = append
          }
          output = {
            type = log
            path = ${vars.hdfs.basedir}
            format = csv
          }
        }""" % properties
        else:
            # Fixed planner typo: was 'upstert', the valid Envelope planner
            # type is 'upsert'.
            output = """
        %(output_deriver)s

        planner {
          type = upsert
        }
        output {
          type = solr
          connection = "%(connection)s"
          collection.name = "%(collectionName)s"
        }""" % properties
    elif properties['ouputFormat'] == 'stream':
        output = """
        %(output_deriver)s

        planner {
          type = append
        }
        output {
          type = kafka
          brokers = "%(brokers)s"
          topic = %(topics)s
          serializer.type = delimited
          serializer.field.delimiter = ","
        }""" % properties
    else:
        raise PopupException(_('Output format not recognized: %(ouputFormat)s') % properties)

    # ------------------------------------------------------------ final conf
    configs['envelope.conf'] = """
application {
  name = %(app_name)s
  %(batch)s
  executors = 1
  executor.cores = 1
  executor.memory = 1G
}

steps {
  inputdata {
    input {
      %(input)s
    }
  }

  outputdata {
    dependencies = [inputdata]

    %(output)s
  }
  %(extra_step)s
}
""" % {
        'input': input,
        'output': output,
        'extra_step': extra_step,
        'app_name': properties['app_name'],
        # Streaming jobs run micro-batches; batch jobs omit the setting.
        'batch': 'batch.milliseconds = 5000' if properties['inputFormat'] == 'stream' else ''
    }

    return configs